@hsingjui/contextweaver 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +380 -0
- package/dist/SearchService-YLOUJF4S.js +1496 -0
- package/dist/chunk-34YZ2U3O.js +1177 -0
- package/dist/chunk-5SRSUMKW.js +612 -0
- package/dist/chunk-5TV4JNTE.js +258 -0
- package/dist/chunk-6C2D5Y4R.js +798 -0
- package/dist/chunk-PN7DP6XL.js +158 -0
- package/dist/codebaseRetrieval-RDCNIUDM.js +10 -0
- package/dist/config-IEL3M4V5.js +18 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +130 -0
- package/dist/scanner-66CLKCSZ.js +9 -0
- package/dist/server-2SAFEAEY.js +131 -0
- package/package.json +59 -0
package/dist/chunk-5SRSUMKW.js
@@ -0,0 +1,612 @@
import {
  isDev
} from "./chunk-PN7DP6XL.js";

// src/utils/logger.ts
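// Logger module: pino writes date-stamped plain-text log files under
// ~/.contextweaver/logs (debug level when isDev, info otherwise) and prunes
// files older than LOG_RETENTION_DAYS.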
import pino from "pino";
import path from "path";
import os from "os";
import fs from "fs";
import { Writable } from "stream";
var logLevel = isDev ? "debug" : "info";
var logDir = path.join(os.homedir(), ".contextweaver", "logs");
var LOG_RETENTION_DAYS = 7;
function ensureLogDir(dir) {
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir, { recursive: true });
  }
}
function getLogFileName() {
  const now = /* @__PURE__ */ new Date();
  const dateStr = now.toISOString().split("T")[0];
  return `app.${dateStr}.log`;
}
function formatTime() {
  const now = /* @__PURE__ */ new Date();
  const pad = (n) => n.toString().padStart(2, "0");
  return `${now.getFullYear()}-${pad(now.getMonth() + 1)}-${pad(now.getDate())} ${pad(now.getHours())}:${pad(now.getMinutes())}:${pad(now.getSeconds())}`;
}
function getLevelLabel(level) {
  const labels = {
    10: "TRACE",
    20: "DEBUG",
    30: "INFO",
    40: "WARN",
    50: "ERROR",
    60: "FATAL"
  };
  return labels[level] || "INFO";
}
function cleanupOldLogs(dir) {
  try {
    if (!fs.existsSync(dir)) return;
    const files = fs.readdirSync(dir);
    const now = Date.now();
    const maxAge = LOG_RETENTION_DAYS * 24 * 60 * 60 * 1e3;
    const logPattern = /^app\.(\d{4}-\d{2}-\d{2})\.log$/;
    for (const file of files) {
      const match = file.match(logPattern);
      if (!match) continue;
      const dateStr = match[1];
      const fileDate = new Date(dateStr).getTime();
      if (isNaN(fileDate)) continue;
      if (now - fileDate > maxAge) {
        const filePath = path.join(dir, file);
        try {
          fs.unlinkSync(filePath);
          console.log(`[Logger] Cleaned up expired log: ${file}`);
        } catch {
        }
      }
    }
  } catch {
  }
}
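// Wraps an appending file stream in a Writable that re-renders pino's JSON
// lines as "YYYY-MM-DD HH:mm:ss [LEVEL] msg", attaching any extra fields as
// indented JSON; chunks that fail to parse are written through unchanged.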
function createFormattedStream(filePath) {
  const writeStream = fs.createWriteStream(filePath, { flags: "a" });
  return new Writable({
    write(chunk, encoding, callback) {
      try {
        const log = JSON.parse(chunk.toString());
        const time = formatTime();
        const level = getLevelLabel(log.level);
        const msg = log.msg || "";
        const { level: _l, time: _t, pid: _p, hostname: _h, name: _n, msg: _m, ...extra } = log;
        let line = `${time} [${level}] ${msg}`;
        if (Object.keys(extra).length > 0) {
          const extraLines = JSON.stringify(extra, null, 2).split("\n").map((l, i) => i === 0 ? l : " " + l).join("\n");
          line += `
${extraLines}`;
        }
        writeStream.write(line + "\n", callback);
      } catch {
        writeStream.write(chunk.toString(), callback);
      }
    }
  });
}
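// Dev logger fans out via pino.multistream to the formatted file stream plus a
// pino-pretty console transport; the prod logger writes to the file stream only.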
function createDevLogger() {
  ensureLogDir(logDir);
  cleanupOldLogs(logDir);
  const logPath = path.join(logDir, getLogFileName());
  const logStream = createFormattedStream(logPath);
  const consoleTransport = pino.transport({
    target: "pino-pretty",
    options: {
      colorize: true,
      translateTime: "SYS:yyyy-mm-dd HH:MM:ss",
      ignore: "pid,hostname"
    }
  });
  return pino(
    {
      level: logLevel,
      name: "contextweaver"
    },
    pino.multistream([
      { stream: logStream, level: logLevel },
      { stream: consoleTransport, level: logLevel }
    ])
  );
}
function createProdLogger() {
  ensureLogDir(logDir);
  cleanupOldLogs(logDir);
  const logPath = path.join(logDir, getLogFileName());
  const logStream = createFormattedStream(logPath);
  return pino(
    {
      level: logLevel,
      name: "contextweaver"
    },
    logStream
  );
}
var logger = isDev ? createDevLogger() : createProdLogger();
var info = logger.info.bind(logger);
var warn = logger.warn.bind(logger);
var error = logger.error.bind(logger);
var debug = logger.debug.bind(logger);
function isDebugEnabled() {
  return logger.isLevelEnabled("debug");
}

// src/search/fts.ts
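// SQLite FTS5 helpers: probe whether the trigram tokenizer is available
// (falling back to unicode61), maintain the files_fts and chunks_fts virtual
// tables, and run BM25-ranked searches that try a strict AND query first and
// backfill with a relaxed OR query when fewer than `limit` rows are found.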
var tokenizerCache = /* @__PURE__ */ new WeakMap();
function detectFtsTokenizer(db) {
  const cached = tokenizerCache.get(db);
  if (cached) return cached;
  let tokenizer;
  try {
    db.exec(`
      CREATE VIRTUAL TABLE IF NOT EXISTS _fts_probe USING fts5(content, tokenize='trigram');
      DROP TABLE IF EXISTS _fts_probe;
    `);
    tokenizer = "trigram";
    logger.debug("FTS tokenizer: trigram available");
  } catch (err) {
    tokenizer = "unicode61";
    logger.debug("FTS tokenizer: falling back to unicode61");
  }
  tokenizerCache.set(db, tokenizer);
  return tokenizer;
}
function initFilesFts(db) {
  const tokenizer = detectFtsTokenizer(db);
  const tableExists = db.prepare(`
    SELECT name FROM sqlite_master
    WHERE type='table' AND name='files_fts'
  `).get();
  if (!tableExists) {
    db.exec(`
      CREATE VIRTUAL TABLE files_fts USING fts5(
        path,
        content,
        tokenize='${tokenizer}'
      );
    `);
    logger.info(`Created files_fts table, tokenizer=${tokenizer}`);
    syncFilesFts(db);
  }
}
function syncFilesFts(db) {
  const fileCount = db.prepare("SELECT COUNT(*) as c FROM files WHERE content IS NOT NULL").get().c;
  const ftsCount = db.prepare("SELECT COUNT(*) as c FROM files_fts").get().c;
  if (ftsCount < fileCount) {
    logger.info(`Syncing FTS index: files=${fileCount}, fts=${ftsCount}`);
    db.exec(`
      DELETE FROM files_fts;
      INSERT INTO files_fts(path, content)
      SELECT path, content FROM files WHERE content IS NOT NULL;
    `);
    logger.info(`FTS index sync complete: ${fileCount} records`);
  }
}
function initChunksFts(db) {
  const tokenizer = detectFtsTokenizer(db);
  const tableExists = db.prepare(`
    SELECT name FROM sqlite_master
    WHERE type='table' AND name='chunks_fts'
  `).get();
  if (!tableExists) {
    db.exec(`
      CREATE VIRTUAL TABLE chunks_fts USING fts5(
        chunk_id UNINDEXED,
        file_path UNINDEXED,
        chunk_index UNINDEXED,
        breadcrumb,
        content,
        tokenize='${tokenizer}'
      );
    `);
    logger.info(`Created chunks_fts table, tokenizer=${tokenizer}`);
  }
}
function isChunksFtsInitialized(db) {
  const result = db.prepare(`
    SELECT name FROM sqlite_master
    WHERE type='table' AND name='chunks_fts'
  `).get();
  return !!result;
}
function batchUpsertChunkFts(db, chunks) {
  const deleteStmt = db.prepare("DELETE FROM chunks_fts WHERE chunk_id = ?");
  const insertStmt = db.prepare(
    "INSERT INTO chunks_fts(chunk_id, file_path, chunk_index, breadcrumb, content) VALUES (?, ?, ?, ?, ?)"
  );
  const transaction = db.transaction((items) => {
    for (const item of items) {
      deleteStmt.run(item.chunkId);
      insertStmt.run(item.chunkId, item.filePath, item.chunkIndex, item.breadcrumb, item.content);
    }
  });
  transaction(chunks);
}
function batchDeleteFileChunksFts(db, filePaths) {
  const stmt = db.prepare("DELETE FROM chunks_fts WHERE file_path = ?");
  const transaction = db.transaction((paths) => {
    for (const p of paths) {
      stmt.run(p);
    }
  });
  transaction(filePaths);
}
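// bm25() reports lower-is-better ranks, so results negate the value to make a
// higher score mean a better match; strict and relaxed hits are merged by
// chunk_id and re-sorted on that score before being returned.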
function searchChunksFts(db, query, limit) {
  const tokens = segmentQuery(query);
  if (tokens.length === 0) {
    logger.debug("Chunk FTS: no valid tokens after segmentation, skipping search");
    return [];
  }
  logger.debug({
    rawQuery: query,
    tokens
  }, "Chunk FTS segmentation result");
  const runQuery = (qStr, queryLimit) => {
    try {
      const rows = db.prepare(`
        SELECT chunk_id, file_path, chunk_index, bm25(chunks_fts) as score
        FROM chunks_fts
        WHERE chunks_fts MATCH ?
        ORDER BY score
        LIMIT ?
      `).all(qStr, queryLimit);
      return rows.map((r) => ({
        chunkId: r.chunk_id,
        filePath: r.file_path,
        chunkIndex: r.chunk_index,
        score: -r.score
      }));
    } catch (e) {
      logger.debug({ error: e }, "Chunk FTS query error");
      return [];
    }
  };
  const strictQuery = tokens.map((t) => `"${t.replace(/"/g, "")}"`).join(" AND ");
  let results = runQuery(strictQuery, limit);
  logger.debug({ type: "strict", count: results.length, query: strictQuery }, "Chunk FTS strict search");
  if (results.length < limit && tokens.length > 1) {
    const beforeCount = results.length;
    const remainingLimit = limit - results.length;
    const relaxedQuery = tokens.map((t) => `"${t.replace(/"/g, "")}"`).join(" OR ");
    const relaxedResults = runQuery(relaxedQuery, remainingLimit + 10);
    const existingIds = new Set(results.map((r) => r.chunkId));
    for (const row of relaxedResults) {
      if (!existingIds.has(row.chunkId)) {
        if (results.length >= limit) break;
        results.push(row);
        existingIds.add(row.chunkId);
      }
    }
    logger.debug({ type: "relaxed", added: results.length - beforeCount, query: relaxedQuery }, "Chunk FTS relaxed search backfill");
  }
  logger.debug({
    chunkCount: results.length,
    topChunks: results.slice(0, 5).map((r) => ({
      path: r.filePath.split("/").slice(-2).join("/"),
      chunkIndex: r.chunkIndex,
      bm25: r.score.toFixed(3)
    }))
  }, "Chunk FTS recall results");
  return results.sort((a, b) => b.score - a.score);
}
function batchUpsertFileFts(db, files) {
  const deleteFts = db.prepare("DELETE FROM files_fts WHERE path = ?");
  const insertFts = db.prepare("INSERT INTO files_fts(path, content) VALUES (?, ?)");
  const transaction = db.transaction((items) => {
    for (const item of items) {
      deleteFts.run(item.path);
      insertFts.run(item.path, item.content);
    }
  });
  transaction(files);
}
function batchDeleteFileFts(db, paths) {
  const stmt = db.prepare("DELETE FROM files_fts WHERE path = ?");
  const transaction = db.transaction((items) => {
    for (const path3 of items) {
      stmt.run(path3);
    }
  });
  transaction(paths);
}
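// Query preprocessing: sanitizeQuery strips FTS5 operators and punctuation,
// while segmentQuery keeps identifier-like tokens (dotted, underscored, or
// camelCase) verbatim and word-segments the rest with Intl.Segmenter("zh-CN"),
// falling back to whitespace/punctuation splitting if the segmenter is missing.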
function sanitizeQuery(query) {
  return query.replace(/[():"*^.\/\\:@#$%&=+\[\]{}<>|~`!?,;]/g, " ").replace(/\b(AND|OR|NOT|NEAR)\b/gi, " ").replace(/\s+/g, " ").trim();
}
var zhSegmenter = null;
function getZhSegmenter() {
  if (zhSegmenter === null) {
    try {
      zhSegmenter = new Intl.Segmenter("zh-CN", { granularity: "word" });
    } catch {
      return null;
    }
  }
  return zhSegmenter;
}
function segmentQuery(query) {
  const uniqueTokens = /* @__PURE__ */ new Set();
  const cleanRaw = sanitizeQuery(query);
  if (!cleanRaw) return [];
  query.split(/\s+/).forEach((t) => {
    if (/[._\/]/.test(t) || /[a-z][A-Z]/.test(t)) {
      uniqueTokens.add(t.toLowerCase());
    }
  });
  const segmenter = getZhSegmenter();
  if (segmenter) {
    const segments = segmenter.segment(cleanRaw);
    for (const seg of segments) {
      if (seg.isWordLike) {
        const t = seg.segment.toLowerCase();
        if (t.trim().length > 0) uniqueTokens.add(t);
      }
    }
  } else {
    logger.warn("Intl.Segmenter unavailable; Chinese search falls back to exact matching");
    cleanRaw.split(/[\s\p{P}]+/u).forEach((t) => {
      if (t.length > 0) uniqueTokens.add(t.toLowerCase());
    });
  }
  return Array.from(uniqueTokens);
}
function searchFilesFts(db, query, limit) {
  const tokens = segmentQuery(query);
  if (tokens.length === 0) {
    logger.debug("FTS: no valid tokens after segmentation, skipping search");
    return [];
  }
  logger.debug({
    rawQuery: query,
    tokens
  }, "FTS segmentation result");
  const runQuery = (qStr, queryLimit) => {
    try {
      const rows = db.prepare(`
        SELECT path, bm25(files_fts) as score
        FROM files_fts
        WHERE files_fts MATCH ?
        ORDER BY score
        LIMIT ?
      `).all(qStr, queryLimit);
      return rows.map((r) => ({ path: r.path, score: -r.score }));
    } catch (e) {
      return [];
    }
  };
  const strictQuery = tokens.map((t) => `"${t.replace(/"/g, "")}"`).join(" AND ");
  let results = runQuery(strictQuery, limit);
  logger.debug({ type: "strict", count: results.length, query: strictQuery }, "FTS strict search");
  if (results.length < limit && tokens.length > 1) {
    const beforeCount = results.length;
    const remainingLimit = limit - results.length;
    const relaxedQuery = tokens.map((t) => `"${t.replace(/"/g, "")}"`).join(" OR ");
    const relaxedResults = runQuery(relaxedQuery, remainingLimit + 10);
    const existingPaths = new Set(results.map((r) => r.path));
    for (const row of relaxedResults) {
      if (!existingPaths.has(row.path)) {
        if (results.length >= limit) break;
        results.push(row);
        existingPaths.add(row.path);
      }
    }
    logger.debug({ type: "relaxed", added: results.length - beforeCount, query: relaxedQuery }, "FTS relaxed search backfill");
  }
  logger.debug({
    fileCount: results.length,
    topFiles: results.slice(0, 5).map((r) => ({
      path: r.path.split("/").slice(-2).join("/"),
      bm25: r.score.toFixed(3)
    }))
  }, "FTS recall results");
  return results.sort((a, b) => b.score - a.score);
}
function isFtsInitialized(db) {
  const result = db.prepare(`
    SELECT name FROM sqlite_master
    WHERE type='table' AND name='files_fts'
  `).get();
  return !!result;
}

// src/db/index.ts
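// Per-project index database: ~/.contextweaver/<projectId>/index.db in WAL
// mode. The files table tracks a content hash plus a separate
// vector_index_hash so stale vector indexing can be detected independently of
// full-text indexing; the metadata table stores the embedding dimensionality.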
import Database from "better-sqlite3";
import path2 from "path";
import os2 from "os";
import fs2 from "fs";
import crypto from "crypto";
var BASE_DIR = path2.join(os2.homedir(), ".contextweaver");
function getDirectoryBirthtime(projectPath) {
  const gitDir = path2.join(projectPath, ".git");
  try {
    const gitStats = fs2.statSync(gitDir);
    if (gitStats.isDirectory() && gitStats.birthtimeMs) {
      return Math.floor(gitStats.birthtimeMs);
    }
  } catch {
  }
  try {
    const rootStats = fs2.statSync(projectPath);
    if (rootStats.birthtimeMs) {
      return Math.floor(rootStats.birthtimeMs);
    }
  } catch {
  }
  return 0;
}
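// A project ID is the first 10 hex chars of md5("<projectPath>::<birthtime>"),
// where birthtime prefers the .git directory's creation time and falls back to
// the project root's (or 0), so the same path re-created later maps to a new ID.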
function generateProjectId(projectPath) {
  const birthtime = getDirectoryBirthtime(projectPath);
  const uniqueKey = `${projectPath}::${birthtime}`;
  return crypto.createHash("md5").update(uniqueKey).digest("hex").slice(0, 10);
}
function initDb(projectId) {
  const projectDir = path2.join(BASE_DIR, projectId);
  if (!fs2.existsSync(projectDir)) {
    fs2.mkdirSync(projectDir, { recursive: true });
  }
  const dbPath = path2.join(projectDir, "index.db");
  const db = new Database(dbPath);
  db.pragma("journal_mode = WAL");
  db.exec(`
    CREATE TABLE IF NOT EXISTS files (
      path TEXT PRIMARY KEY,
      hash TEXT NOT NULL,
      mtime INTEGER NOT NULL,
      size INTEGER NOT NULL,
      content TEXT,
      language TEXT NOT NULL,
      vector_index_hash TEXT
    )
  `);
  try {
    db.exec(`ALTER TABLE files ADD COLUMN vector_index_hash TEXT`);
  } catch {
  }
  db.exec(`
    CREATE INDEX IF NOT EXISTS idx_files_hash ON files(hash);
    CREATE INDEX IF NOT EXISTS idx_files_mtime ON files(mtime);
  `);
  db.exec(`
    CREATE TABLE IF NOT EXISTS metadata (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL
    )
  `);
  initFilesFts(db);
  initChunksFts(db);
  return db;
}
function closeDb(db) {
  db.close();
}
function getAllFileMeta(db) {
  const rows = db.prepare("SELECT path, hash, mtime, size, vector_index_hash FROM files").all();
  const map = /* @__PURE__ */ new Map();
  for (const row of rows) {
    map.set(row.path, {
      mtime: row.mtime,
      hash: row.hash,
      size: row.size,
      vectorIndexHash: row.vector_index_hash
    });
  }
  return map;
}
function getFilesNeedingVectorIndex(db) {
  const rows = db.prepare("SELECT path FROM files WHERE vector_index_hash IS NULL OR vector_index_hash != hash").all();
  return rows.map((r) => r.path);
}
function batchUpdateVectorIndexHash(db, items) {
  const update = db.prepare("UPDATE files SET vector_index_hash = ? WHERE path = ?");
  const transaction = db.transaction((data) => {
    for (const item of data) {
      update.run(item.hash, item.path);
    }
  });
  transaction(items);
}
function clearVectorIndexHash(db, paths) {
  const update = db.prepare("UPDATE files SET vector_index_hash = NULL WHERE path = ?");
  const transaction = db.transaction((items) => {
    for (const item of items) {
      update.run(item);
    }
  });
  transaction(paths);
}
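// Write helpers run each batch inside a better-sqlite3 transaction; batchUpsert
// and batchDelete also mirror the changes into files_fts so the full-text index
// stays in step with the files table.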
function batchUpsert(db, files) {
  const insert = db.prepare(`
    INSERT INTO files (path, hash, mtime, size, content, language)
    VALUES (?, ?, ?, ?, ?, ?)
    ON CONFLICT(path) DO UPDATE SET
      hash = excluded.hash,
      mtime = excluded.mtime,
      size = excluded.size,
      content = excluded.content,
      language = excluded.language
  `);
  const transaction = db.transaction((items) => {
    for (const item of items) {
      insert.run(item.path, item.hash, item.mtime, item.size, item.content, item.language);
    }
  });
  transaction(files);
  const ftsFiles = files.filter((f) => f.content !== null).map((f) => ({ path: f.path, content: f.content }));
  if (ftsFiles.length > 0) {
    batchUpsertFileFts(db, ftsFiles);
  }
}
function batchUpdateMtime(db, items) {
  const update = db.prepare("UPDATE files SET mtime = ? WHERE path = ?");
  const transaction = db.transaction((data) => {
    for (const item of data) {
      update.run(item.mtime, item.path);
    }
  });
  transaction(items);
}
function getAllPaths(db) {
  const rows = db.prepare("SELECT path FROM files").all();
  return rows.map((r) => r.path);
}
function batchDelete(db, paths) {
  const stmt = db.prepare("DELETE FROM files WHERE path = ?");
  const transaction = db.transaction((items) => {
    for (const item of items) {
      stmt.run(item);
    }
  });
  transaction(paths);
  if (paths.length > 0) {
    batchDeleteFileFts(db, paths);
  }
}
function clear(db) {
  db.exec("DELETE FROM files");
}
var METADATA_KEY_EMBEDDING_DIMENSIONS = "embedding_dimensions";
function getMetadata(db, key) {
  const row = db.prepare("SELECT value FROM metadata WHERE key = ?").get(key);
  return row?.value ?? null;
}
function setMetadata(db, key, value) {
  db.prepare(`
    INSERT INTO metadata (key, value)
    VALUES (?, ?)
    ON CONFLICT(key) DO UPDATE SET value = excluded.value
  `).run(key, value);
}
function getStoredEmbeddingDimensions(db) {
  const value = getMetadata(db, METADATA_KEY_EMBEDDING_DIMENSIONS);
  if (value === null) return null;
  const parsed = parseInt(value, 10);
  return isNaN(parsed) ? null : parsed;
}
function setStoredEmbeddingDimensions(db, dimensions) {
  setMetadata(db, METADATA_KEY_EMBEDDING_DIMENSIONS, String(dimensions));
}

export {
  logger,
  isDebugEnabled,
  isChunksFtsInitialized,
  batchUpsertChunkFts,
  batchDeleteFileChunksFts,
  searchChunksFts,
  segmentQuery,
  searchFilesFts,
  isFtsInitialized,
  generateProjectId,
  initDb,
  closeDb,
  getAllFileMeta,
  getFilesNeedingVectorIndex,
  batchUpdateVectorIndexHash,
  clearVectorIndexHash,
  batchUpsert,
  batchUpdateMtime,
  getAllPaths,
  batchDelete,
  clear,
  getStoredEmbeddingDimensions,
  setStoredEmbeddingDimensions
};
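For orientation, a minimal sketch of how the helpers exported above fit together: derive a project ID, open the per-project index, upsert one file record, and run a BM25 full-text query. The import specifier, the sample path, and the sample file record are illustrative assumptions, not part of the package.

import {
  generateProjectId,
  initDb,
  batchUpsert,
  searchFilesFts,
  closeDb
} from "./chunk-5SRSUMKW.js"; // illustrative import path

// Derive a stable per-project ID and open (or create) its SQLite index.
const projectId = generateProjectId("/home/user/my-project");
const db = initDb(projectId);

// Index one file; batchUpsert also refreshes the files_fts table.
batchUpsert(db, [{
  path: "src/app.ts",
  hash: "abc123",
  mtime: Date.now(),
  size: 120,
  content: "export function createServer() { /* ... */ }",
  language: "typescript"
}]);

// BM25-ranked search: strict AND query first, relaxed OR backfill if needed.
const hits = searchFilesFts(db, "createServer", 10);
console.log(hits); // e.g. [{ path: "src/app.ts", score: ... }]

closeDb(db);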