@chiway/contextweaver 1.0.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/SearchService-OS7CYHNJ.js +932 -0
- package/dist/chunk-AB24E3Z7.js +1882 -0
- package/dist/{chunk-EZG4H4MN.js → chunk-EMSMLPMK.js} +6 -7
- package/dist/{chunk-AMQQK4P7.js → chunk-JVKVSTQ3.js} +1 -2
- package/dist/{chunk-B6OWNBOD.js → chunk-RGJSXUFS.js} +274 -79
- package/dist/{chunk-RJURH22T.js → chunk-SKBAE26T.js} +0 -1
- package/dist/{chunk-2CY5SYBI.js → chunk-X7PAYQMT.js} +43 -11
- package/dist/chunk-ZOMGPIU6.js +377 -0
- package/dist/codebaseRetrieval-3Z4CRA7X.js +11 -0
- package/dist/{config-BWZ6CU3W.js → config-LCOJHTCF.js} +1 -2
- package/dist/db-PMVM7557.js +54 -0
- package/dist/index.js +37 -9
- package/dist/{lock-DVY3KJSK.js → lock-FL54LIQL.js} +2 -3
- package/dist/scanner-2XGJWYHR.js +11 -0
- package/dist/{server-PPQUHCUB.js → server-XK6EINRV.js} +4 -5
- package/dist/vectorStore-HPQZOVWF.js +12 -0
- package/package.json +5 -3
- package/dist/SearchService-533KL2HP.js +0 -1654
- package/dist/chunk-HR5KUQSM.js +0 -906
- package/dist/codebaseRetrieval-IC44RHCL.js +0 -12
- package/dist/scanner-SZ2BDYDS.js +0 -11
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import {
|
|
2
2
|
generateProjectId
|
|
3
|
-
} from "./chunk-B6OWNBOD.js";
|
|
3
|
+
} from "./chunk-RGJSXUFS.js";
|
|
4
4
|
import {
|
|
5
5
|
logger
|
|
6
|
-
} from "./chunk-AMQQK4P7.js";
|
|
6
|
+
} from "./chunk-JVKVSTQ3.js";
|
|
7
7
|
|
|
8
8
|
// src/mcp/tools/codebaseRetrieval.ts
|
|
9
9
|
import fs from "fs";
|
|
@@ -59,8 +59,8 @@ function isProjectIndexed(projectId) {
|
|
|
59
59
|
return fs.existsSync(dbPath);
|
|
60
60
|
}
|
|
61
61
|
async function ensureIndexed(repoPath, projectId, onProgress) {
|
|
62
|
-
const { withLock } = await import("./lock-DVY3KJSK.js");
|
|
63
|
-
const { scan } = await import("./scanner-SZ2BDYDS.js");
|
|
62
|
+
const { withLock } = await import("./lock-FL54LIQL.js");
|
|
63
|
+
const { scan } = await import("./scanner-2XGJWYHR.js");
|
|
64
64
|
await withLock(
|
|
65
65
|
projectId,
|
|
66
66
|
"index",
|
|
@@ -105,7 +105,7 @@ async function handleCodebaseRetrieval(args, onProgress) {
|
|
|
105
105
|
},
|
|
106
106
|
"MCP codebase-retrieval \u8C03\u7528\u5F00\u59CB"
|
|
107
107
|
);
|
|
108
|
-
const { checkEmbeddingEnv, checkRerankerEnv } = await import("./config-BWZ6CU3W.js");
|
|
108
|
+
const { checkEmbeddingEnv, checkRerankerEnv } = await import("./config-LCOJHTCF.js");
|
|
109
109
|
const embeddingCheck = checkEmbeddingEnv();
|
|
110
110
|
const rerankerCheck = checkRerankerEnv();
|
|
111
111
|
const allMissingVars = [...embeddingCheck.missingVars, ...rerankerCheck.missingVars];
|
|
@@ -124,7 +124,7 @@ async function handleCodebaseRetrieval(args, onProgress) {
|
|
|
124
124
|
},
|
|
125
125
|
"MCP \u67E5\u8BE2\u6784\u5EFA"
|
|
126
126
|
);
|
|
127
|
-
const { SearchService } = await import("./SearchService-533KL2HP.js");
|
|
127
|
+
const { SearchService } = await import("./SearchService-OS7CYHNJ.js");
|
|
128
128
|
const service = new SearchService(projectId, repo_path);
|
|
129
129
|
await service.init();
|
|
130
130
|
logger.debug("SearchService \u521D\u59CB\u5316\u5B8C\u6210");
|
|
@@ -283,4 +283,3 @@ export {
|
|
|
283
283
|
codebaseRetrievalSchema,
|
|
284
284
|
handleCodebaseRetrieval
|
|
285
285
|
};
|
|
286
|
-
//# sourceMappingURL=chunk-EZG4H4MN.js.map
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
isDev,
|
|
3
3
|
isMcpMode
|
|
4
|
-
} from "./chunk-RJURH22T.js";
|
|
4
|
+
} from "./chunk-SKBAE26T.js";
|
|
5
5
|
|
|
6
6
|
// src/utils/logger.ts
|
|
7
7
|
import fs from "fs";
|
|
@@ -168,4 +168,3 @@ export {
|
|
|
168
168
|
logger,
|
|
169
169
|
isDebugEnabled
|
|
170
170
|
};
|
|
171
|
-
//# sourceMappingURL=chunk-AMQQK4P7.js.map
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
import {
|
|
2
|
+
isDebugEnabled,
|
|
2
3
|
logger
|
|
3
|
-
} from "./chunk-AMQQK4P7.js";
|
|
4
|
+
} from "./chunk-JVKVSTQ3.js";
|
|
5
|
+
|
|
6
|
+
// src/db/index.ts
|
|
7
|
+
import crypto from "crypto";
|
|
8
|
+
import fs from "fs";
|
|
9
|
+
import os from "os";
|
|
10
|
+
import path from "path";
|
|
11
|
+
import Database from "better-sqlite3";
|
|
4
12
|
|
|
5
13
|
// src/search/fts.ts
|
|
6
14
|
var tokenizerCache = /* @__PURE__ */ new WeakMap();
|
|
@@ -25,7 +33,7 @@ function detectFtsTokenizer(db) {
|
|
|
25
33
|
function initFilesFts(db) {
|
|
26
34
|
const tokenizer = detectFtsTokenizer(db);
|
|
27
35
|
const tableExists = db.prepare(`
|
|
28
|
-
SELECT name FROM sqlite_master
|
|
36
|
+
SELECT name FROM sqlite_master
|
|
29
37
|
WHERE type='table' AND name='files_fts'
|
|
30
38
|
`).get();
|
|
31
39
|
if (!tableExists) {
|
|
@@ -33,25 +41,40 @@ function initFilesFts(db) {
|
|
|
33
41
|
CREATE VIRTUAL TABLE files_fts USING fts5(
|
|
34
42
|
path,
|
|
35
43
|
content,
|
|
44
|
+
content='files',
|
|
45
|
+
content_rowid='rowid',
|
|
36
46
|
tokenize='${tokenizer}'
|
|
37
47
|
);
|
|
38
48
|
`);
|
|
39
|
-
logger.info(`\u521B\u5EFA files_fts \u8868\uFF0Ctokenizer=${tokenizer}`);
|
|
40
|
-
syncFilesFts(db);
|
|
41
|
-
}
|
|
42
|
-
}
|
|
43
|
-
function syncFilesFts(db) {
|
|
44
|
-
const fileCount = db.prepare("SELECT COUNT(*) as c FROM files WHERE content IS NOT NULL").get().c;
|
|
45
|
-
const ftsCount = db.prepare("SELECT COUNT(*) as c FROM files_fts").get().c;
|
|
46
|
-
if (ftsCount < fileCount) {
|
|
47
|
-
logger.info(`\u540C\u6B65 FTS \u7D22\u5F15: files=${fileCount}, fts=${ftsCount}`);
|
|
48
|
-
db.exec(`
|
|
49
|
-
DELETE FROM files_fts;
|
|
50
|
-
INSERT INTO files_fts(path, content)
|
|
51
|
-
SELECT path, content FROM files WHERE content IS NOT NULL;
|
|
52
|
-
`);
|
|
53
|
-
logger.info(`FTS \u7D22\u5F15\u540C\u6B65\u5B8C\u6210: ${fileCount} \u6761\u8BB0\u5F55`);
|
|
49
|
+
logger.info(`\u521B\u5EFA files_fts \u8868\uFF08\u5916\u90E8\u5185\u5BB9\u8868\uFF09\uFF0Ctokenizer=${tokenizer}`);
|
|
54
50
|
}
|
|
51
|
+
db.exec(`
|
|
52
|
+
CREATE TRIGGER IF NOT EXISTS files_ai
|
|
53
|
+
AFTER INSERT ON files
|
|
54
|
+
WHEN new.content IS NOT NULL
|
|
55
|
+
BEGIN
|
|
56
|
+
INSERT INTO files_fts(rowid, path, content)
|
|
57
|
+
VALUES (new.rowid, new.path, new.content);
|
|
58
|
+
END;
|
|
59
|
+
|
|
60
|
+
CREATE TRIGGER IF NOT EXISTS files_ad
|
|
61
|
+
AFTER DELETE ON files
|
|
62
|
+
WHEN old.content IS NOT NULL
|
|
63
|
+
BEGIN
|
|
64
|
+
INSERT INTO files_fts(files_fts, rowid, path, content)
|
|
65
|
+
VALUES('delete', old.rowid, old.path, old.content);
|
|
66
|
+
END;
|
|
67
|
+
|
|
68
|
+
CREATE TRIGGER IF NOT EXISTS files_au
|
|
69
|
+
AFTER UPDATE ON files
|
|
70
|
+
WHEN old.content IS NOT NULL OR new.content IS NOT NULL
|
|
71
|
+
BEGIN
|
|
72
|
+
INSERT INTO files_fts(files_fts, rowid, path, content)
|
|
73
|
+
SELECT 'delete', old.rowid, old.path, old.content WHERE old.content IS NOT NULL;
|
|
74
|
+
INSERT INTO files_fts(rowid, path, content)
|
|
75
|
+
SELECT new.rowid, new.path, new.content WHERE new.content IS NOT NULL;
|
|
76
|
+
END;
|
|
77
|
+
`);
|
|
55
78
|
}
|
|
56
79
|
function initChunksFts(db) {
|
|
57
80
|
const tokenizer = detectFtsTokenizer(db);
|
|
@@ -81,13 +104,17 @@ function isChunksFtsInitialized(db) {
|
|
|
81
104
|
return !!result;
|
|
82
105
|
}
|
|
83
106
|
function batchUpsertChunkFts(db, chunks) {
|
|
84
|
-
|
|
107
|
+
if (chunks.length === 0) return;
|
|
108
|
+
const paths = Array.from(new Set(chunks.map((c) => c.filePath)));
|
|
109
|
+
const deleteByPath = db.prepare("DELETE FROM chunks_fts WHERE file_path = ?");
|
|
85
110
|
const insertStmt = db.prepare(
|
|
86
111
|
"INSERT INTO chunks_fts(chunk_id, file_path, chunk_index, breadcrumb, content) VALUES (?, ?, ?, ?, ?)"
|
|
87
112
|
);
|
|
88
113
|
const transaction = db.transaction((items) => {
|
|
114
|
+
for (const p of paths) {
|
|
115
|
+
deleteByPath.run(p);
|
|
116
|
+
}
|
|
89
117
|
for (const item of items) {
|
|
90
|
-
deleteStmt.run(item.chunkId);
|
|
91
118
|
insertStmt.run(item.chunkId, item.filePath, item.chunkIndex, item.breadcrumb, item.content);
|
|
92
119
|
}
|
|
93
120
|
});
|
|
@@ -156,39 +183,21 @@ function searchChunksFts(db, query, limit) {
|
|
|
156
183
|
"Chunk FTS \u5BBD\u5BB9\u641C\u7D22\u8865\u5F55"
|
|
157
184
|
);
|
|
158
185
|
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
186
|
+
if (isDebugEnabled()) {
|
|
187
|
+
logger.debug(
|
|
188
|
+
{
|
|
189
|
+
chunkCount: results.length,
|
|
190
|
+
topChunks: results.slice(0, 5).map((r) => ({
|
|
191
|
+
path: r.filePath.split("/").slice(-2).join("/"),
|
|
192
|
+
chunkIndex: r.chunkIndex,
|
|
193
|
+
bm25: r.score.toFixed(3)
|
|
194
|
+
}))
|
|
195
|
+
},
|
|
196
|
+
"Chunk FTS \u53EC\u56DE\u7ED3\u679C"
|
|
197
|
+
);
|
|
198
|
+
}
|
|
170
199
|
return results.sort((a, b) => b.score - a.score);
|
|
171
200
|
}
|
|
172
|
-
function batchUpsertFileFts(db, files) {
|
|
173
|
-
const deleteFts = db.prepare("DELETE FROM files_fts WHERE path = ?");
|
|
174
|
-
const insertFts = db.prepare("INSERT INTO files_fts(path, content) VALUES (?, ?)");
|
|
175
|
-
const transaction = db.transaction((items) => {
|
|
176
|
-
for (const item of items) {
|
|
177
|
-
deleteFts.run(item.path);
|
|
178
|
-
insertFts.run(item.path, item.content);
|
|
179
|
-
}
|
|
180
|
-
});
|
|
181
|
-
transaction(files);
|
|
182
|
-
}
|
|
183
|
-
function batchDeleteFileFts(db, paths) {
|
|
184
|
-
const stmt = db.prepare("DELETE FROM files_fts WHERE path = ?");
|
|
185
|
-
const transaction = db.transaction((items) => {
|
|
186
|
-
for (const path2 of items) {
|
|
187
|
-
stmt.run(path2);
|
|
188
|
-
}
|
|
189
|
-
});
|
|
190
|
-
transaction(paths);
|
|
191
|
-
}
|
|
192
201
|
function sanitizeQuery(query) {
|
|
193
202
|
return query.replace(/[():"*^./\\:@#$%&=+[\]{}<>|~`!?,;]/g, " ").replace(/\b(AND|OR|NOT|NEAR)\b/gi, " ").replace(/\s+/g, " ").trim();
|
|
194
203
|
}
|
|
@@ -316,16 +325,18 @@ function searchFilesFts(db, query, limit) {
|
|
|
316
325
|
"FTS \u5BBD\u5BB9\u641C\u7D22\u8865\u5F55"
|
|
317
326
|
);
|
|
318
327
|
}
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
328
|
+
if (isDebugEnabled()) {
|
|
329
|
+
logger.debug(
|
|
330
|
+
{
|
|
331
|
+
fileCount: results.length,
|
|
332
|
+
topFiles: results.slice(0, 5).map((r) => ({
|
|
333
|
+
path: r.path.split("/").slice(-2).join("/"),
|
|
334
|
+
bm25: r.score.toFixed(3)
|
|
335
|
+
}))
|
|
336
|
+
},
|
|
337
|
+
"FTS \u53EC\u56DE\u7ED3\u679C"
|
|
338
|
+
);
|
|
339
|
+
}
|
|
329
340
|
return results.sort((a, b) => b.score - a.score);
|
|
330
341
|
}
|
|
331
342
|
function isFtsInitialized(db) {
|
|
@@ -337,11 +348,6 @@ function isFtsInitialized(db) {
|
|
|
337
348
|
}
|
|
338
349
|
|
|
339
350
|
// src/db/index.ts
|
|
340
|
-
import crypto from "crypto";
|
|
341
|
-
import fs from "fs";
|
|
342
|
-
import os from "os";
|
|
343
|
-
import path from "path";
|
|
344
|
-
import Database from "better-sqlite3";
|
|
345
351
|
var BASE_DIR = path.join(os.homedir(), ".contextweaver");
|
|
346
352
|
function getDirectoryBirthtime(projectPath) {
|
|
347
353
|
const gitDir = path.join(projectPath, ".git");
|
|
@@ -400,10 +406,156 @@ function initDb(projectId) {
|
|
|
400
406
|
value TEXT NOT NULL
|
|
401
407
|
)
|
|
402
408
|
`);
|
|
409
|
+
migrateSchema(db);
|
|
403
410
|
initFilesFts(db);
|
|
404
411
|
initChunksFts(db);
|
|
412
|
+
db.pragma("synchronous = NORMAL");
|
|
413
|
+
db.pragma("temp_store = MEMORY");
|
|
414
|
+
db.pragma("cache_size = -64000");
|
|
405
415
|
return db;
|
|
406
416
|
}
|
|
417
|
+
var CURRENT_SCHEMA_VERSION = 3;
|
|
418
|
+
var METADATA_KEY_SCHEMA_VERSION = "schema_version";
|
|
419
|
+
function getSchemaVersion(db) {
|
|
420
|
+
const row = db.prepare("SELECT value FROM metadata WHERE key = ?").get(METADATA_KEY_SCHEMA_VERSION);
|
|
421
|
+
if (!row) return null;
|
|
422
|
+
const parsed = parseInt(row.value, 10);
|
|
423
|
+
return Number.isNaN(parsed) ? null : parsed;
|
|
424
|
+
}
|
|
425
|
+
function setSchemaVersion(db, version) {
|
|
426
|
+
db.prepare(`
|
|
427
|
+
INSERT INTO metadata (key, value)
|
|
428
|
+
VALUES (?, ?)
|
|
429
|
+
ON CONFLICT(key) DO UPDATE SET value = excluded.value
|
|
430
|
+
`).run(METADATA_KEY_SCHEMA_VERSION, String(version));
|
|
431
|
+
}
|
|
432
|
+
function isOldFilesFtsSchema(db) {
|
|
433
|
+
const row = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
|
|
434
|
+
if (!row?.sql) return false;
|
|
435
|
+
return !row.sql.includes("content='files'");
|
|
436
|
+
}
|
|
437
|
+
function migrateSchema(db) {
|
|
438
|
+
const backupExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts_v1_backup'`).get();
|
|
439
|
+
const currentFtsExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
|
|
440
|
+
if (backupExists && currentFtsExists && !isOldFilesFtsSchema(db)) {
|
|
441
|
+
logger.warn("\u68C0\u6D4B\u5230\u6B8B\u7559\u5907\u4EFD\u8868 files_fts_v1_backup\uFF0C\u6E05\u7406\u4E2D");
|
|
442
|
+
db.exec("DROP TABLE files_fts_v1_backup");
|
|
443
|
+
}
|
|
444
|
+
const current = getSchemaVersion(db);
|
|
445
|
+
if (current === null) {
|
|
446
|
+
const fileCount = db.prepare("SELECT COUNT(*) as c FROM files").get().c;
|
|
447
|
+
const ftsExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
|
|
448
|
+
if (fileCount === 0 && !ftsExists) {
|
|
449
|
+
migrateToV3(db);
|
|
450
|
+
setSchemaVersion(db, CURRENT_SCHEMA_VERSION);
|
|
451
|
+
return;
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
if ((current ?? 1) < 2) {
|
|
455
|
+
migrateToV2(db);
|
|
456
|
+
setSchemaVersion(db, 2);
|
|
457
|
+
}
|
|
458
|
+
if ((current ?? 2) < 3) {
|
|
459
|
+
migrateToV3(db);
|
|
460
|
+
setSchemaVersion(db, 3);
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
function migrateToV2(db) {
|
|
464
|
+
const ftsExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
|
|
465
|
+
if (!ftsExists) {
|
|
466
|
+
return;
|
|
467
|
+
}
|
|
468
|
+
if (!isOldFilesFtsSchema(db)) {
|
|
469
|
+
return;
|
|
470
|
+
}
|
|
471
|
+
logger.info("\u6267\u884C schema \u8FC1\u79FB v1 \u2192 v2: files_fts \u8F6C\u4E3A\u5916\u90E8\u5185\u5BB9\u8868");
|
|
472
|
+
db.exec("DROP TABLE files_fts");
|
|
473
|
+
let tokenizer;
|
|
474
|
+
try {
|
|
475
|
+
db.exec(
|
|
476
|
+
`CREATE VIRTUAL TABLE IF NOT EXISTS _fts_probe USING fts5(content, tokenize='trigram');
|
|
477
|
+
DROP TABLE IF EXISTS _fts_probe;`
|
|
478
|
+
);
|
|
479
|
+
tokenizer = "trigram";
|
|
480
|
+
} catch {
|
|
481
|
+
tokenizer = "unicode61";
|
|
482
|
+
}
|
|
483
|
+
db.exec(`
|
|
484
|
+
CREATE VIRTUAL TABLE files_fts USING fts5(
|
|
485
|
+
path,
|
|
486
|
+
content,
|
|
487
|
+
content='files',
|
|
488
|
+
content_rowid='rowid',
|
|
489
|
+
tokenize='${tokenizer}'
|
|
490
|
+
);
|
|
491
|
+
`);
|
|
492
|
+
db.exec(`INSERT INTO files_fts(files_fts) VALUES('rebuild')`);
|
|
493
|
+
logger.info("schema \u8FC1\u79FB v1 \u2192 v2 \u5B8C\u6210");
|
|
494
|
+
}
|
|
495
|
+
function migrateToV3(db) {
|
|
496
|
+
db.exec(`
|
|
497
|
+
CREATE TABLE IF NOT EXISTS pending_marks (
|
|
498
|
+
path TEXT PRIMARY KEY,
|
|
499
|
+
hash TEXT NOT NULL,
|
|
500
|
+
created_at INTEGER NOT NULL
|
|
501
|
+
);
|
|
502
|
+
`);
|
|
503
|
+
logger.info("schema \u8FC1\u79FB v2 \u2192 v3 \u5B8C\u6210: pending_marks \u8868\u5DF2\u521B\u5EFA");
|
|
504
|
+
}
|
|
505
|
+
function insertPendingMarks(db, items) {
|
|
506
|
+
if (items.length === 0) return;
|
|
507
|
+
const now = Date.now();
|
|
508
|
+
const insert = db.prepare(`
|
|
509
|
+
INSERT INTO pending_marks (path, hash, created_at)
|
|
510
|
+
VALUES (?, ?, ?)
|
|
511
|
+
ON CONFLICT(path) DO UPDATE SET hash = excluded.hash, created_at = excluded.created_at
|
|
512
|
+
`);
|
|
513
|
+
const tx = db.transaction((data) => {
|
|
514
|
+
for (const it of data) {
|
|
515
|
+
insert.run(it.path, it.hash, now);
|
|
516
|
+
}
|
|
517
|
+
});
|
|
518
|
+
tx(items);
|
|
519
|
+
}
|
|
520
|
+
function deletePendingMarks(db, paths) {
|
|
521
|
+
if (paths.length === 0) return;
|
|
522
|
+
const del = db.prepare("DELETE FROM pending_marks WHERE path = ?");
|
|
523
|
+
const tx = db.transaction((items) => {
|
|
524
|
+
for (const p of items) del.run(p);
|
|
525
|
+
});
|
|
526
|
+
tx(paths);
|
|
527
|
+
}
|
|
528
|
+
function replayPendingMarks(db) {
|
|
529
|
+
const rows = db.prepare("SELECT path, hash FROM pending_marks").all();
|
|
530
|
+
if (rows.length === 0) return { applied: 0, discarded: 0 };
|
|
531
|
+
const update = db.prepare(`
|
|
532
|
+
UPDATE files SET vector_index_hash = ?
|
|
533
|
+
WHERE path = ? AND hash = ?
|
|
534
|
+
`);
|
|
535
|
+
const del = db.prepare("DELETE FROM pending_marks WHERE path = ?");
|
|
536
|
+
let applied = 0;
|
|
537
|
+
let discarded = 0;
|
|
538
|
+
const tx = db.transaction(() => {
|
|
539
|
+
for (const r of rows) {
|
|
540
|
+
const info = update.run(r.hash, r.path, r.hash);
|
|
541
|
+
if (info.changes > 0) {
|
|
542
|
+
applied++;
|
|
543
|
+
} else {
|
|
544
|
+
discarded++;
|
|
545
|
+
}
|
|
546
|
+
del.run(r.path);
|
|
547
|
+
}
|
|
548
|
+
});
|
|
549
|
+
tx();
|
|
550
|
+
if (applied > 0 || discarded > 0) {
|
|
551
|
+
logger.info({ applied, discarded }, "pending_marks \u91CD\u653E\u5B8C\u6210");
|
|
552
|
+
}
|
|
553
|
+
return { applied, discarded };
|
|
554
|
+
}
|
|
555
|
+
function countPendingMarks(db) {
|
|
556
|
+
const row = db.prepare("SELECT COUNT(*) as c FROM pending_marks").get();
|
|
557
|
+
return row.c;
|
|
558
|
+
}
|
|
407
559
|
function closeDb(db) {
|
|
408
560
|
db.close();
|
|
409
561
|
}
|
|
@@ -459,15 +611,6 @@ function batchUpsert(db, files) {
|
|
|
459
611
|
}
|
|
460
612
|
});
|
|
461
613
|
transaction(files);
|
|
462
|
-
const ftsFiles = [];
|
|
463
|
-
for (const f of files) {
|
|
464
|
-
if (f.content !== null) {
|
|
465
|
-
ftsFiles.push({ path: f.path, content: f.content });
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
if (ftsFiles.length > 0) {
|
|
469
|
-
batchUpsertFileFts(db, ftsFiles);
|
|
470
|
-
}
|
|
471
614
|
}
|
|
472
615
|
function batchUpdateMtime(db, items) {
|
|
473
616
|
const update = db.prepare("UPDATE files SET mtime = ? WHERE path = ?");
|
|
@@ -490,14 +633,16 @@ function batchDelete(db, paths) {
|
|
|
490
633
|
}
|
|
491
634
|
});
|
|
492
635
|
transaction(paths);
|
|
493
|
-
if (paths.length > 0) {
|
|
494
|
-
batchDeleteFileFts(db, paths);
|
|
495
|
-
}
|
|
496
636
|
}
|
|
497
637
|
function clear(db) {
|
|
498
638
|
db.exec("DELETE FROM files");
|
|
639
|
+
db.exec("DELETE FROM files_fts");
|
|
640
|
+
db.exec("DELETE FROM chunks_fts");
|
|
499
641
|
}
|
|
500
642
|
var METADATA_KEY_EMBEDDING_DIMENSIONS = "embedding_dimensions";
|
|
643
|
+
var METADATA_KEY_LANCEDB_MIGRATION_STATE = "lancedb_migration_displaycode_state";
|
|
644
|
+
var METADATA_KEY_LANCEDB_MIGRATION_LOCK = "lancedb_migration_lock";
|
|
645
|
+
var MIGRATION_LOCK_STALE_MS = 10 * 60 * 1e3;
|
|
501
646
|
function getMetadata(db, key) {
|
|
502
647
|
const row = db.prepare("SELECT value FROM metadata WHERE key = ?").get(key);
|
|
503
648
|
return row?.value ?? null;
|
|
@@ -518,6 +663,47 @@ function getStoredEmbeddingDimensions(db) {
|
|
|
518
663
|
function setStoredEmbeddingDimensions(db, dimensions) {
|
|
519
664
|
setMetadata(db, METADATA_KEY_EMBEDDING_DIMENSIONS, String(dimensions));
|
|
520
665
|
}
|
|
666
|
+
function getLanceDbMigrationState(db) {
|
|
667
|
+
const value = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_STATE);
|
|
668
|
+
if (value === "pending" || value === "done" || value === "aborted") return value;
|
|
669
|
+
return null;
|
|
670
|
+
}
|
|
671
|
+
function setLanceDbMigrationState(db, state) {
|
|
672
|
+
setMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_STATE, state);
|
|
673
|
+
}
|
|
674
|
+
function clearAllVectorIndexHash(db) {
|
|
675
|
+
const info = db.prepare("UPDATE files SET vector_index_hash = NULL").run();
|
|
676
|
+
return info.changes;
|
|
677
|
+
}
|
|
678
|
+
function tryAcquireLanceDbMigrationLock(db) {
|
|
679
|
+
const now = Date.now();
|
|
680
|
+
const pid = process.pid;
|
|
681
|
+
const lockValue = JSON.stringify({ pid, acquiredAt: now });
|
|
682
|
+
const existing = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_LOCK);
|
|
683
|
+
if (existing) {
|
|
684
|
+
try {
|
|
685
|
+
const parsed = JSON.parse(existing);
|
|
686
|
+
if (parsed.pid === pid) return true;
|
|
687
|
+
if (now - parsed.acquiredAt < MIGRATION_LOCK_STALE_MS) return false;
|
|
688
|
+
logger.warn(
|
|
689
|
+
{ stalePid: parsed.pid, age: now - parsed.acquiredAt },
|
|
690
|
+
"\u68C0\u6D4B\u5230\u50F5\u5C38\u8FC1\u79FB\u9501\uFF0C\u5F3A\u5236\u593A\u53D6"
|
|
691
|
+
);
|
|
692
|
+
} catch {
|
|
693
|
+
}
|
|
694
|
+
}
|
|
695
|
+
db.prepare(`
|
|
696
|
+
INSERT INTO metadata (key, value)
|
|
697
|
+
VALUES (?, ?)
|
|
698
|
+
ON CONFLICT(key) DO UPDATE SET value = excluded.value
|
|
699
|
+
`).run(METADATA_KEY_LANCEDB_MIGRATION_LOCK, lockValue);
|
|
700
|
+
const reread = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_LOCK);
|
|
701
|
+
if (reread !== lockValue) return false;
|
|
702
|
+
return true;
|
|
703
|
+
}
|
|
704
|
+
function releaseLanceDbMigrationLock(db) {
|
|
705
|
+
db.prepare("DELETE FROM metadata WHERE key = ?").run(METADATA_KEY_LANCEDB_MIGRATION_LOCK);
|
|
706
|
+
}
|
|
521
707
|
|
|
522
708
|
export {
|
|
523
709
|
isChunksFtsInitialized,
|
|
@@ -529,6 +715,11 @@ export {
|
|
|
529
715
|
isFtsInitialized,
|
|
530
716
|
generateProjectId,
|
|
531
717
|
initDb,
|
|
718
|
+
migrateSchema,
|
|
719
|
+
insertPendingMarks,
|
|
720
|
+
deletePendingMarks,
|
|
721
|
+
replayPendingMarks,
|
|
722
|
+
countPendingMarks,
|
|
532
723
|
closeDb,
|
|
533
724
|
getAllFileMeta,
|
|
534
725
|
getFilesNeedingVectorIndex,
|
|
@@ -540,6 +731,10 @@ export {
|
|
|
540
731
|
batchDelete,
|
|
541
732
|
clear,
|
|
542
733
|
getStoredEmbeddingDimensions,
|
|
543
|
-
setStoredEmbeddingDimensions
|
|
734
|
+
setStoredEmbeddingDimensions,
|
|
735
|
+
getLanceDbMigrationState,
|
|
736
|
+
setLanceDbMigrationState,
|
|
737
|
+
clearAllVectorIndexHash,
|
|
738
|
+
tryAcquireLanceDbMigrationLock,
|
|
739
|
+
releaseLanceDbMigrationLock
|
|
544
740
|
};
|
|
545
|
-
//# sourceMappingURL=chunk-B6OWNBOD.js.map
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import {
|
|
2
2
|
closeAllIndexers,
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
} from "./chunk-HR5KUQSM.js";
|
|
3
|
+
getIndexer,
|
|
4
|
+
invalidateAllExpanderCaches
|
|
5
|
+
} from "./chunk-AB24E3Z7.js";
|
|
6
|
+
import {
|
|
7
|
+
closeAllVectorStores
|
|
8
|
+
} from "./chunk-ZOMGPIU6.js";
|
|
6
9
|
import {
|
|
7
10
|
batchDelete,
|
|
8
11
|
batchUpdateMtime,
|
|
@@ -16,14 +19,14 @@ import {
|
|
|
16
19
|
getStoredEmbeddingDimensions,
|
|
17
20
|
initDb,
|
|
18
21
|
setStoredEmbeddingDimensions
|
|
19
|
-
} from "./chunk-B6OWNBOD.js";
|
|
22
|
+
} from "./chunk-RGJSXUFS.js";
|
|
20
23
|
import {
|
|
21
24
|
logger
|
|
22
|
-
} from "./chunk-AMQQK4P7.js";
|
|
25
|
+
} from "./chunk-JVKVSTQ3.js";
|
|
23
26
|
import {
|
|
24
27
|
getEmbeddingConfig,
|
|
25
28
|
getExcludePatterns
|
|
26
|
-
} from "./chunk-RJURH22T.js";
|
|
29
|
+
} from "./chunk-SKBAE26T.js";
|
|
27
30
|
|
|
28
31
|
// src/scanner/index.ts
|
|
29
32
|
import path3 from "path";
|
|
@@ -522,6 +525,19 @@ var SourceAdapter = class {
|
|
|
522
525
|
/**
|
|
523
526
|
* 将字节偏移转换为字符偏移
|
|
524
527
|
*/
|
|
528
|
+
/**
|
|
529
|
+
* 将 tree-sitter 返回的偏移(可能是 UTF-8 字节或 UTF-16 字符域)
|
|
530
|
+
* 标准化为 UTF-16 字符域偏移。下游 String.prototype.slice 直接可用。
|
|
531
|
+
*
|
|
532
|
+
* 导出供 SemanticSplitter 在生成 ChunkMetadata 时统一域。
|
|
533
|
+
*/
|
|
534
|
+
toCharOffset(offset) {
|
|
535
|
+
if (this.domain === "utf16" || this.domain === "unknown") return offset;
|
|
536
|
+
return this.byteToChar(offset);
|
|
537
|
+
}
|
|
538
|
+
/**
|
|
539
|
+
* 将字节偏移转换为字符偏移(仅 utf8 域有效;utf16/unknown 直接返回原值)
|
|
540
|
+
*/
|
|
525
541
|
byteToChar(byteOffset) {
|
|
526
542
|
if (!this.byteToCharMap) return byteOffset;
|
|
527
543
|
const safeOffset = Math.max(0, Math.min(this.byteToCharMap.length - 1, byteOffset));
|
|
@@ -914,11 +930,12 @@ ${displayCode}`,
|
|
|
914
930
|
const vectorEnd = end;
|
|
915
931
|
const displayCode = this.adapter.slice(start, end);
|
|
916
932
|
const vectorCode = this.adapter.slice(vectorStart, vectorEnd);
|
|
933
|
+
const toChar = (n) => this.adapter.toCharOffset(n);
|
|
917
934
|
const metadata = {
|
|
918
|
-
startIndex: start,
|
|
919
|
-
endIndex: end,
|
|
920
|
-
rawSpan: { start: prevEnd, end: rawSpanEnd },
|
|
921
|
-
vectorSpan: { start: vectorStart, end: vectorEnd },
|
|
935
|
+
startIndex: toChar(start),
|
|
936
|
+
endIndex: toChar(end),
|
|
937
|
+
rawSpan: { start: toChar(prevEnd), end: toChar(rawSpanEnd) },
|
|
938
|
+
vectorSpan: { start: toChar(vectorStart), end: toChar(vectorEnd) },
|
|
922
939
|
filePath,
|
|
923
940
|
language,
|
|
924
941
|
contextPath: w.contextPath
|
|
@@ -1350,6 +1367,22 @@ async function scan(rootPath, options = {}) {
|
|
|
1350
1367
|
}
|
|
1351
1368
|
}
|
|
1352
1369
|
options.onProgress?.(100, 100, "\u7D22\u5F15\u5B8C\u6210");
|
|
1370
|
+
if (options.vectorIndex !== false) {
|
|
1371
|
+
try {
|
|
1372
|
+
const embeddingConfig = getEmbeddingConfig();
|
|
1373
|
+
const indexer = await getIndexer(projectId, embeddingConfig.dimensions);
|
|
1374
|
+
const gcResult = await indexer.gc(db);
|
|
1375
|
+
if (gcResult.orphans > 0) {
|
|
1376
|
+
logger.info({ orphans: gcResult.orphans }, "GC \u5B8C\u6210");
|
|
1377
|
+
} else if (gcResult.truncated) {
|
|
1378
|
+
logger.debug("GC \u8D85\u65F6\u8DF3\u8FC7\uFF0C\u4E0B\u6B21\u626B\u63CF\u91CD\u8BD5");
|
|
1379
|
+
}
|
|
1380
|
+
} catch (err) {
|
|
1381
|
+
const error = err;
|
|
1382
|
+
logger.warn({ error: error.message }, "GC \u8DF3\u8FC7");
|
|
1383
|
+
}
|
|
1384
|
+
}
|
|
1385
|
+
invalidateAllExpanderCaches();
|
|
1353
1386
|
return stats;
|
|
1354
1387
|
} finally {
|
|
1355
1388
|
closeDb(db);
|
|
@@ -1361,4 +1394,3 @@ async function scan(rootPath, options = {}) {
|
|
|
1361
1394
|
export {
|
|
1362
1395
|
scan
|
|
1363
1396
|
};
|
|
1364
|
-
//# sourceMappingURL=chunk-2CY5SYBI.js.map
|