@chiway/contextweaver 1.0.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  generateProjectId
3
- } from "./chunk-B6OWNBOD.js";
3
+ } from "./chunk-RGJSXUFS.js";
4
4
  import {
5
5
  logger
6
- } from "./chunk-AMQQK4P7.js";
6
+ } from "./chunk-JVKVSTQ3.js";
7
7
 
8
8
  // src/mcp/tools/codebaseRetrieval.ts
9
9
  import fs from "fs";
@@ -59,8 +59,8 @@ function isProjectIndexed(projectId) {
59
59
  return fs.existsSync(dbPath);
60
60
  }
61
61
  async function ensureIndexed(repoPath, projectId, onProgress) {
62
- const { withLock } = await import("./lock-DVY3KJSK.js");
63
- const { scan } = await import("./scanner-SZ2BDYDS.js");
62
+ const { withLock } = await import("./lock-FL54LIQL.js");
63
+ const { scan } = await import("./scanner-2XGJWYHR.js");
64
64
  await withLock(
65
65
  projectId,
66
66
  "index",
@@ -105,7 +105,7 @@ async function handleCodebaseRetrieval(args, onProgress) {
105
105
  },
106
106
  "MCP codebase-retrieval \u8C03\u7528\u5F00\u59CB"
107
107
  );
108
- const { checkEmbeddingEnv, checkRerankerEnv } = await import("./config-BWZ6CU3W.js");
108
+ const { checkEmbeddingEnv, checkRerankerEnv } = await import("./config-LCOJHTCF.js");
109
109
  const embeddingCheck = checkEmbeddingEnv();
110
110
  const rerankerCheck = checkRerankerEnv();
111
111
  const allMissingVars = [...embeddingCheck.missingVars, ...rerankerCheck.missingVars];
@@ -124,7 +124,7 @@ async function handleCodebaseRetrieval(args, onProgress) {
124
124
  },
125
125
  "MCP \u67E5\u8BE2\u6784\u5EFA"
126
126
  );
127
- const { SearchService } = await import("./SearchService-533KL2HP.js");
127
+ const { SearchService } = await import("./SearchService-OS7CYHNJ.js");
128
128
  const service = new SearchService(projectId, repo_path);
129
129
  await service.init();
130
130
  logger.debug("SearchService \u521D\u59CB\u5316\u5B8C\u6210");
@@ -283,4 +283,3 @@ export {
283
283
  codebaseRetrievalSchema,
284
284
  handleCodebaseRetrieval
285
285
  };
286
- //# sourceMappingURL=chunk-EZG4H4MN.js.map
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  isDev,
3
3
  isMcpMode
4
- } from "./chunk-RJURH22T.js";
4
+ } from "./chunk-SKBAE26T.js";
5
5
 
6
6
  // src/utils/logger.ts
7
7
  import fs from "fs";
@@ -168,4 +168,3 @@ export {
168
168
  logger,
169
169
  isDebugEnabled
170
170
  };
171
- //# sourceMappingURL=chunk-AMQQK4P7.js.map
@@ -1,6 +1,14 @@
1
1
  import {
2
+ isDebugEnabled,
2
3
  logger
3
- } from "./chunk-AMQQK4P7.js";
4
+ } from "./chunk-JVKVSTQ3.js";
5
+
6
+ // src/db/index.ts
7
+ import crypto from "crypto";
8
+ import fs from "fs";
9
+ import os from "os";
10
+ import path from "path";
11
+ import Database from "better-sqlite3";
4
12
 
5
13
  // src/search/fts.ts
6
14
  var tokenizerCache = /* @__PURE__ */ new WeakMap();
@@ -25,7 +33,7 @@ function detectFtsTokenizer(db) {
25
33
  function initFilesFts(db) {
26
34
  const tokenizer = detectFtsTokenizer(db);
27
35
  const tableExists = db.prepare(`
28
- SELECT name FROM sqlite_master
36
+ SELECT name FROM sqlite_master
29
37
  WHERE type='table' AND name='files_fts'
30
38
  `).get();
31
39
  if (!tableExists) {
@@ -33,25 +41,40 @@ function initFilesFts(db) {
33
41
  CREATE VIRTUAL TABLE files_fts USING fts5(
34
42
  path,
35
43
  content,
44
+ content='files',
45
+ content_rowid='rowid',
36
46
  tokenize='${tokenizer}'
37
47
  );
38
48
  `);
39
- logger.info(`\u521B\u5EFA files_fts \u8868\uFF0Ctokenizer=${tokenizer}`);
40
- syncFilesFts(db);
41
- }
42
- }
43
- function syncFilesFts(db) {
44
- const fileCount = db.prepare("SELECT COUNT(*) as c FROM files WHERE content IS NOT NULL").get().c;
45
- const ftsCount = db.prepare("SELECT COUNT(*) as c FROM files_fts").get().c;
46
- if (ftsCount < fileCount) {
47
- logger.info(`\u540C\u6B65 FTS \u7D22\u5F15: files=${fileCount}, fts=${ftsCount}`);
48
- db.exec(`
49
- DELETE FROM files_fts;
50
- INSERT INTO files_fts(path, content)
51
- SELECT path, content FROM files WHERE content IS NOT NULL;
52
- `);
53
- logger.info(`FTS \u7D22\u5F15\u540C\u6B65\u5B8C\u6210: ${fileCount} \u6761\u8BB0\u5F55`);
49
+ logger.info(`\u521B\u5EFA files_fts \u8868\uFF08\u5916\u90E8\u5185\u5BB9\u8868\uFF09\uFF0Ctokenizer=${tokenizer}`);
54
50
  }
51
+ db.exec(`
52
+ CREATE TRIGGER IF NOT EXISTS files_ai
53
+ AFTER INSERT ON files
54
+ WHEN new.content IS NOT NULL
55
+ BEGIN
56
+ INSERT INTO files_fts(rowid, path, content)
57
+ VALUES (new.rowid, new.path, new.content);
58
+ END;
59
+
60
+ CREATE TRIGGER IF NOT EXISTS files_ad
61
+ AFTER DELETE ON files
62
+ WHEN old.content IS NOT NULL
63
+ BEGIN
64
+ INSERT INTO files_fts(files_fts, rowid, path, content)
65
+ VALUES('delete', old.rowid, old.path, old.content);
66
+ END;
67
+
68
+ CREATE TRIGGER IF NOT EXISTS files_au
69
+ AFTER UPDATE ON files
70
+ WHEN old.content IS NOT NULL OR new.content IS NOT NULL
71
+ BEGIN
72
+ INSERT INTO files_fts(files_fts, rowid, path, content)
73
+ SELECT 'delete', old.rowid, old.path, old.content WHERE old.content IS NOT NULL;
74
+ INSERT INTO files_fts(rowid, path, content)
75
+ SELECT new.rowid, new.path, new.content WHERE new.content IS NOT NULL;
76
+ END;
77
+ `);
55
78
  }
56
79
  function initChunksFts(db) {
57
80
  const tokenizer = detectFtsTokenizer(db);
@@ -81,13 +104,17 @@ function isChunksFtsInitialized(db) {
81
104
  return !!result;
82
105
  }
83
106
  function batchUpsertChunkFts(db, chunks) {
84
- const deleteStmt = db.prepare("DELETE FROM chunks_fts WHERE chunk_id = ?");
107
+ if (chunks.length === 0) return;
108
+ const paths = Array.from(new Set(chunks.map((c) => c.filePath)));
109
+ const deleteByPath = db.prepare("DELETE FROM chunks_fts WHERE file_path = ?");
85
110
  const insertStmt = db.prepare(
86
111
  "INSERT INTO chunks_fts(chunk_id, file_path, chunk_index, breadcrumb, content) VALUES (?, ?, ?, ?, ?)"
87
112
  );
88
113
  const transaction = db.transaction((items) => {
114
+ for (const p of paths) {
115
+ deleteByPath.run(p);
116
+ }
89
117
  for (const item of items) {
90
- deleteStmt.run(item.chunkId);
91
118
  insertStmt.run(item.chunkId, item.filePath, item.chunkIndex, item.breadcrumb, item.content);
92
119
  }
93
120
  });
@@ -156,39 +183,21 @@ function searchChunksFts(db, query, limit) {
156
183
  "Chunk FTS \u5BBD\u5BB9\u641C\u7D22\u8865\u5F55"
157
184
  );
158
185
  }
159
- logger.debug(
160
- {
161
- chunkCount: results.length,
162
- topChunks: results.slice(0, 5).map((r) => ({
163
- path: r.filePath.split("/").slice(-2).join("/"),
164
- chunkIndex: r.chunkIndex,
165
- bm25: r.score.toFixed(3)
166
- }))
167
- },
168
- "Chunk FTS \u53EC\u56DE\u7ED3\u679C"
169
- );
186
+ if (isDebugEnabled()) {
187
+ logger.debug(
188
+ {
189
+ chunkCount: results.length,
190
+ topChunks: results.slice(0, 5).map((r) => ({
191
+ path: r.filePath.split("/").slice(-2).join("/"),
192
+ chunkIndex: r.chunkIndex,
193
+ bm25: r.score.toFixed(3)
194
+ }))
195
+ },
196
+ "Chunk FTS \u53EC\u56DE\u7ED3\u679C"
197
+ );
198
+ }
170
199
  return results.sort((a, b) => b.score - a.score);
171
200
  }
172
- function batchUpsertFileFts(db, files) {
173
- const deleteFts = db.prepare("DELETE FROM files_fts WHERE path = ?");
174
- const insertFts = db.prepare("INSERT INTO files_fts(path, content) VALUES (?, ?)");
175
- const transaction = db.transaction((items) => {
176
- for (const item of items) {
177
- deleteFts.run(item.path);
178
- insertFts.run(item.path, item.content);
179
- }
180
- });
181
- transaction(files);
182
- }
183
- function batchDeleteFileFts(db, paths) {
184
- const stmt = db.prepare("DELETE FROM files_fts WHERE path = ?");
185
- const transaction = db.transaction((items) => {
186
- for (const path2 of items) {
187
- stmt.run(path2);
188
- }
189
- });
190
- transaction(paths);
191
- }
/**
 * Reduce a raw query string to plain space-separated terms.
 *
 * Every character in the punctuation class below, and every standalone
 * AND, OR, NOT or NEAR (matched case-insensitively on word boundaries), is
 * replaced by a space; whitespace runs are then collapsed to a single space
 * and the result is trimmed.
 *
 * NOTE(review): presumably this keeps user text from being interpreted as
 * FTS5 query syntax by the search functions in this bundle - confirm at the
 * call sites, which are not visible in this hunk.
 *
 * @param {string} query raw query text
 * @returns {string} the cleaned text (may be empty)
 */
192
201
  function sanitizeQuery(query) {
193
202
  return query.replace(/[():"*^./\\:@#$%&=+[\]{}<>|~`!?,;]/g, " ").replace(/\b(AND|OR|NOT|NEAR)\b/gi, " ").replace(/\s+/g, " ").trim();
194
203
  }
@@ -316,16 +325,18 @@ function searchFilesFts(db, query, limit) {
316
325
  "FTS \u5BBD\u5BB9\u641C\u7D22\u8865\u5F55"
317
326
  );
318
327
  }
319
- logger.debug(
320
- {
321
- fileCount: results.length,
322
- topFiles: results.slice(0, 5).map((r) => ({
323
- path: r.path.split("/").slice(-2).join("/"),
324
- bm25: r.score.toFixed(3)
325
- }))
326
- },
327
- "FTS \u53EC\u56DE\u7ED3\u679C"
328
- );
328
+ if (isDebugEnabled()) {
329
+ logger.debug(
330
+ {
331
+ fileCount: results.length,
332
+ topFiles: results.slice(0, 5).map((r) => ({
333
+ path: r.path.split("/").slice(-2).join("/"),
334
+ bm25: r.score.toFixed(3)
335
+ }))
336
+ },
337
+ "FTS \u53EC\u56DE\u7ED3\u679C"
338
+ );
339
+ }
329
340
  return results.sort((a, b) => b.score - a.score);
330
341
  }
331
342
  function isFtsInitialized(db) {
@@ -337,11 +348,6 @@ function isFtsInitialized(db) {
337
348
  }
338
349
 
339
350
  // src/db/index.ts
340
- import crypto from "crypto";
341
- import fs from "fs";
342
- import os from "os";
343
- import path from "path";
344
- import Database from "better-sqlite3";
345
351
  var BASE_DIR = path.join(os.homedir(), ".contextweaver");
346
352
  function getDirectoryBirthtime(projectPath) {
347
353
  const gitDir = path.join(projectPath, ".git");
@@ -400,10 +406,156 @@ function initDb(projectId) {
400
406
  value TEXT NOT NULL
401
407
  )
402
408
  `);
409
+ migrateSchema(db);
403
410
  initFilesFts(db);
404
411
  initChunksFts(db);
412
+ db.pragma("synchronous = NORMAL");
413
+ db.pragma("temp_store = MEMORY");
414
+ db.pragma("cache_size = -64000");
405
415
  return db;
406
416
  }
// Schema version that migrateSchema stamps on a fresh database, and the
// metadata-table key under which getSchemaVersion / setSchemaVersion store it.
417
+ var CURRENT_SCHEMA_VERSION = 3;
418
+ var METADATA_KEY_SCHEMA_VERSION = "schema_version";
/**
 * Read the schema version stored in the metadata table.
 *
 * @param {object} db database handle exposing prepare(sql).get(...);
 *   presumably a better-sqlite3 Database (the bundle imports it) - confirm.
 * @returns {number|null} the stored value parsed with parseInt (radix 10),
 *   or null when the row is absent or the value does not parse to a number.
 */
419
+ function getSchemaVersion(db) {
420
+ const row = db.prepare("SELECT value FROM metadata WHERE key = ?").get(METADATA_KEY_SCHEMA_VERSION);
421
+ if (!row) return null;
422
+ const parsed = parseInt(row.value, 10);
423
+ return Number.isNaN(parsed) ? null : parsed;
424
+ }
/**
 * Store the schema version in the metadata table.
 *
 * Uses an upsert: inserts the row, or overwrites the value when the key
 * already exists. The version is stored as a string.
 *
 * @param {object} db database handle exposing prepare(sql).run(...)
 * @param {number} version version to record
 */
425
+ function setSchemaVersion(db, version) {
426
+ db.prepare(`
427
+ INSERT INTO metadata (key, value)
428
+ VALUES (?, ?)
429
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value
430
+ `).run(METADATA_KEY_SCHEMA_VERSION, String(version));
431
+ }
/**
 * Tell whether files_fts exists with a definition that is not the
 * external-content form.
 *
 * Looks up the CREATE statement recorded in sqlite_master and checks it for
 * the literal substring content='files'.
 *
 * NOTE(review): the check is a plain substring match, so a definition that
 * spells the option with different quoting or spacing would be reported as
 * old - confirm that every writer of this table uses the exact same text.
 *
 * @param {object} db database handle exposing prepare(sql).get()
 * @returns {boolean} false when the table (or its sql text) is absent or
 *   already contains content='files'; true otherwise.
 */
432
+ function isOldFilesFtsSchema(db) {
433
+ const row = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
434
+ if (!row?.sql) return false;
435
+ return !row.sql.includes("content='files'");
436
+ }
/**
 * Bring the database schema up to version 3, recording progress in the
 * metadata table after each step.
 *
 * Order of work:
 *   1. Drop a leftover files_fts_v1_backup table when files_fts exists and
 *      is already in the external-content form.
 *   2. Fresh-database shortcut: with no stored version, no rows in files and
 *      no files_fts table, only migrateToV3 runs and CURRENT_SCHEMA_VERSION
 *      is stamped.
 *   3. Otherwise run migrateToV2 and migrateToV3 as needed.
 *
 * NOTE(review): nothing in this function creates files_fts_v1_backup;
 * presumably it was left behind by an earlier migration implementation -
 * confirm.
 * NOTE(review): the steps and their setSchemaVersion calls are not wrapped
 * in a transaction here, so an interruption can leave a step applied but
 * not recorded (or the reverse) - confirm whether callers provide one.
 *
 * @param {object} db database handle; the files and metadata tables must
 *   already exist because both are queried here.
 */
437
+ function migrateSchema(db) {
438
+ const backupExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts_v1_backup'`).get();
439
+ const currentFtsExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
440
+ if (backupExists && currentFtsExists && !isOldFilesFtsSchema(db)) {
441
+ logger.warn("\u68C0\u6D4B\u5230\u6B8B\u7559\u5907\u4EFD\u8868 files_fts_v1_backup\uFF0C\u6E05\u7406\u4E2D");
442
+ db.exec("DROP TABLE files_fts_v1_backup");
443
+ }
// The version is read once, before any migration step below has run.
444
+ const current = getSchemaVersion(db);
445
+ if (current === null) {
446
+ const fileCount = db.prepare("SELECT COUNT(*) as c FROM files").get().c;
447
+ const ftsExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
448
+ if (fileCount === 0 && !ftsExists) {
449
+ migrateToV3(db);
450
+ setSchemaVersion(db, CURRENT_SCHEMA_VERSION);
451
+ return;
452
+ }
453
+ }
// A missing version that did not take the shortcut above is treated as v1.
454
+ if ((current ?? 1) < 2) {
455
+ migrateToV2(db);
456
+ setSchemaVersion(db, 2);
457
+ }
// current still holds the value read earlier, so a null version also passes
// this check (fallback 2) right after the v2 step has run.
458
+ if ((current ?? 2) < 3) {
459
+ migrateToV3(db);
460
+ setSchemaVersion(db, 3);
461
+ }
462
+ }
/**
 * v1 to v2 migration: replace files_fts with an FTS5 external-content table
 * backed by the files table, then rebuild its index.
 *
 * Returns without changes when files_fts does not exist or is already in
 * the external-content form (see isOldFilesFtsSchema).
 *
 * NOTE(review): DROP, CREATE and rebuild are separate statements with no
 * transaction visible here. If the process stops after the DROP, the next
 * call returns early at the first guard and migrateSchema still records
 * version 2, so the rebuild would not run for rows already in files -
 * confirm whether that window matters.
 * NOTE(review): the tokenizer probe below resembles what a helper named
 * detectFtsTokenizer elsewhere in this bundle is presumably for; that
 * helper's body is not visible here, so equivalence is unconfirmed.
 *
 * @param {object} db database handle exposing prepare() and exec()
 */
463
+ function migrateToV2(db) {
464
+ const ftsExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts'`).get();
465
+ if (!ftsExists) {
466
+ return;
467
+ }
468
+ if (!isOldFilesFtsSchema(db)) {
469
+ return;
470
+ }
471
+ logger.info("\u6267\u884C schema \u8FC1\u79FB v1 \u2192 v2: files_fts \u8F6C\u4E3A\u5916\u90E8\u5185\u5BB9\u8868");
472
+ db.exec("DROP TABLE files_fts");
// Probe whether this SQLite build accepts the trigram tokenizer by creating
// and dropping a scratch FTS5 table; fall back to unicode61 if that throws.
473
+ let tokenizer;
474
+ try {
475
+ db.exec(
476
+ `CREATE VIRTUAL TABLE IF NOT EXISTS _fts_probe USING fts5(content, tokenize='trigram');
477
+ DROP TABLE IF EXISTS _fts_probe;`
478
+ );
479
+ tokenizer = "trigram";
480
+ } catch {
481
+ tokenizer = "unicode61";
482
+ }
483
+ db.exec(`
484
+ CREATE VIRTUAL TABLE files_fts USING fts5(
485
+ path,
486
+ content,
487
+ content='files',
488
+ content_rowid='rowid',
489
+ tokenize='${tokenizer}'
490
+ );
491
+ `);
// FTS5 'rebuild' command: repopulates the index from the content table.
492
+ db.exec(`INSERT INTO files_fts(files_fts) VALUES('rebuild')`);
493
+ logger.info("schema \u8FC1\u79FB v1 \u2192 v2 \u5B8C\u6210");
494
+ }
/**
 * v2 to v3 migration: create the pending_marks table if it is missing.
 *
 * The table is keyed by path and stores a hash and a created_at integer.
 * The info line is logged on every call, including when the table already
 * existed (the statement uses IF NOT EXISTS).
 *
 * @param {object} db database handle exposing exec()
 */
495
+ function migrateToV3(db) {
496
+ db.exec(`
497
+ CREATE TABLE IF NOT EXISTS pending_marks (
498
+ path TEXT PRIMARY KEY,
499
+ hash TEXT NOT NULL,
500
+ created_at INTEGER NOT NULL
501
+ );
502
+ `);
503
+ logger.info("schema \u8FC1\u79FB v2 \u2192 v3 \u5B8C\u6210: pending_marks \u8868\u5DF2\u521B\u5EFA");
504
+ }
/**
 * Upsert a batch of pending marks inside one transaction.
 *
 * Each item is written by path; an existing row for the same path has its
 * hash and created_at overwritten. All rows of one call share a single
 * created_at value, taken from Date.now() before the transaction starts.
 *
 * @param {object} db database handle exposing prepare() and transaction()
 * @param {Array<{path: string, hash: string}>} items marks to record; an
 *   empty array is a no-op.
 */
505
+ function insertPendingMarks(db, items) {
506
+ if (items.length === 0) return;
507
+ const now = Date.now();
508
+ const insert = db.prepare(`
509
+ INSERT INTO pending_marks (path, hash, created_at)
510
+ VALUES (?, ?, ?)
511
+ ON CONFLICT(path) DO UPDATE SET hash = excluded.hash, created_at = excluded.created_at
512
+ `);
513
+ const tx = db.transaction((data) => {
514
+ for (const it of data) {
515
+ insert.run(it.path, it.hash, now);
516
+ }
517
+ });
518
+ tx(items);
519
+ }
/**
 * Delete the pending_marks rows for the given paths inside one transaction.
 *
 * @param {object} db database handle exposing prepare() and transaction()
 * @param {string[]} paths paths whose marks are removed; an empty array is
 *   a no-op.
 */
520
+ function deletePendingMarks(db, paths) {
521
+ if (paths.length === 0) return;
522
+ const del = db.prepare("DELETE FROM pending_marks WHERE path = ?");
523
+ const tx = db.transaction((items) => {
524
+ for (const p of items) del.run(p);
525
+ });
526
+ tx(paths);
527
+ }
/**
 * Apply every row of pending_marks to the files table, then remove it.
 *
 * For each mark, files.vector_index_hash is set to the mark's hash only on
 * a files row with the same path AND the same hash. A mark that changed a
 * row counts as applied; one that matched nothing counts as discarded.
 * The mark is deleted in both cases. All updates and deletes run in one
 * transaction; a summary is logged when at least one mark was processed.
 *
 * @param {object} db database handle exposing prepare() and transaction()
 * @returns {{applied: number, discarded: number}} counts for this call;
 *   both are 0 when the table was empty.
 */
528
+ function replayPendingMarks(db) {
529
+ const rows = db.prepare("SELECT path, hash FROM pending_marks").all();
530
+ if (rows.length === 0) return { applied: 0, discarded: 0 };
531
+ const update = db.prepare(`
532
+ UPDATE files SET vector_index_hash = ?
533
+ WHERE path = ? AND hash = ?
534
+ `);
535
+ const del = db.prepare("DELETE FROM pending_marks WHERE path = ?");
536
+ let applied = 0;
537
+ let discarded = 0;
538
+ const tx = db.transaction(() => {
539
+ for (const r of rows) {
540
+ const info = update.run(r.hash, r.path, r.hash);
541
+ if (info.changes > 0) {
542
+ applied++;
543
+ } else {
544
+ discarded++;
545
+ }
546
+ del.run(r.path);
547
+ }
548
+ });
549
+ tx();
550
+ if (applied > 0 || discarded > 0) {
551
+ logger.info({ applied, discarded }, "pending_marks \u91CD\u653E\u5B8C\u6210");
552
+ }
553
+ return { applied, discarded };
554
+ }
/**
 * Count the rows currently in pending_marks.
 *
 * @param {object} db database handle exposing prepare(sql).get()
 * @returns {number} the COUNT(*) value reported by the query
 */
555
+ function countPendingMarks(db) {
556
+ const row = db.prepare("SELECT COUNT(*) as c FROM pending_marks").get();
557
+ return row.c;
558
+ }
/**
 * Close the given database handle by calling its close() method.
 *
 * @param {object} db database handle to close
 */
407
559
  function closeDb(db) {
408
560
  db.close();
409
561
  }
@@ -459,15 +611,6 @@ function batchUpsert(db, files) {
459
611
  }
460
612
  });
461
613
  transaction(files);
462
- const ftsFiles = [];
463
- for (const f of files) {
464
- if (f.content !== null) {
465
- ftsFiles.push({ path: f.path, content: f.content });
466
- }
467
- }
468
- if (ftsFiles.length > 0) {
469
- batchUpsertFileFts(db, ftsFiles);
470
- }
471
614
  }
472
615
  function batchUpdateMtime(db, items) {
473
616
  const update = db.prepare("UPDATE files SET mtime = ? WHERE path = ?");
@@ -490,14 +633,16 @@ function batchDelete(db, paths) {
490
633
  }
491
634
  });
492
635
  transaction(paths);
493
- if (paths.length > 0) {
494
- batchDeleteFileFts(db, paths);
495
- }
496
636
  }
/**
 * Delete all rows from files, then from files_fts, then from chunks_fts.
 *
 * NOTE(review): elsewhere in this diff files_fts is declared with
 * content='files' and an AFTER DELETE trigger (files_ad) already forwards
 * row deletions to it. The explicit DELETE on files_fts therefore runs
 * after the source rows are gone; the FTS5 documentation describes a
 * 'delete-all' command for external-content tables - confirm that a plain
 * DELETE is what is intended here.
 *
 * @param {object} db database handle exposing exec()
 */
497
637
  function clear(db) {
498
638
  db.exec("DELETE FROM files");
639
+ db.exec("DELETE FROM files_fts");
640
+ db.exec("DELETE FROM chunks_fts");
499
641
  }
// Keys used in the metadata table: the stored embedding dimensions, the
// LanceDB migration state, and the LanceDB migration lock.
// MIGRATION_LOCK_STALE_MS is 10 minutes expressed in milliseconds; a lock
// older than this is treated as stale by tryAcquireLanceDbMigrationLock.
500
642
  var METADATA_KEY_EMBEDDING_DIMENSIONS = "embedding_dimensions";
643
+ var METADATA_KEY_LANCEDB_MIGRATION_STATE = "lancedb_migration_displaycode_state";
644
+ var METADATA_KEY_LANCEDB_MIGRATION_LOCK = "lancedb_migration_lock";
645
+ var MIGRATION_LOCK_STALE_MS = 10 * 60 * 1e3;
501
646
  function getMetadata(db, key) {
502
647
  const row = db.prepare("SELECT value FROM metadata WHERE key = ?").get(key);
503
648
  return row?.value ?? null;
@@ -518,6 +663,47 @@ function getStoredEmbeddingDimensions(db) {
/**
 * Store the embedding dimensions in the metadata table, converted to a
 * string, under METADATA_KEY_EMBEDDING_DIMENSIONS.
 *
 * @param {object} db database handle passed through to setMetadata
 * @param {number} dimensions value to record
 */
518
663
  function setStoredEmbeddingDimensions(db, dimensions) {
519
664
  setMetadata(db, METADATA_KEY_EMBEDDING_DIMENSIONS, String(dimensions));
520
665
  }
/**
 * Read the LanceDB migration state from the metadata table.
 *
 * @param {object} db database handle passed through to getMetadata
 * @returns {"pending"|"done"|"aborted"|null} the stored value when it is
 *   one of the three recognized states; null for a missing or any other
 *   value.
 */
666
+ function getLanceDbMigrationState(db) {
667
+ const value = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_STATE);
668
+ if (value === "pending" || value === "done" || value === "aborted") return value;
669
+ return null;
670
+ }
/**
 * Store the LanceDB migration state in the metadata table.
 *
 * The value is written as given; it is not checked against the states that
 * getLanceDbMigrationState recognizes ("pending", "done", "aborted").
 *
 * @param {object} db database handle passed through to setMetadata
 * @param {string} state state to record
 */
671
+ function setLanceDbMigrationState(db, state) {
672
+ setMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_STATE, state);
673
+ }
/**
 * Set vector_index_hash to NULL on every row of the files table.
 *
 * @param {object} db database handle exposing prepare(sql).run()
 * @returns {number} the changes count reported by the UPDATE
 */
674
+ function clearAllVectorIndexHash(db) {
675
+ const info = db.prepare("UPDATE files SET vector_index_hash = NULL").run();
676
+ return info.changes;
677
+ }
/**
 * Try to take the LanceDB migration lock, which is stored as a JSON string
 * of the form {pid, acquiredAt} in the metadata table.
 *
 * Outcomes:
 *   - the stored lock has this process's pid: returns true immediately,
 *     without rewriting the row (acquiredAt is not refreshed);
 *   - the stored lock belongs to another pid and is younger than
 *     MIGRATION_LOCK_STALE_MS: returns false;
 *   - the stored lock is older than that, has no usable acquiredAt (the
 *     age comparison is then false), or is not valid JSON: it is
 *     overwritten;
 *   - no lock is stored: a new one is written.
 * After writing, the row is read back and true is returned only when it
 * still equals the value this call wrote.
 *
 * NOTE(review): the read, the upsert and the re-read are separate
 * statements with no transaction visible here, so two processes that both
 * see a stale or missing lock can both upsert; the re-read narrows that
 * window but does not obviously close it - confirm whether callers
 * serialize access some other way.
 *
 * @param {object} db database handle exposing prepare(); also passed to
 *   getMetadata
 * @returns {boolean} true when this process holds the lock on return
 */
678
+ function tryAcquireLanceDbMigrationLock(db) {
679
+ const now = Date.now();
680
+ const pid = process.pid;
681
+ const lockValue = JSON.stringify({ pid, acquiredAt: now });
682
+ const existing = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_LOCK);
683
+ if (existing) {
684
+ try {
685
+ const parsed = JSON.parse(existing);
686
+ if (parsed.pid === pid) return true;
687
+ if (now - parsed.acquiredAt < MIGRATION_LOCK_STALE_MS) return false;
688
+ logger.warn(
689
+ { stalePid: parsed.pid, age: now - parsed.acquiredAt },
690
+ "\u68C0\u6D4B\u5230\u50F5\u5C38\u8FC1\u79FB\u9501\uFF0C\u5F3A\u5236\u593A\u53D6"
691
+ );
// A stored value that fails JSON.parse is ignored silently by the empty
// catch and is overwritten by the upsert below.
692
+ } catch {
693
+ }
694
+ }
695
+ db.prepare(`
696
+ INSERT INTO metadata (key, value)
697
+ VALUES (?, ?)
698
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value
699
+ `).run(METADATA_KEY_LANCEDB_MIGRATION_LOCK, lockValue);
// Read the row back: the lock counts as acquired only if our value is
// still the one stored.
700
+ const reread = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_LOCK);
701
+ if (reread !== lockValue) return false;
702
+ return true;
703
+ }
/**
 * Remove the LanceDB migration lock row from the metadata table.
 *
 * NOTE(review): the row is deleted regardless of which pid wrote it; no
 * ownership check happens here - confirm callers only release a lock they
 * hold.
 *
 * @param {object} db database handle exposing prepare(sql).run()
 */
704
+ function releaseLanceDbMigrationLock(db) {
705
+ db.prepare("DELETE FROM metadata WHERE key = ?").run(METADATA_KEY_LANCEDB_MIGRATION_LOCK);
706
+ }
521
707
 
522
708
  export {
523
709
  isChunksFtsInitialized,
@@ -529,6 +715,11 @@ export {
529
715
  isFtsInitialized,
530
716
  generateProjectId,
531
717
  initDb,
718
+ migrateSchema,
719
+ insertPendingMarks,
720
+ deletePendingMarks,
721
+ replayPendingMarks,
722
+ countPendingMarks,
532
723
  closeDb,
533
724
  getAllFileMeta,
534
725
  getFilesNeedingVectorIndex,
@@ -540,6 +731,10 @@ export {
540
731
  batchDelete,
541
732
  clear,
542
733
  getStoredEmbeddingDimensions,
543
- setStoredEmbeddingDimensions
734
+ setStoredEmbeddingDimensions,
735
+ getLanceDbMigrationState,
736
+ setLanceDbMigrationState,
737
+ clearAllVectorIndexHash,
738
+ tryAcquireLanceDbMigrationLock,
739
+ releaseLanceDbMigrationLock
544
740
  };
545
- //# sourceMappingURL=chunk-B6OWNBOD.js.map
@@ -263,4 +263,3 @@ export {
263
263
  getRerankerConfig,
264
264
  getExcludePatterns
265
265
  };
266
- //# sourceMappingURL=chunk-RJURH22T.js.map
@@ -1,8 +1,11 @@
1
1
  import {
2
2
  closeAllIndexers,
3
- closeAllVectorStores,
4
- getIndexer
5
- } from "./chunk-HR5KUQSM.js";
3
+ getIndexer,
4
+ invalidateAllExpanderCaches
5
+ } from "./chunk-AB24E3Z7.js";
6
+ import {
7
+ closeAllVectorStores
8
+ } from "./chunk-ZOMGPIU6.js";
6
9
  import {
7
10
  batchDelete,
8
11
  batchUpdateMtime,
@@ -16,14 +19,14 @@ import {
16
19
  getStoredEmbeddingDimensions,
17
20
  initDb,
18
21
  setStoredEmbeddingDimensions
19
- } from "./chunk-B6OWNBOD.js";
22
+ } from "./chunk-RGJSXUFS.js";
20
23
  import {
21
24
  logger
22
- } from "./chunk-AMQQK4P7.js";
25
+ } from "./chunk-JVKVSTQ3.js";
23
26
  import {
24
27
  getEmbeddingConfig,
25
28
  getExcludePatterns
26
- } from "./chunk-RJURH22T.js";
29
+ } from "./chunk-SKBAE26T.js";
27
30
 
28
31
  // src/scanner/index.ts
29
32
  import path3 from "path";
@@ -522,6 +525,19 @@ var SourceAdapter = class {
// NOTE(review): the short JSDoc block that follows predates toCharOffset and
// describes byte-to-char conversion; it now sits directly above
// toCharOffset's own JSDoc and looks orphaned.
522
525
  /**
523
526
  * Convert a byte offset to a character offset.
524
527
  */
528
+ /**
529
+ * Normalize an offset returned by tree-sitter (which may be in the UTF-8 byte domain or the UTF-16 character domain)
530
+ * to a UTF-16 character-domain offset that String.prototype.slice can use directly.
531
+ * The offset is returned unchanged when this.domain is "utf16" or "unknown"; otherwise byteToChar is applied.
532
+ * Exposed so that SemanticSplitter can unify the offset domain when it builds ChunkMetadata.
533
+ */
534
+ toCharOffset(offset) {
535
+ if (this.domain === "utf16" || this.domain === "unknown") return offset;
536
+ return this.byteToChar(offset);
537
+ }
538
+ /**
539
+ * Convert a byte offset to a character offset (meaningful only in the utf8 domain; for utf16/unknown the original value is returned unchanged).
540
+ */
525
541
  byteToChar(byteOffset) {
526
542
  if (!this.byteToCharMap) return byteOffset;
527
543
  const safeOffset = Math.max(0, Math.min(this.byteToCharMap.length - 1, byteOffset));
@@ -914,11 +930,12 @@ ${displayCode}`,
914
930
  const vectorEnd = end;
915
931
  const displayCode = this.adapter.slice(start, end);
916
932
  const vectorCode = this.adapter.slice(vectorStart, vectorEnd);
933
+ const toChar = (n) => this.adapter.toCharOffset(n);
917
934
  const metadata = {
918
- startIndex: start,
919
- endIndex: end,
920
- rawSpan: { start: prevEnd, end: rawSpanEnd },
921
- vectorSpan: { start: vectorStart, end: vectorEnd },
935
+ startIndex: toChar(start),
936
+ endIndex: toChar(end),
937
+ rawSpan: { start: toChar(prevEnd), end: toChar(rawSpanEnd) },
938
+ vectorSpan: { start: toChar(vectorStart), end: toChar(vectorEnd) },
922
939
  filePath,
923
940
  language,
924
941
  contextPath: w.contextPath
@@ -1350,6 +1367,22 @@ async function scan(rootPath, options = {}) {
1350
1367
  }
1351
1368
  }
1352
1369
  options.onProgress?.(100, 100, "\u7D22\u5F15\u5B8C\u6210");
1370
+ if (options.vectorIndex !== false) {
1371
+ try {
1372
+ const embeddingConfig = getEmbeddingConfig();
1373
+ const indexer = await getIndexer(projectId, embeddingConfig.dimensions);
1374
+ const gcResult = await indexer.gc(db);
1375
+ if (gcResult.orphans > 0) {
1376
+ logger.info({ orphans: gcResult.orphans }, "GC \u5B8C\u6210");
1377
+ } else if (gcResult.truncated) {
1378
+ logger.debug("GC \u8D85\u65F6\u8DF3\u8FC7\uFF0C\u4E0B\u6B21\u626B\u63CF\u91CD\u8BD5");
1379
+ }
1380
+ } catch (err) {
1381
+ const error = err;
1382
+ logger.warn({ error: error.message }, "GC \u8DF3\u8FC7");
1383
+ }
1384
+ }
1385
+ invalidateAllExpanderCaches();
1353
1386
  return stats;
1354
1387
  } finally {
1355
1388
  closeDb(db);
@@ -1361,4 +1394,3 @@ async function scan(rootPath, options = {}) {
1361
1394
  export {
1362
1395
  scan
1363
1396
  };
1364
- //# sourceMappingURL=chunk-2CY5SYBI.js.map