@chiway/contextweaver 1.1.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  import {
2
2
  generateProjectId
3
- } from "./chunk-6Z4JEEVJ.js";
3
+ } from "./chunk-RGJSXUFS.js";
4
4
  import {
5
5
  logger
6
- } from "./chunk-AMQQK4P7.js";
6
+ } from "./chunk-JVKVSTQ3.js";
7
7
 
8
8
  // src/mcp/tools/codebaseRetrieval.ts
9
9
  import fs from "fs";
@@ -59,8 +59,8 @@ function isProjectIndexed(projectId) {
59
59
  return fs.existsSync(dbPath);
60
60
  }
61
61
  async function ensureIndexed(repoPath, projectId, onProgress) {
62
- const { withLock } = await import("./lock-DVY3KJSK.js");
63
- const { scan } = await import("./scanner-RFG4YWYI.js");
62
+ const { withLock } = await import("./lock-FL54LIQL.js");
63
+ const { scan } = await import("./scanner-2XGJWYHR.js");
64
64
  await withLock(
65
65
  projectId,
66
66
  "index",
@@ -105,7 +105,7 @@ async function handleCodebaseRetrieval(args, onProgress) {
105
105
  },
106
106
  "MCP codebase-retrieval \u8C03\u7528\u5F00\u59CB"
107
107
  );
108
- const { checkEmbeddingEnv, checkRerankerEnv } = await import("./config-BWZ6CU3W.js");
108
+ const { checkEmbeddingEnv, checkRerankerEnv } = await import("./config-LCOJHTCF.js");
109
109
  const embeddingCheck = checkEmbeddingEnv();
110
110
  const rerankerCheck = checkRerankerEnv();
111
111
  const allMissingVars = [...embeddingCheck.missingVars, ...rerankerCheck.missingVars];
@@ -124,7 +124,7 @@ async function handleCodebaseRetrieval(args, onProgress) {
124
124
  },
125
125
  "MCP \u67E5\u8BE2\u6784\u5EFA"
126
126
  );
127
- const { SearchService } = await import("./SearchService-MYPOCM3B.js");
127
+ const { SearchService } = await import("./SearchService-OS7CYHNJ.js");
128
128
  const service = new SearchService(projectId, repo_path);
129
129
  await service.init();
130
130
  logger.debug("SearchService \u521D\u59CB\u5316\u5B8C\u6210");
@@ -283,4 +283,3 @@ export {
283
283
  codebaseRetrievalSchema,
284
284
  handleCodebaseRetrieval
285
285
  };
286
- //# sourceMappingURL=chunk-7G5V7YT5.js.map
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  isDev,
3
3
  isMcpMode
4
- } from "./chunk-RJURH22T.js";
4
+ } from "./chunk-SKBAE26T.js";
5
5
 
6
6
  // src/utils/logger.ts
7
7
  import fs from "fs";
@@ -168,4 +168,3 @@ export {
168
168
  logger,
169
169
  isDebugEnabled
170
170
  };
171
- //# sourceMappingURL=chunk-AMQQK4P7.js.map
@@ -1,7 +1,14 @@
1
1
  import {
2
2
  isDebugEnabled,
3
3
  logger
4
- } from "./chunk-AMQQK4P7.js";
4
+ } from "./chunk-JVKVSTQ3.js";
5
+
6
+ // src/db/index.ts
7
+ import crypto from "crypto";
8
+ import fs from "fs";
9
+ import os from "os";
10
+ import path from "path";
11
+ import Database from "better-sqlite3";
5
12
 
6
13
  // src/search/fts.ts
7
14
  var tokenizerCache = /* @__PURE__ */ new WeakMap();
@@ -26,7 +33,7 @@ function detectFtsTokenizer(db) {
26
33
  function initFilesFts(db) {
27
34
  const tokenizer = detectFtsTokenizer(db);
28
35
  const tableExists = db.prepare(`
29
- SELECT name FROM sqlite_master
36
+ SELECT name FROM sqlite_master
30
37
  WHERE type='table' AND name='files_fts'
31
38
  `).get();
32
39
  if (!tableExists) {
@@ -34,25 +41,40 @@ function initFilesFts(db) {
34
41
  CREATE VIRTUAL TABLE files_fts USING fts5(
35
42
  path,
36
43
  content,
44
+ content='files',
45
+ content_rowid='rowid',
37
46
  tokenize='${tokenizer}'
38
47
  );
39
48
  `);
40
- logger.info(`\u521B\u5EFA files_fts \u8868\uFF0Ctokenizer=${tokenizer}`);
41
- syncFilesFts(db);
42
- }
43
- }
44
- function syncFilesFts(db) {
45
- const fileCount = db.prepare("SELECT COUNT(*) as c FROM files WHERE content IS NOT NULL").get().c;
46
- const ftsCount = db.prepare("SELECT COUNT(*) as c FROM files_fts").get().c;
47
- if (ftsCount < fileCount) {
48
- logger.info(`\u540C\u6B65 FTS \u7D22\u5F15: files=${fileCount}, fts=${ftsCount}`);
49
- db.exec(`
50
- DELETE FROM files_fts;
51
- INSERT INTO files_fts(path, content)
52
- SELECT path, content FROM files WHERE content IS NOT NULL;
53
- `);
54
- logger.info(`FTS \u7D22\u5F15\u540C\u6B65\u5B8C\u6210: ${fileCount} \u6761\u8BB0\u5F55`);
49
+ logger.info(`\u521B\u5EFA files_fts \u8868\uFF08\u5916\u90E8\u5185\u5BB9\u8868\uFF09\uFF0Ctokenizer=${tokenizer}`);
55
50
  }
51
+ db.exec(`
52
+ CREATE TRIGGER IF NOT EXISTS files_ai
53
+ AFTER INSERT ON files
54
+ WHEN new.content IS NOT NULL
55
+ BEGIN
56
+ INSERT INTO files_fts(rowid, path, content)
57
+ VALUES (new.rowid, new.path, new.content);
58
+ END;
59
+
60
+ CREATE TRIGGER IF NOT EXISTS files_ad
61
+ AFTER DELETE ON files
62
+ WHEN old.content IS NOT NULL
63
+ BEGIN
64
+ INSERT INTO files_fts(files_fts, rowid, path, content)
65
+ VALUES('delete', old.rowid, old.path, old.content);
66
+ END;
67
+
68
+ CREATE TRIGGER IF NOT EXISTS files_au
69
+ AFTER UPDATE ON files
70
+ WHEN old.content IS NOT NULL OR new.content IS NOT NULL
71
+ BEGIN
72
+ INSERT INTO files_fts(files_fts, rowid, path, content)
73
+ SELECT 'delete', old.rowid, old.path, old.content WHERE old.content IS NOT NULL;
74
+ INSERT INTO files_fts(rowid, path, content)
75
+ SELECT new.rowid, new.path, new.content WHERE new.content IS NOT NULL;
76
+ END;
77
+ `);
56
78
  }
57
79
  function initChunksFts(db) {
58
80
  const tokenizer = detectFtsTokenizer(db);
@@ -82,13 +104,17 @@ function isChunksFtsInitialized(db) {
82
104
  return !!result;
83
105
  }
84
106
  function batchUpsertChunkFts(db, chunks) {
85
- const deleteStmt = db.prepare("DELETE FROM chunks_fts WHERE chunk_id = ?");
107
+ if (chunks.length === 0) return;
108
+ const paths = Array.from(new Set(chunks.map((c) => c.filePath)));
109
+ const deleteByPath = db.prepare("DELETE FROM chunks_fts WHERE file_path = ?");
86
110
  const insertStmt = db.prepare(
87
111
  "INSERT INTO chunks_fts(chunk_id, file_path, chunk_index, breadcrumb, content) VALUES (?, ?, ?, ?, ?)"
88
112
  );
89
113
  const transaction = db.transaction((items) => {
114
+ for (const p of paths) {
115
+ deleteByPath.run(p);
116
+ }
90
117
  for (const item of items) {
91
- deleteStmt.run(item.chunkId);
92
118
  insertStmt.run(item.chunkId, item.filePath, item.chunkIndex, item.breadcrumb, item.content);
93
119
  }
94
120
  });
@@ -172,26 +198,6 @@ function searchChunksFts(db, query, limit) {
172
198
  }
173
199
  return results.sort((a, b) => b.score - a.score);
174
200
  }
175
- function batchUpsertFileFts(db, files) {
176
- const deleteFts = db.prepare("DELETE FROM files_fts WHERE path = ?");
177
- const insertFts = db.prepare("INSERT INTO files_fts(path, content) VALUES (?, ?)");
178
- const transaction = db.transaction((items) => {
179
- for (const item of items) {
180
- deleteFts.run(item.path);
181
- insertFts.run(item.path, item.content);
182
- }
183
- });
184
- transaction(files);
185
- }
186
- function batchDeleteFileFts(db, paths) {
187
- const stmt = db.prepare("DELETE FROM files_fts WHERE path = ?");
188
- const transaction = db.transaction((items) => {
189
- for (const path2 of items) {
190
- stmt.run(path2);
191
- }
192
- });
193
- transaction(paths);
194
- }
195
201
  function sanitizeQuery(query) {
196
202
  return query.replace(/[():"*^./\\:@#$%&=+[\]{}<>|~`!?,;]/g, " ").replace(/\b(AND|OR|NOT|NEAR)\b/gi, " ").replace(/\s+/g, " ").trim();
197
203
  }
@@ -342,11 +348,6 @@ function isFtsInitialized(db) {
342
348
  }
343
349
 
344
350
  // src/db/index.ts
345
- import crypto from "crypto";
346
- import fs from "fs";
347
- import os from "os";
348
- import path from "path";
349
- import Database from "better-sqlite3";
350
351
  var BASE_DIR = path.join(os.homedir(), ".contextweaver");
351
352
  function getDirectoryBirthtime(projectPath) {
352
353
  const gitDir = path.join(projectPath, ".git");
@@ -405,6 +406,7 @@ function initDb(projectId) {
405
406
  value TEXT NOT NULL
406
407
  )
407
408
  `);
409
+ migrateSchema(db);
408
410
  initFilesFts(db);
409
411
  initChunksFts(db);
410
412
  db.pragma("synchronous = NORMAL");
@@ -412,6 +414,148 @@ function initDb(projectId) {
412
414
  db.pragma("cache_size = -64000");
413
415
  return db;
414
416
  }
417
+ var CURRENT_SCHEMA_VERSION = 3;
418
+ var METADATA_KEY_SCHEMA_VERSION = "schema_version";
419
/**
 * Read the schema version recorded in the `metadata` table.
 *
 * @param {object} db - Open database handle (`prepare(sql).get(...)` is used).
 * @returns {number | null} The stored version, or `null` when the row is
 *   missing or its value does not parse as a base-10 integer.
 */
function getSchemaVersion(db) {
  const lookup = db.prepare("SELECT value FROM metadata WHERE key = ?");
  const record = lookup.get(METADATA_KEY_SCHEMA_VERSION);
  if (record) {
    const version = parseInt(record.value, 10);
    if (!Number.isNaN(version)) {
      return version;
    }
  }
  return null;
}
425
/**
 * Record the schema version in the `metadata` table.
 *
 * The statement is an upsert: a missing row is inserted, an existing row has
 * its value overwritten.
 *
 * @param {object} db - Open database handle.
 * @param {number} version - Version to store; written as its string form.
 */
function setSchemaVersion(db, version) {
  const upsertVersion = db.prepare(`
    INSERT INTO metadata (key, value)
    VALUES (?, ?)
    ON CONFLICT(key) DO UPDATE SET value = excluded.value
  `);
  upsertVersion.run(METADATA_KEY_SCHEMA_VERSION, String(version));
}
432
/**
 * Report whether `files_fts` still uses the legacy layout, i.e. it is NOT
 * declared as an external-content table backed by `files`.
 *
 * The decision is made from the CREATE statement text stored in
 * `sqlite_master`. The `content=files` option is matched with a pattern
 * rather than the exact substring `content='files'`, so differences in
 * spacing around `=`, quote style, or letter case do not make an already
 * migrated table look old (which would trigger a needless drop and rebuild).
 *
 * @param {object} db - Open database handle.
 * @returns {boolean} `false` when the table does not exist, has no SQL text,
 *   or already declares `content=files`; `true` otherwise.
 */
function isOldFilesFtsSchema(db) {
  const row = db
    .prepare("SELECT sql FROM sqlite_master WHERE type='table' AND name='files_fts'")
    .get();
  if (!row?.sql) return false;
  // `\1` forces the closing quote to match the opening one; `(?!\w)` keeps an
  // unquoted `content=files_other` from being accepted. The bare column
  // `content,` and the `content_rowid=` option never match.
  const declaresExternalContent = /\bcontent\s*=\s*(['"]?)files\1(?!\w)/i;
  return !declaresExternalContent.test(row.sql);
}
437
/**
 * Bring the database schema up to the current version.
 *
 * Steps, in order:
 *  1. Drop a leftover `files_fts_v1_backup` table when `files_fts` exists and
 *     is already in the new layout.
 *  2. For a database with no recorded version, no rows in `files` and no
 *     `files_fts` table, create the v3 objects directly and record
 *     `CURRENT_SCHEMA_VERSION`.
 *  3. Otherwise run the v1→v2 and v2→v3 steps that are still outstanding,
 *     recording the version after each one. A missing version is treated as v1.
 *
 * The `files_fts` existence lookup is done once and reused (dropping the
 * backup table cannot change it), and the version is tracked in a local so
 * each step is gated on the result of the previous one.
 *
 * @param {object} db - Open database handle; the `metadata` and `files`
 *   tables are queried, so they must already exist.
 */
function migrateSchema(db) {
  const findTable = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name = ?");
  const hasBackup = Boolean(findTable.get("files_fts_v1_backup"));
  const hasFilesFts = Boolean(findTable.get("files_fts"));

  if (hasBackup && hasFilesFts && !isOldFilesFtsSchema(db)) {
    logger.warn("\u68C0\u6D4B\u5230\u6B8B\u7559\u5907\u4EFD\u8868 files_fts_v1_backup\uFF0C\u6E05\u7406\u4E2D");
    db.exec("DROP TABLE files_fts_v1_backup");
  }

  const stored = getSchemaVersion(db);
  if (stored === null) {
    const fileCount = db.prepare("SELECT COUNT(*) as c FROM files").get().c;
    if (fileCount === 0 && !hasFilesFts) {
      // Brand-new database: there is no legacy FTS table to convert, so only
      // the v3 objects are needed.
      migrateToV3(db);
      setSchemaVersion(db, CURRENT_SCHEMA_VERSION);
      return;
    }
  }

  let version = stored ?? 1;
  if (version < 2) {
    migrateToV2(db);
    setSchemaVersion(db, 2);
    version = 2;
  }
  if (version < 3) {
    migrateToV3(db);
    setSchemaVersion(db, 3);
    version = 3;
  }
}
463
/**
 * Schema step v1 → v2: convert `files_fts` into an FTS5 external-content
 * table backed by `files`, then rebuild its index.
 *
 * Does nothing when `files_fts` does not exist or is already in the new
 * layout.
 *
 * The drop, re-create and rebuild run inside one transaction. Without that, an
 * interruption after `DROP TABLE` leaves the database with no `files_fts`; this
 * function then returns early on the next start, and a table created afterwards
 * is never populated from the existing `files` rows.
 *
 * @param {object} db - Open better-sqlite3 database handle.
 */
function migrateToV2(db) {
  const ftsExists = db
    .prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='files_fts'`)
    .get();
  if (!ftsExists || !isOldFilesFtsSchema(db)) {
    return;
  }
  logger.info("\u6267\u884C schema \u8FC1\u79FB v1 \u2192 v2: files_fts \u8F6C\u4E3A\u5916\u90E8\u5185\u5BB9\u8868");

  // Probe tokenizer support before opening the transaction: the probe is
  // expected to fail on builds without `trigram`, and that failure should not
  // happen in the middle of the conversion.
  let tokenizer;
  try {
    db.exec(
      `CREATE VIRTUAL TABLE IF NOT EXISTS _fts_probe USING fts5(content, tokenize='trigram');
      DROP TABLE IF EXISTS _fts_probe;`
    );
    tokenizer = "trigram";
  } catch {
    tokenizer = "unicode61";
  }

  const convert = db.transaction(() => {
    db.exec("DROP TABLE files_fts");
    db.exec(`
      CREATE VIRTUAL TABLE files_fts USING fts5(
        path,
        content,
        content='files',
        content_rowid='rowid',
        tokenize='${tokenizer}'
      );
    `);
    // FTS5 special command: repopulate the index from the content table.
    db.exec(`INSERT INTO files_fts(files_fts) VALUES('rebuild')`);
  });
  convert();

  logger.info("schema \u8FC1\u79FB v1 \u2192 v2 \u5B8C\u6210");
}
495
/**
 * Schema step v2 → v3: create the `pending_marks` table if it is missing.
 *
 * The table is keyed by `path` and stores a `hash` and a `created_at` integer.
 * A log line is written on every call, whether or not the table already
 * existed.
 *
 * @param {object} db - Open database handle.
 */
function migrateToV3(db) {
  const createPendingMarks = `
    CREATE TABLE IF NOT EXISTS pending_marks (
      path TEXT PRIMARY KEY,
      hash TEXT NOT NULL,
      created_at INTEGER NOT NULL
    );
  `;
  db.exec(createPendingMarks);
  logger.info("schema \u8FC1\u79FB v2 \u2192 v3 \u5B8C\u6210: pending_marks \u8868\u5DF2\u521B\u5EFA");
}
505
/**
 * Insert or refresh rows in `pending_marks`, one per item, in a single
 * transaction.
 *
 * Every row written by one call gets the same `created_at` timestamp. A row
 * whose `path` already exists has its `hash` and `created_at` overwritten.
 *
 * @param {object} db - Open database handle.
 * @param {Array<{path: string, hash: string}>} items - Marks to record; an
 *   empty array makes the call a no-op.
 */
function insertPendingMarks(db, items) {
  if (items.length === 0) {
    return;
  }
  const createdAt = Date.now();
  const upsertMark = db.prepare(`
    INSERT INTO pending_marks (path, hash, created_at)
    VALUES (?, ?, ?)
    ON CONFLICT(path) DO UPDATE SET hash = excluded.hash, created_at = excluded.created_at
  `);
  const writeAll = db.transaction((marks) => {
    for (const mark of marks) {
      upsertMark.run(mark.path, mark.hash, createdAt);
    }
  });
  writeAll(items);
}
520
/**
 * Remove the `pending_marks` rows for the given paths in one transaction.
 *
 * @param {object} db - Open database handle.
 * @param {string[]} paths - Paths whose marks should be deleted; an empty
 *   array makes the call a no-op.
 */
function deletePendingMarks(db, paths) {
  if (paths.length === 0) {
    return;
  }
  const removeMark = db.prepare("DELETE FROM pending_marks WHERE path = ?");
  const removeAll = db.transaction((targets) => {
    for (const target of targets) {
      removeMark.run(target);
    }
  });
  removeAll(paths);
}
528
/**
 * Apply every row of `pending_marks` to `files`, then clear the marks.
 *
 * For each mark, `files.vector_index_hash` is set to the mark's hash on the
 * row whose `path` AND `hash` both match. A mark that updates a row counts as
 * applied; one that matches nothing counts as discarded. Either way the mark
 * is deleted. All updates and deletes happen in one transaction.
 *
 * @param {object} db - Open database handle.
 * @returns {{applied: number, discarded: number}} Counts of marks that did
 *   and did not update a `files` row.
 */
function replayPendingMarks(db) {
  const pending = db.prepare("SELECT path, hash FROM pending_marks").all();
  if (pending.length === 0) {
    return { applied: 0, discarded: 0 };
  }
  const stampFile = db.prepare(`
    UPDATE files SET vector_index_hash = ?
    WHERE path = ? AND hash = ?
  `);
  const dropMark = db.prepare("DELETE FROM pending_marks WHERE path = ?");
  const tally = { applied: 0, discarded: 0 };
  const replayAll = db.transaction(() => {
    for (const mark of pending) {
      const outcome = stampFile.run(mark.hash, mark.path, mark.hash);
      const bucket = outcome.changes > 0 ? "applied" : "discarded";
      tally[bucket] += 1;
      dropMark.run(mark.path);
    }
  });
  replayAll();
  const { applied, discarded } = tally;
  if (applied > 0 || discarded > 0) {
    logger.info({ applied, discarded }, "pending_marks \u91CD\u653E\u5B8C\u6210");
  }
  return { applied, discarded };
}
555
/**
 * Count the rows currently stored in `pending_marks`.
 *
 * @param {object} db - Open database handle.
 * @returns {number} Value of `COUNT(*)` over the table.
 */
function countPendingMarks(db) {
  const { c: total } = db.prepare("SELECT COUNT(*) as c FROM pending_marks").get();
  return total;
}
415
559
  function closeDb(db) {
416
560
  db.close();
417
561
  }
@@ -467,15 +611,6 @@ function batchUpsert(db, files) {
467
611
  }
468
612
  });
469
613
  transaction(files);
470
- const ftsFiles = [];
471
- for (const f of files) {
472
- if (f.content !== null) {
473
- ftsFiles.push({ path: f.path, content: f.content });
474
- }
475
- }
476
- if (ftsFiles.length > 0) {
477
- batchUpsertFileFts(db, ftsFiles);
478
- }
479
614
  }
480
615
  function batchUpdateMtime(db, items) {
481
616
  const update = db.prepare("UPDATE files SET mtime = ? WHERE path = ?");
@@ -498,9 +633,6 @@ function batchDelete(db, paths) {
498
633
  }
499
634
  });
500
635
  transaction(paths);
501
- if (paths.length > 0) {
502
- batchDeleteFileFts(db, paths);
503
- }
504
636
  }
505
637
  function clear(db) {
506
638
  db.exec("DELETE FROM files");
@@ -508,6 +640,9 @@ function clear(db) {
508
640
  db.exec("DELETE FROM chunks_fts");
509
641
  }
510
642
  var METADATA_KEY_EMBEDDING_DIMENSIONS = "embedding_dimensions";
643
+ var METADATA_KEY_LANCEDB_MIGRATION_STATE = "lancedb_migration_displaycode_state";
644
+ var METADATA_KEY_LANCEDB_MIGRATION_LOCK = "lancedb_migration_lock";
645
+ var MIGRATION_LOCK_STALE_MS = 10 * 60 * 1e3;
511
646
  function getMetadata(db, key) {
512
647
  const row = db.prepare("SELECT value FROM metadata WHERE key = ?").get(key);
513
648
  return row?.value ?? null;
@@ -528,6 +663,47 @@ function getStoredEmbeddingDimensions(db) {
528
663
  function setStoredEmbeddingDimensions(db, dimensions) {
529
664
  setMetadata(db, METADATA_KEY_EMBEDDING_DIMENSIONS, String(dimensions));
530
665
  }
666
/**
 * Read the LanceDB migration state stored in metadata.
 *
 * @param {object} db - Open database handle.
 * @returns {"pending" | "done" | "aborted" | null} The stored state, or
 *   `null` when nothing is stored or the value is not one of the three
 *   recognised states.
 */
function getLanceDbMigrationState(db) {
  const stored = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_STATE);
  switch (stored) {
    case "pending":
    case "done":
    case "aborted":
      return stored;
    default:
      return null;
  }
}
671
/**
 * Store the LanceDB migration state in metadata.
 *
 * The value is written as given; no validation against the set of states
 * recognised by the reader is performed here.
 *
 * @param {object} db - Open database handle.
 * @param {string} state - State value to persist.
 */
function setLanceDbMigrationState(db, state) {
  const stateKey = METADATA_KEY_LANCEDB_MIGRATION_STATE;
  setMetadata(db, stateKey, state);
}
674
/**
 * Set `vector_index_hash` to NULL on every row of `files`.
 *
 * @param {object} db - Open database handle.
 * @returns {number} The `changes` count reported by the UPDATE statement.
 */
function clearAllVectorIndexHash(db) {
  const resetAll = db.prepare("UPDATE files SET vector_index_hash = NULL");
  const { changes } = resetAll.run();
  return changes;
}
678
/**
 * Try to take the LanceDB migration lock stored in the `metadata` table.
 *
 * The lock value is JSON of the form `{ pid, acquiredAt }`. The outcome is:
 *  - `true`  if this process already holds the lock (it is left untouched,
 *            so `acquiredAt` is not refreshed);
 *  - `false` if another pid holds it and it is younger than
 *            `MIGRATION_LOCK_STALE_MS`;
 *  - `true`  after writing a new lock when there was none, when the existing
 *            one is stale, or when its value cannot be interpreted.
 *
 * The read-check-write sequence runs inside an IMMEDIATE transaction so the
 * write lock is taken before the existing value is read. With a plain read
 * followed by an unconditional upsert, two connections can both see "no lock"
 * and both report success; rereading the value straight after one's own write
 * cannot detect that.
 *
 * @param {object} db - Open better-sqlite3 database handle.
 * @returns {boolean} Whether this process holds the lock after the call.
 *   Returns `false` (instead of throwing) when the database is busy and the
 *   transaction cannot be started.
 */
function tryAcquireLanceDbMigrationLock(db) {
  const now = Date.now();
  const pid = process.pid;
  const lockValue = JSON.stringify({ pid, acquiredAt: now });

  const acquire = db.transaction(() => {
    const existing = getMetadata(db, METADATA_KEY_LANCEDB_MIGRATION_LOCK);
    if (existing) {
      let holder = null;
      try {
        holder = JSON.parse(existing);
      } catch (err) {
        // An unreadable lock cannot be attributed to any live owner; it is
        // replaced below, but the takeover is now recorded in the log.
        logger.warn(
          { error: err.message },
          "lancedb migration lock value is not valid JSON, taking it over"
        );
      }
      if (holder !== null && typeof holder === "object") {
        if (holder.pid === pid) return true;
        if (now - holder.acquiredAt < MIGRATION_LOCK_STALE_MS) return false;
        logger.warn(
          { stalePid: holder.pid, age: now - holder.acquiredAt },
          "\u68C0\u6D4B\u5230\u50F5\u5C38\u8FC1\u79FB\u9501\uFF0C\u5F3A\u5236\u593A\u53D6"
        );
      }
    }
    db.prepare(`
      INSERT INTO metadata (key, value)
      VALUES (?, ?)
      ON CONFLICT(key) DO UPDATE SET value = excluded.value
    `).run(METADATA_KEY_LANCEDB_MIGRATION_LOCK, lockValue);
    return true;
  });

  try {
    return acquire.immediate();
  } catch (err) {
    // Another connection holds the write lock: for a "try" operation that
    // means "not acquired", not a failure.
    if (typeof err?.code === "string" && err.code.startsWith("SQLITE_BUSY")) {
      return false;
    }
    throw err;
  }
}
704
/**
 * Remove the LanceDB migration lock row from `metadata`.
 *
 * The row is deleted regardless of which pid is recorded in it.
 *
 * @param {object} db - Open database handle.
 */
function releaseLanceDbMigrationLock(db) {
  const removeLock = db.prepare("DELETE FROM metadata WHERE key = ?");
  removeLock.run(METADATA_KEY_LANCEDB_MIGRATION_LOCK);
}
531
707
 
532
708
  export {
533
709
  isChunksFtsInitialized,
@@ -539,6 +715,11 @@ export {
539
715
  isFtsInitialized,
540
716
  generateProjectId,
541
717
  initDb,
718
+ migrateSchema,
719
+ insertPendingMarks,
720
+ deletePendingMarks,
721
+ replayPendingMarks,
722
+ countPendingMarks,
542
723
  closeDb,
543
724
  getAllFileMeta,
544
725
  getFilesNeedingVectorIndex,
@@ -550,6 +731,10 @@ export {
550
731
  batchDelete,
551
732
  clear,
552
733
  getStoredEmbeddingDimensions,
553
- setStoredEmbeddingDimensions
734
+ setStoredEmbeddingDimensions,
735
+ getLanceDbMigrationState,
736
+ setLanceDbMigrationState,
737
+ clearAllVectorIndexHash,
738
+ tryAcquireLanceDbMigrationLock,
739
+ releaseLanceDbMigrationLock
554
740
  };
555
- //# sourceMappingURL=chunk-6Z4JEEVJ.js.map
@@ -263,4 +263,3 @@ export {
263
263
  getRerankerConfig,
264
264
  getExcludePatterns
265
265
  };
266
- //# sourceMappingURL=chunk-RJURH22T.js.map
@@ -1,9 +1,11 @@
1
1
  import {
2
2
  closeAllIndexers,
3
- closeAllVectorStores,
4
3
  getIndexer,
5
4
  invalidateAllExpanderCaches
6
- } from "./chunk-6QMYML5V.js";
5
+ } from "./chunk-AB24E3Z7.js";
6
+ import {
7
+ closeAllVectorStores
8
+ } from "./chunk-ZOMGPIU6.js";
7
9
  import {
8
10
  batchDelete,
9
11
  batchUpdateMtime,
@@ -17,14 +19,14 @@ import {
17
19
  getStoredEmbeddingDimensions,
18
20
  initDb,
19
21
  setStoredEmbeddingDimensions
20
- } from "./chunk-6Z4JEEVJ.js";
22
+ } from "./chunk-RGJSXUFS.js";
21
23
  import {
22
24
  logger
23
- } from "./chunk-AMQQK4P7.js";
25
+ } from "./chunk-JVKVSTQ3.js";
24
26
  import {
25
27
  getEmbeddingConfig,
26
28
  getExcludePatterns
27
- } from "./chunk-RJURH22T.js";
29
+ } from "./chunk-SKBAE26T.js";
28
30
 
29
31
  // src/scanner/index.ts
30
32
  import path3 from "path";
@@ -523,6 +525,19 @@ var SourceAdapter = class {
523
525
  /**
524
526
  * 将字节偏移转换为字符偏移
525
527
  */
528
+ /**
529
+ * 将 tree-sitter 返回的偏移(可能是 UTF-8 字节或 UTF-16 字符域)
530
+ * 标准化为 UTF-16 字符域偏移。下游 String.prototype.slice 直接可用。
531
+ *
532
+ * 导出供 SemanticSplitter 在生成 ChunkMetadata 时统一域。
533
+ */
534
+ toCharOffset(offset) {
535
+ if (this.domain === "utf16" || this.domain === "unknown") return offset;
536
+ return this.byteToChar(offset);
537
+ }
538
+ /**
539
+ * 将字节偏移转换为字符偏移(仅 utf8 域有效;utf16/unknown 直接返回原值)
540
+ */
526
541
  byteToChar(byteOffset) {
527
542
  if (!this.byteToCharMap) return byteOffset;
528
543
  const safeOffset = Math.max(0, Math.min(this.byteToCharMap.length - 1, byteOffset));
@@ -915,11 +930,12 @@ ${displayCode}`,
915
930
  const vectorEnd = end;
916
931
  const displayCode = this.adapter.slice(start, end);
917
932
  const vectorCode = this.adapter.slice(vectorStart, vectorEnd);
933
+ const toChar = (n) => this.adapter.toCharOffset(n);
918
934
  const metadata = {
919
- startIndex: start,
920
- endIndex: end,
921
- rawSpan: { start: prevEnd, end: rawSpanEnd },
922
- vectorSpan: { start: vectorStart, end: vectorEnd },
935
+ startIndex: toChar(start),
936
+ endIndex: toChar(end),
937
+ rawSpan: { start: toChar(prevEnd), end: toChar(rawSpanEnd) },
938
+ vectorSpan: { start: toChar(vectorStart), end: toChar(vectorEnd) },
923
939
  filePath,
924
940
  language,
925
941
  contextPath: w.contextPath
@@ -1351,6 +1367,21 @@ async function scan(rootPath, options = {}) {
1351
1367
  }
1352
1368
  }
1353
1369
  options.onProgress?.(100, 100, "\u7D22\u5F15\u5B8C\u6210");
1370
+ if (options.vectorIndex !== false) {
1371
+ try {
1372
+ const embeddingConfig = getEmbeddingConfig();
1373
+ const indexer = await getIndexer(projectId, embeddingConfig.dimensions);
1374
+ const gcResult = await indexer.gc(db);
1375
+ if (gcResult.orphans > 0) {
1376
+ logger.info({ orphans: gcResult.orphans }, "GC \u5B8C\u6210");
1377
+ } else if (gcResult.truncated) {
1378
+ logger.debug("GC \u8D85\u65F6\u8DF3\u8FC7\uFF0C\u4E0B\u6B21\u626B\u63CF\u91CD\u8BD5");
1379
+ }
1380
+ } catch (err) {
1381
+ const error = err;
1382
+ logger.warn({ error: error.message }, "GC \u8DF3\u8FC7");
1383
+ }
1384
+ }
1354
1385
  invalidateAllExpanderCaches();
1355
1386
  return stats;
1356
1387
  } finally {
@@ -1363,4 +1394,3 @@ async function scan(rootPath, options = {}) {
1363
1394
  export {
1364
1395
  scan
1365
1396
  };
1366
- //# sourceMappingURL=chunk-NQR4CGQ6.js.map