@softerist/heuristic-mcp 2.1.47 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/.agent/workflows/code-review.md +60 -0
  2. package/.prettierrc +7 -0
  3. package/ARCHITECTURE.md +105 -170
  4. package/CONTRIBUTING.md +32 -113
  5. package/GEMINI.md +73 -0
  6. package/LICENSE +21 -21
  7. package/README.md +161 -54
  8. package/config.json +876 -75
  9. package/debug-pids.js +27 -0
  10. package/eslint.config.js +36 -0
  11. package/features/ann-config.js +37 -26
  12. package/features/clear-cache.js +28 -19
  13. package/features/find-similar-code.js +142 -66
  14. package/features/hybrid-search.js +253 -93
  15. package/features/index-codebase.js +1455 -394
  16. package/features/lifecycle.js +813 -180
  17. package/features/register.js +58 -52
  18. package/index.js +450 -306
  19. package/lib/cache-ops.js +22 -0
  20. package/lib/cache-utils.js +68 -0
  21. package/lib/cache.js +1392 -587
  22. package/lib/call-graph.js +165 -50
  23. package/lib/cli.js +154 -0
  24. package/lib/config.js +462 -121
  25. package/lib/embedding-process.js +77 -0
  26. package/lib/embedding-worker.js +545 -30
  27. package/lib/ignore-patterns.js +61 -59
  28. package/lib/json-worker.js +14 -0
  29. package/lib/json-writer.js +344 -0
  30. package/lib/logging.js +88 -0
  31. package/lib/memory-logger.js +13 -0
  32. package/lib/project-detector.js +13 -17
  33. package/lib/server-lifecycle.js +38 -0
  34. package/lib/settings-editor.js +645 -0
  35. package/lib/tokenizer.js +207 -104
  36. package/lib/utils.js +273 -198
  37. package/lib/vector-store-binary.js +592 -0
  38. package/mcp_config.example.json +13 -0
  39. package/package.json +13 -2
  40. package/scripts/clear-cache.js +6 -17
  41. package/scripts/download-model.js +14 -9
  42. package/scripts/postinstall.js +5 -5
  43. package/search-configs.js +36 -0
  44. package/test/ann-config.test.js +179 -0
  45. package/test/ann-fallback.test.js +6 -6
  46. package/test/binary-store.test.js +69 -0
  47. package/test/cache-branches.test.js +120 -0
  48. package/test/cache-errors.test.js +264 -0
  49. package/test/cache-extra.test.js +300 -0
  50. package/test/cache-helpers.test.js +205 -0
  51. package/test/cache-hnsw-failure.test.js +40 -0
  52. package/test/cache-json-worker.test.js +190 -0
  53. package/test/cache-worker.test.js +102 -0
  54. package/test/cache.test.js +443 -0
  55. package/test/call-graph.test.js +103 -4
  56. package/test/clear-cache.test.js +69 -68
  57. package/test/code-review-workflow.test.js +50 -0
  58. package/test/config.test.js +418 -0
  59. package/test/coverage-gap.test.js +497 -0
  60. package/test/coverage-maximizer.test.js +236 -0
  61. package/test/debug-analysis.js +107 -0
  62. package/test/embedding-model.test.js +173 -103
  63. package/test/embedding-worker-extra.test.js +272 -0
  64. package/test/embedding-worker.test.js +158 -0
  65. package/test/features.test.js +139 -0
  66. package/test/final-boost.test.js +271 -0
  67. package/test/final-polish.test.js +183 -0
  68. package/test/final.test.js +95 -0
  69. package/test/find-similar-code.test.js +191 -0
  70. package/test/helpers.js +92 -11
  71. package/test/helpers.test.js +46 -0
  72. package/test/hybrid-search-basic.test.js +62 -0
  73. package/test/hybrid-search-branch.test.js +202 -0
  74. package/test/hybrid-search-callgraph.test.js +229 -0
  75. package/test/hybrid-search-extra.test.js +81 -0
  76. package/test/hybrid-search.test.js +484 -71
  77. package/test/index-cli.test.js +520 -0
  78. package/test/index-codebase-batch.test.js +119 -0
  79. package/test/index-codebase-branches.test.js +585 -0
  80. package/test/index-codebase-core.test.js +1032 -0
  81. package/test/index-codebase-edge-cases.test.js +254 -0
  82. package/test/index-codebase-errors.test.js +132 -0
  83. package/test/index-codebase-gap.test.js +239 -0
  84. package/test/index-codebase-lines.test.js +151 -0
  85. package/test/index-codebase-watcher.test.js +259 -0
  86. package/test/index-codebase-zone.test.js +259 -0
  87. package/test/index-codebase.test.js +371 -69
  88. package/test/index-memory.test.js +220 -0
  89. package/test/indexer-detailed.test.js +176 -0
  90. package/test/integration.test.js +148 -92
  91. package/test/json-worker.test.js +50 -0
  92. package/test/lifecycle.test.js +541 -0
  93. package/test/master.test.js +198 -0
  94. package/test/perfection.test.js +349 -0
  95. package/test/project-detector.test.js +65 -0
  96. package/test/register.test.js +262 -0
  97. package/test/tokenizer.test.js +55 -93
  98. package/test/ultra-maximizer.test.js +116 -0
  99. package/test/utils-branches.test.js +161 -0
  100. package/test/utils-extra.test.js +116 -0
  101. package/test/utils.test.js +131 -0
  102. package/test/verify_fixes.js +76 -0
  103. package/test/worker-errors.test.js +96 -0
  104. package/test/worker-init.test.js +102 -0
  105. package/test/worker_throttling.test.js +93 -0
  106. package/tools/scripts/benchmark-search.js +95 -0
  107. package/tools/scripts/cache-stats.js +71 -0
  108. package/tools/scripts/manual-search.js +34 -0
  109. package/vitest.config.js +19 -9
package/lib/cache.js CHANGED
@@ -1,621 +1,1426 @@
1
- import fs from "fs/promises";
2
- import path from "path";
3
-
4
- const CACHE_META_VERSION = 1;
5
- const CACHE_META_FILE = "meta.json";
6
- const ANN_META_VERSION = 1;
7
- const ANN_INDEX_FILE = "ann-index.bin";
8
- const ANN_META_FILE = "ann-meta.json";
9
- const CALL_GRAPH_FILE = "call-graph.json";
10
-
11
- let hnswlibPromise = null;
12
- let hnswlibLoadError = null;
13
-
14
- async function loadHnswlib() {
15
- if (hnswlibLoadError) return null;
16
- if (!hnswlibPromise) {
17
- hnswlibPromise = import("hnswlib-node")
18
- .then((mod) => {
19
- const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
20
- if (!HierarchicalNSW) {
21
- throw new Error("HierarchicalNSW export not found");
22
- }
23
- return HierarchicalNSW;
24
- })
25
- .catch((err) => {
26
- hnswlibLoadError = err;
27
- console.error(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
28
- return null;
29
- });
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+ import { Worker } from 'worker_threads';
4
+ import { StreamingJsonWriter } from './json-writer.js';
5
+ import { BinaryVectorStore } from './vector-store-binary.js';
6
+
7
+ const CACHE_META_VERSION = 1;
8
+ const CACHE_META_FILE = 'meta.json';
9
+
10
+ // ANN meta version stays at 1 for compatibility; maxElements is optional.
11
+ const ANN_META_VERSION = 1;
12
+ const ANN_INDEX_FILE = 'ann-index.bin';
13
+ const ANN_META_FILE = 'ann-meta.json';
14
+
15
+ const CALL_GRAPH_FILE = 'call-graph.json';
16
+
17
+ const DEFAULT_JSON_WORKER_THRESHOLD = 5 * 1024 * 1024;
18
+ const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
19
+
20
+ // Yield to event loop to keep IDE/extension host responsive during heavy CPU loops
21
+ const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
22
+
23
+ let hnswlibPromise = null;
24
+ let hnswlibLoadError = null;
25
+
26
+ async function parseJsonInWorker(filePath) {
27
+ return new Promise((resolve, reject) => {
28
+ let settled = false;
29
+ const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
30
+ workerData: { filePath },
31
+ });
32
+
33
+ const finish = (handler, value) => {
34
+ if (settled) return;
35
+ settled = true;
36
+ worker.removeAllListeners();
37
+ const termination = worker.terminate?.();
38
+ if (termination && typeof termination.catch === 'function') termination.catch(() => null);
39
+ handler(value);
40
+ };
41
+
42
+ worker.once('message', (msg) => {
43
+ if (msg?.ok) {
44
+ finish(resolve, msg.data);
45
+ } else {
46
+ const err = new Error(msg?.error || 'JSON worker failed');
47
+ console.warn(`[Cache] ${err.message}`);
48
+ finish(reject, err);
49
+ }
50
+ });
51
+
52
+ worker.once('error', (err) => {
53
+ console.error(`[Cache] JSON worker error: ${err.message}`);
54
+ finish(reject, err);
55
+ });
56
+
57
+ worker.once('exit', (code) => {
58
+ if (code !== 0) {
59
+ const err = new Error(`JSON worker exited with code ${code}`);
60
+ console.error(`[Cache] ${err.message}`);
61
+ finish(reject, err);
62
+ }
63
+ });
64
+ });
65
+ }
66
+
67
+ async function readJsonFile(filePath, { workerThresholdBytes = DEFAULT_JSON_WORKER_THRESHOLD } = {}) {
68
+ let stats;
69
+ try {
70
+ stats = await fs.stat(filePath);
71
+ } catch {
72
+ return null;
73
+ }
74
+
75
+ try {
76
+ const canUseWorker = typeof Worker === 'function';
77
+ const useWorker =
78
+ canUseWorker && stats && typeof stats.size === 'number'
79
+ ? stats.size >= workerThresholdBytes
80
+ : false;
81
+
82
+ if (useWorker) return await parseJsonInWorker(filePath);
83
+
84
+ const data = await fs.readFile(filePath, 'utf-8');
85
+ return JSON.parse(data);
86
+ } catch (error) {
87
+ console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
88
+ return null;
89
+ }
90
+ }
91
+
92
+ async function loadHnswlib() {
93
+ if (hnswlibLoadError) return null;
94
+
95
+ if (!hnswlibPromise) {
96
+ hnswlibPromise = import('hnswlib-node')
97
+ .then((mod) => {
98
+ const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
99
+ if (!HierarchicalNSW) throw new Error('HierarchicalNSW export not found');
100
+ return HierarchicalNSW;
101
+ })
102
+ .catch((err) => {
103
+ hnswlibLoadError = err;
104
+ console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
105
+ return null;
106
+ });
107
+ }
108
+
109
+ return hnswlibPromise;
110
+ }
111
+
112
+ function initHnswIndex(index, maxElements, m, efConstruction) {
113
+ try {
114
+ index.initIndex(maxElements, m, efConstruction, 100);
115
+ return;
116
+ } catch (err) { console.warn(`[ANN] Standard init failed: ${err.message}`); }
117
+ try {
118
+ index.initIndex(maxElements, m, efConstruction);
119
+ return;
120
+ } catch (err) { console.warn(`[ANN] Legacy init failed: ${err.message}`); }
121
+ index.initIndex(maxElements);
122
+ }
123
+
124
+ function readHnswIndex(index, filePath, maxElements) {
125
+ try {
126
+ index.readIndexSync(filePath, maxElements);
127
+ return true;
128
+ } catch {
129
+ /* ignore */
130
+ }
131
+ try {
132
+ index.readIndexSync(filePath);
133
+ return true;
134
+ } catch (err) {
135
+ console.warn(`[ANN] Read index failed: ${err.message}`);
136
+ }
137
+ return false;
138
+ }
139
+
140
+ function normalizeLabels(result) {
141
+ if (!result) return [];
142
+ if (Array.isArray(result)) return result;
143
+ const labels = result.labels || result.neighbors || result.indices;
144
+ return labels ? Array.from(labels) : [];
145
+ }
146
+
147
+ function ensureFloat32(vector) {
148
+ if (!vector) return null;
149
+ if (vector instanceof Float32Array) return vector;
150
+
151
+ // Convert values (do NOT reinterpret bytes)
152
+ if (ArrayBuffer.isView(vector)) {
153
+ return Float32Array.from(vector);
154
+ }
155
+
156
+ return new Float32Array(vector);
157
+ }
158
+
159
+ function normalizeChunkVector(chunk) {
160
+ if (chunk?.vector) chunk.vector = ensureFloat32(chunk.vector);
161
+ }
162
+
163
+ function assignChunkIndices(store) {
164
+ if (!Array.isArray(store)) return;
165
+ for (let i = 0; i < store.length; i += 1) {
166
+ const chunk = store[i];
167
+ if (chunk) {
168
+ chunk._index = i;
169
+ }
170
+ }
171
+ }
172
+
173
+ function normalizeFileHashEntry(entry) {
174
+ if (!entry) return null;
175
+ if (typeof entry === 'string') return { hash: entry };
176
+ if (typeof entry !== 'object') return null;
177
+ if (typeof entry.hash !== 'string') return null;
178
+ const normalized = { hash: entry.hash };
179
+ if (Number.isFinite(entry.mtimeMs)) normalized.mtimeMs = entry.mtimeMs;
180
+ if (Number.isFinite(entry.size)) normalized.size = entry.size;
181
+ return normalized;
182
+ }
183
+
184
+ function serializeFileHashEntry(entry) {
185
+ if (!entry) return null;
186
+ if (typeof entry === 'string') return { hash: entry };
187
+ if (typeof entry !== 'object') return null;
188
+ if (typeof entry.hash !== 'string') return null;
189
+ const serialized = { hash: entry.hash };
190
+ if (Number.isFinite(entry.mtimeMs)) serialized.mtimeMs = entry.mtimeMs;
191
+ if (Number.isFinite(entry.size)) serialized.size = entry.size;
192
+ return serialized;
193
+ }
194
+
195
+ function computeAnnCapacity(total, config) {
196
+ const factor = typeof config.annCapacityFactor === 'number' ? config.annCapacityFactor : 1.2;
197
+ const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
198
+ const byFactor = Math.ceil(total * factor);
199
+ const byExtra = total + extra;
200
+ return Math.max(total, byFactor, byExtra);
201
+ }
202
+
203
+ export class EmbeddingsCache {
204
+ constructor(config) {
205
+ this.config = config;
206
+
207
+ this.vectorStore = [];
208
+ this.fileHashes = new Map();
209
+ this.isSaving = false;
210
+ this.lastIndexDurationMs = null;
211
+ this.lastIndexStats = null;
212
+
213
+ this.cacheMeta = {
214
+ version: CACHE_META_VERSION,
215
+ embeddingModel: config.embeddingModel,
216
+ };
217
+
218
+ // Save coalescing / debounce (serialized via saveQueue)
219
+ this.saveQueue = Promise.resolve();
220
+ this._saveTimer = null;
221
+ this._saveRequested = false;
222
+ this._savePromise = null;
223
+
224
+ // ANN state
225
+ this.annIndex = null;
226
+ this.annMeta = null;
227
+ this.annDirty = false; // needs rebuild
228
+ this.annPersistDirty = false; // in-memory differs from disk
229
+ this.annLoading = null;
230
+ this.annVectorCache = null;
231
+
232
+ // Call graph
233
+ this.fileCallData = new Map();
234
+ this.callGraph = null;
235
+ this._callGraphBuild = null;
236
+
237
+ // Binary vector store (optional)
238
+ this.binaryStore = null;
239
+
240
+ // Error tracking
241
+ this.initErrors = [];
242
+
243
+ // Concurrency hooks (read tracking)
244
+ this.activeReads = 0;
245
+ this._readWaiters = [];
246
+
247
+ // Lazy reload support after dropping in-memory vectors
248
+ this._clearedAfterIndex = false;
249
+ this._loadPromise = null;
30
250
  }
31
- return hnswlibPromise;
32
- }
33
-
34
- function initHnswIndex(index, maxElements, m, efConstruction) {
35
- try {
36
- index.initIndex(maxElements, m, efConstruction, 100);
37
- return;
38
- } catch {}
39
- try {
40
- index.initIndex(maxElements, m, efConstruction);
41
- return;
42
- } catch {}
43
- index.initIndex(maxElements);
44
- }
45
-
46
- function readHnswIndex(index, filePath, maxElements) {
47
- try {
48
- index.readIndexSync(filePath, maxElements);
49
- return true;
50
- } catch {}
51
- try {
52
- index.readIndexSync(filePath);
53
- return true;
54
- } catch {}
55
- return false;
56
- }
57
-
58
- function normalizeLabels(result) {
59
- if (!result) return [];
60
- if (Array.isArray(result)) return result;
61
- const labels = result.labels || result.neighbors || result.indices;
62
- if (labels) {
63
- return Array.from(labels);
64
- }
65
- return [];
66
- }
67
-
68
- function toFloat32Array(vector) {
69
- if (vector instanceof Float32Array) {
70
- return vector;
71
- }
72
- return Float32Array.from(vector);
73
- }
74
-
75
- export class EmbeddingsCache {
76
- constructor(config) {
77
- this.config = config;
78
- this.vectorStore = [];
79
- this.fileHashes = new Map();
80
- this.isSaving = false;
81
- this.cacheMeta = {
82
- version: CACHE_META_VERSION,
83
- embeddingModel: config.embeddingModel
84
- };
85
- this.annIndex = null;
86
- this.annMeta = null;
87
- this.annDirty = false;
88
- this.annLoading = null;
89
- this.annVectorCache = null;
90
- // Call graph data
91
- this.fileCallData = new Map(); // file -> { definitions: [], calls: [] }
92
- this.callGraph = null; // { defines, calledBy, fileCalls }
251
+
252
+ async close() {
253
+ if (this.binaryStore) {
254
+ await this.binaryStore.close();
255
+ this.binaryStore = null;
256
+ }
93
257
  }
94
258
 
95
- async load() {
259
+ async ensureLoaded() {
96
260
  if (!this.config.enableCache) return;
261
+ if (!this._clearedAfterIndex) return;
262
+ if (this._loadPromise) return this._loadPromise;
263
+
264
+ this._loadPromise = (async () => {
265
+ await this.load();
266
+ this._clearedAfterIndex = false;
267
+ })().finally(() => {
268
+ this._loadPromise = null;
269
+ });
97
270
 
98
- try {
99
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
100
- const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
101
- const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
102
- const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
103
-
104
- const [metaData, cacheData, hashData] = await Promise.all([
105
- fs.readFile(metaFile, "utf-8").catch(() => null),
106
- fs.readFile(cacheFile, "utf-8").catch(() => null),
107
- fs.readFile(hashFile, "utf-8").catch(() => null)
108
- ]);
109
-
110
- if (!metaData && !cacheData && !hashData) {
111
- return;
112
- }
113
-
114
- if (!metaData) {
115
- console.error("[Cache] Missing cache metadata, ignoring cache");
116
- return;
117
- }
118
-
119
- let meta = null;
120
- try {
121
- meta = JSON.parse(metaData);
122
- } catch {
123
- console.error("[Cache] Invalid cache metadata, ignoring cache");
124
- return;
125
- }
126
-
127
- if (meta?.version !== CACHE_META_VERSION) {
128
- console.error(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
129
- return;
130
- }
131
-
132
- if (meta?.embeddingModel !== this.config.embeddingModel) {
133
- console.error(`[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`);
134
- return;
135
- }
136
-
137
- this.cacheMeta = meta;
138
-
139
- if (cacheData && hashData) {
140
- const rawVectorStore = JSON.parse(cacheData);
141
- const rawHashes = new Map(Object.entries(JSON.parse(hashData)));
142
-
143
- // Filter cache to only include files matching current extensions
144
- const allowedExtensions = this.config.fileExtensions.map(ext => `.${ext}`);
145
-
146
- this.vectorStore = rawVectorStore.filter(chunk => {
147
- const ext = path.extname(chunk.file);
148
- return allowedExtensions.includes(ext);
149
- });
150
-
151
- // Only keep hashes for files matching current extensions
152
- for (const [file, hash] of rawHashes) {
153
- const ext = path.extname(file);
154
- if (allowedExtensions.includes(ext)) {
155
- this.fileHashes.set(file, hash);
156
- }
157
- }
158
-
159
- const filtered = rawVectorStore.length - this.vectorStore.length;
160
- if (filtered > 0) {
161
- console.error(`[Cache] Filtered ${filtered} outdated cache entries`);
162
- }
163
- console.error(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
164
- this.annDirty = false;
165
- this.annIndex = null;
166
- this.annMeta = null;
167
- }
168
-
169
- // Load call-graph data if it exists
170
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
171
- try {
172
- const callGraphData = await fs.readFile(callGraphFile, "utf8");
173
- const parsed = JSON.parse(callGraphData);
174
- this.fileCallData = new Map(Object.entries(parsed));
175
- if (this.config.verbose) {
176
- console.error(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
177
- }
178
- } catch {
179
- // Call-graph file doesn't exist yet, that's OK
180
- }
181
- } catch (error) {
182
- console.error("[Cache] Failed to load cache:", error.message);
183
- }
271
+ return this._loadPromise;
184
272
  }
185
273
 
186
- async save() {
274
+ async dropInMemoryVectors() {
187
275
  if (!this.config.enableCache) return;
188
276
 
189
- this.isSaving = true;
190
-
191
- try {
192
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
193
- const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
194
- const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
195
- const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
196
-
197
- // Include indexing stats in meta for verification
198
- const uniqueFiles = new Set(this.vectorStore.map(chunk => chunk.file));
199
- this.cacheMeta = {
200
- version: CACHE_META_VERSION,
201
- embeddingModel: this.config.embeddingModel,
202
- lastSaveTime: new Date().toISOString(),
203
- filesIndexed: uniqueFiles.size,
204
- chunksStored: this.vectorStore.length,
205
- workspace: this.config.searchDirectory || null
206
- };
207
-
208
- await Promise.all([
209
- fs.writeFile(cacheFile, JSON.stringify(this.vectorStore, null, 2)),
210
- fs.writeFile(hashFile, JSON.stringify(Object.fromEntries(this.fileHashes), null, 2)),
211
- fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2))
212
- ]);
213
-
214
- // Save call-graph data (or remove stale cache if empty)
215
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
216
- if (this.fileCallData.size > 0) {
217
- await fs.writeFile(callGraphFile, JSON.stringify(Object.fromEntries(this.fileCallData), null, 2));
218
- } else {
219
- await fs.rm(callGraphFile, { force: true });
220
- }
221
- } catch (error) {
222
- console.error("[Cache] Failed to save cache:", error.message);
223
- } finally {
224
- this.isSaving = false;
277
+ if (this.activeReads > 0) {
278
+ await this.waitForReaders();
225
279
  }
226
- }
227
-
228
- getVectorStore() {
229
- return this.vectorStore;
230
- }
231
-
232
- setVectorStore(store) {
233
- this.vectorStore = store;
234
- this.invalidateAnnIndex();
235
- }
236
-
237
- getFileHash(file) {
238
- return this.fileHashes.get(file);
239
- }
240
-
241
- setFileHash(file, hash) {
242
- this.fileHashes.set(file, hash);
243
- }
244
-
245
- deleteFileHash(file) {
246
- this.fileHashes.delete(file);
247
- }
248
280
 
249
- removeFileFromStore(file) {
250
- this.vectorStore = this.vectorStore.filter(chunk => chunk.file !== file);
251
- this.invalidateAnnIndex();
252
- // Also clear call-graph data for this file
253
- this.removeFileCallData(file);
254
- }
255
-
256
-
257
- addToStore(chunk) {
258
- this.vectorStore.push(chunk);
259
- this.invalidateAnnIndex();
260
- }
261
-
262
- invalidateAnnIndex() {
281
+ this.vectorStore = [];
282
+ this.annVectorCache = null;
263
283
  this.annIndex = null;
264
284
  this.annMeta = null;
265
285
  this.annDirty = true;
266
- this.annVectorCache = null;
267
- }
268
-
269
- getAnnVector(index) {
270
- if (!this.annVectorCache || this.annVectorCache.length !== this.vectorStore.length) {
271
- this.annVectorCache = new Array(this.vectorStore.length);
272
- }
273
-
274
- let cached = this.annVectorCache[index];
275
- if (!cached) {
276
- const vector = this.vectorStore[index]?.vector;
277
- if (!vector) {
278
- return null;
279
- }
280
- cached = toFloat32Array(vector);
281
- this.annVectorCache[index] = cached;
282
- }
283
-
284
- return cached;
285
- }
286
-
287
- getAnnIndexPaths() {
288
- return {
289
- indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
290
- metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE)
291
- };
292
- }
286
+ this.annPersistDirty = false;
293
287
 
294
- async ensureAnnIndex() {
295
- if (!this.config.annEnabled) return null;
296
- if (this.vectorStore.length < this.config.annMinChunks) return null;
297
- if (this.annIndex && !this.annDirty) return this.annIndex;
298
- if (this.annLoading) return this.annLoading;
299
-
300
- this.annLoading = (async () => {
301
- const HierarchicalNSW = await loadHnswlib();
302
- if (!HierarchicalNSW) return null;
303
-
304
- const dim = this.vectorStore[0]?.vector?.length;
305
- if (!dim) return null;
306
-
307
- if (!this.annDirty && this.config.annIndexCache !== false) {
308
- const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
309
- if (loaded) return this.annIndex;
288
+ if (this.binaryStore) {
289
+ try {
290
+ await this.binaryStore.close();
291
+ } catch {
292
+ // ignore close errors
310
293
  }
311
-
312
- return await this.buildAnnIndex(HierarchicalNSW, dim);
313
- })();
314
-
315
- const index = await this.annLoading;
316
- this.annLoading = null;
317
- return index;
318
- }
319
-
320
- async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
321
- const { indexFile, metaFile } = this.getAnnIndexPaths();
322
- const metaData = await fs.readFile(metaFile, "utf-8").catch(() => null);
323
-
324
- if (!metaData) {
325
- return false;
294
+ this.binaryStore = null;
326
295
  }
327
296
 
328
- let meta = null;
329
- try {
330
- meta = JSON.parse(metaData);
331
- } catch {
332
- console.error("[ANN] Invalid ANN metadata, rebuilding");
333
- return false;
334
- }
335
-
336
- if (meta?.version !== ANN_META_VERSION) {
337
- console.error(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
338
- return false;
339
- }
340
-
341
- if (meta?.embeddingModel !== this.config.embeddingModel) {
342
- console.error(`[ANN] Embedding model changed for ANN index, rebuilding`);
343
- return false;
344
- }
345
-
346
- if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
347
- console.error("[ANN] ANN index size mismatch, rebuilding");
348
- return false;
349
- }
350
-
351
- if (meta?.metric !== this.config.annMetric ||
352
- meta?.m !== this.config.annM ||
353
- meta?.efConstruction !== this.config.annEfConstruction) {
354
- console.error("[ANN] ANN index config changed, rebuilding");
355
- return false;
356
- }
357
-
358
- const index = new HierarchicalNSW(meta.metric, dim);
359
- const loaded = readHnswIndex(index, indexFile, meta.count);
360
- if (!loaded) {
361
- console.error("[ANN] Failed to load ANN index file, rebuilding");
362
- return false;
363
- }
364
-
365
- if (typeof index.setEf === "function") {
366
- index.setEf(this.config.annEfSearch);
367
- }
368
-
369
- this.annIndex = index;
370
- this.annMeta = meta;
371
- this.annDirty = false;
372
- console.error(`[ANN] Loaded ANN index (${meta.count} vectors)`);
373
- return true;
297
+ this._clearedAfterIndex = true;
374
298
  }
375
-
376
- async buildAnnIndex(HierarchicalNSW, dim) {
377
- const total = this.vectorStore.length;
378
- if (total === 0) return null;
379
-
380
- try {
381
- const index = new HierarchicalNSW(this.config.annMetric, dim);
382
- initHnswIndex(index, total, this.config.annM, this.config.annEfConstruction);
383
-
384
- for (let i = 0; i < total; i++) {
385
- const vector = this.getAnnVector(i);
386
- if (!vector) {
387
- throw new Error(`Missing vector for ANN index at position ${i}`);
388
- }
389
- index.addPoint(vector, i);
390
- }
391
-
392
- if (typeof index.setEf === "function") {
393
- index.setEf(this.config.annEfSearch);
394
- }
395
-
396
- this.annIndex = index;
397
- this.annMeta = {
398
- version: ANN_META_VERSION,
399
- embeddingModel: this.config.embeddingModel,
400
- metric: this.config.annMetric,
401
- dim,
402
- count: total,
403
- m: this.config.annM,
404
- efConstruction: this.config.annEfConstruction,
405
- efSearch: this.config.annEfSearch
406
- };
407
- this.annDirty = false;
408
-
409
- if (this.config.annIndexCache !== false) {
299
+
300
+ // -------------------- Concurrency Hooks --------------------
301
+
302
+ startRead() {
303
+ this.activeReads++;
304
+ }
305
+
306
+ endRead() {
307
+ if (this.activeReads > 0) {
308
+ this.activeReads--;
309
+ if (this.activeReads === 0 && this._readWaiters.length > 0) {
310
+ const waiters = this._readWaiters;
311
+ this._readWaiters = [];
312
+ for (const resolve of waiters) {
313
+ resolve();
314
+ }
315
+ }
316
+ }
317
+ }
318
+
319
+ async waitForReaders() {
320
+ if (this.activeReads === 0) return;
321
+ await new Promise((resolve) => {
322
+ this._readWaiters.push(resolve);
323
+ });
324
+ }
325
+
326
+ // -------------------- Reset --------------------
327
+
328
+ /**
329
+ * Resets the cache state (clears vectors, hashes, and call graph).
330
+ * Used for forced reindexing.
331
+ */
332
+ async reset() {
333
+ this.vectorStore = [];
334
+ if (this.binaryStore) {
335
+ try {
336
+ await this.binaryStore.close();
337
+ } catch {
338
+ // ignore close errors
339
+ }
340
+ this.binaryStore = null;
341
+ }
342
+ this.fileHashes.clear();
343
+ this.invalidateAnnIndex();
344
+ await this.clearCallGraphData({ removeFile: true });
345
+ this.initErrors = [];
346
+ }
347
+
348
+ // -------------------- Load --------------------
349
+
350
+ async load() {
351
+ if (!this.config.enableCache) return;
352
+
353
+ try {
354
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
355
+
356
+ const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
357
+ const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
358
+ const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
359
+
360
+ const workerThresholdBytes =
361
+ Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
362
+ this.config.jsonWorkerThresholdBytes > 0
363
+ ? this.config.jsonWorkerThresholdBytes
364
+ : DEFAULT_JSON_WORKER_THRESHOLD;
365
+
366
+ const useBinary = this.config.vectorStoreFormat === 'binary';
367
+
368
+ const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
369
+ this.config.cacheDirectory,
370
+ );
371
+ const pathExists = async (targetPath) => {
372
+ try {
373
+ await fs.access(targetPath);
374
+ return true;
375
+ } catch {
376
+ return false;
377
+ }
378
+ };
379
+
380
+ // In tests, read cache files eagerly to exercise worker paths.
381
+ let cacheData = null;
382
+ let hashData = null;
383
+ let prefetched = false;
384
+ if (IS_TEST_ENV) {
385
+ prefetched = true;
386
+ const cachePromise = useBinary
387
+ ? Promise.resolve(null)
388
+ : readJsonFile(cacheFile, { workerThresholdBytes });
389
+ [cacheData, hashData] = await Promise.all([
390
+ cachePromise,
391
+ readJsonFile(hashFile, { workerThresholdBytes }),
392
+ ]);
393
+ }
394
+
395
+ // Read meta first to avoid parsing huge cache files when invalid
396
+ const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
397
+ if (!metaData) {
398
+ console.warn('[Cache] Missing cache metadata, ignoring cache');
399
+ return;
400
+ }
401
+
402
+ let meta;
403
+ try {
404
+ meta = JSON.parse(metaData);
405
+ } catch {
406
+ console.warn('[Cache] Invalid cache metadata, ignoring cache');
407
+ return;
408
+ }
409
+
410
+ if (meta?.version !== CACHE_META_VERSION) {
411
+ console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
412
+ return;
413
+ }
414
+
415
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
416
+ console.warn(
417
+ `[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`,
418
+ );
419
+ return;
420
+ }
421
+
422
+ if (!prefetched) {
423
+ [cacheData, hashData] = await Promise.all([
424
+ useBinary ? Promise.resolve(null) : readJsonFile(cacheFile, { workerThresholdBytes }),
425
+ readJsonFile(hashFile, { workerThresholdBytes }),
426
+ ]);
427
+ }
428
+
429
+ this.cacheMeta = meta;
430
+
431
+ const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
432
+ (async () => {
433
+ const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
434
+ pathExists(vectorsPath),
435
+ pathExists(recordsPath),
436
+ pathExists(contentPath),
437
+ pathExists(filesPath),
438
+ ]);
439
+ return vectorsOk && recordsOk && contentOk && filesOk;
440
+ })(),
441
+ pathExists(cacheFile),
442
+ ]);
443
+
444
+ if (useBinary && !binaryFilesPresent) {
445
+ if (jsonCachePresent) {
446
+ console.warn(
447
+ '[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.',
448
+ );
449
+ } else {
450
+ console.warn(
451
+ '[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.',
452
+ );
453
+ }
454
+ } else if (!useBinary && !jsonCachePresent) {
455
+ if (binaryFilesPresent) {
456
+ console.warn(
457
+ '[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.',
458
+ );
459
+ } else {
460
+ console.warn(
461
+ '[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.',
462
+ );
463
+ }
464
+ }
465
+
466
+ if (useBinary) {
410
467
  try {
411
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
412
- const { indexFile, metaFile } = this.getAnnIndexPaths();
413
- index.writeIndexSync(indexFile);
414
- await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
415
- console.error(`[ANN] Saved ANN index (${total} vectors)`);
416
- } catch (error) {
417
- console.error(`[ANN] Failed to save ANN index: ${error.message}`);
468
+ this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
469
+ contentCacheEntries: this.config.contentCacheEntries,
470
+ vectorCacheEntries: this.config.vectorCacheEntries,
471
+ vectorLoadMode: this.config.vectorStoreLoadMode,
472
+ });
473
+ cacheData = await this.binaryStore.toChunkViews({
474
+ includeContent: this.config.vectorStoreContentMode === 'inline',
475
+ includeVector: this.config.vectorStoreLoadMode !== 'disk',
476
+ });
477
+ } catch (err) {
478
+ this.binaryStore = null;
479
+ console.warn(`[Cache] Failed to load binary vector store: ${err.message}`);
418
480
  }
419
481
  }
420
-
421
- return index;
422
- } catch (error) {
423
- console.error(`[ANN] Failed to build ANN index: ${error.message}`);
424
- this.annIndex = null;
425
- this.annMeta = null;
426
- this.annDirty = true;
427
- return null;
428
- }
429
- }
430
-
431
- async queryAnn(queryVector, k) {
432
- const index = await this.ensureAnnIndex();
433
- if (!index) return null;
434
-
435
- const results = index.searchKnn(toFloat32Array(queryVector), k);
436
- const labels = normalizeLabels(results);
437
-
438
- if (labels.length === 0) return null;
439
- const filtered = labels.filter((label) =>
440
- Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
441
- );
442
- return filtered.length > 0 ? filtered : null;
443
- }
444
-
445
- async clear() {
446
- if (!this.config.enableCache) return;
447
-
448
- try {
449
- await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
450
- this.vectorStore = [];
451
- this.fileHashes = new Map();
452
- this.invalidateAnnIndex();
453
- await this.clearCallGraphData();
454
- console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
455
- } catch (error) {
456
- console.error("[Cache] Failed to clear cache:", error.message);
457
- throw error;
458
- }
459
- }
460
-
461
- /**
462
- * Adjust efSearch at runtime for speed/accuracy tradeoff.
463
- * Higher values = more accurate but slower.
464
- * @param {number} efSearch - New efSearch value (typically 16-512)
465
- * @returns {object} Result with success status and current config
466
- */
467
- setEfSearch(efSearch) {
468
- if (typeof efSearch !== "number" || efSearch < 1 || efSearch > 1000) {
469
- return { success: false, error: "efSearch must be a number between 1 and 1000" };
470
- }
471
-
472
- this.config.annEfSearch = efSearch;
473
-
474
- if (this.annIndex && typeof this.annIndex.setEf === "function") {
475
- this.annIndex.setEf(efSearch);
476
- console.error(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
477
- return { success: true, applied: true, efSearch };
478
- } else {
479
- console.error(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
480
- return { success: true, applied: false, efSearch };
481
- }
482
- }
483
-
484
- /**
485
- * Get current ANN index statistics for diagnostics.
486
- * @returns {object} ANN stats including index state, config, and vector count
487
- */
488
- getAnnStats() {
489
- return {
490
- enabled: this.config.annEnabled ?? false,
491
- indexLoaded: this.annIndex !== null,
492
- dirty: this.annDirty,
493
- vectorCount: this.vectorStore.length,
494
- minChunksForAnn: this.config.annMinChunks ?? 5000,
495
- config: this.annMeta ? {
496
- metric: this.annMeta.metric,
497
- dim: this.annMeta.dim,
498
- count: this.annMeta.count,
499
- m: this.annMeta.m,
500
- efConstruction: this.annMeta.efConstruction,
501
- efSearch: this.config.annEfSearch
502
- } : null
503
- };
504
- }
505
-
506
- // ========== Call Graph Methods ==========
507
-
508
- /**
509
- * Clear all call-graph data (optionally remove persisted cache file)
510
- */
511
- async clearCallGraphData({ removeFile = false } = {}) {
512
- this.fileCallData.clear();
513
- this.callGraph = null;
514
-
515
- if (removeFile && this.config.enableCache) {
516
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
517
- try {
518
- await fs.rm(callGraphFile, { force: true });
519
- } catch (error) {
520
- if (this.config.verbose) {
521
- console.error(`[Cache] Failed to remove call-graph cache: ${error.message}`);
482
+
483
+ if (!cacheData) {
484
+ cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
485
+ }
486
+
487
+ const hasCacheData = Array.isArray(cacheData);
488
+ const hasHashData = hashData && typeof hashData === 'object';
489
+
490
+ if (hasCacheData) {
491
+ const allowedExtensions = new Set((this.config.fileExtensions || []).map((ext) => `.${ext}`));
492
+ const applyExtensionFilter = !this.binaryStore;
493
+
494
+ const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
495
+ this.vectorStore = [];
496
+ this.fileHashes.clear();
497
+
498
+ // Single-pass filter + normalization
499
+ for (const chunk of cacheData) {
500
+ if (applyExtensionFilter) {
501
+ const ext = path.extname(chunk.file);
502
+ if (!allowedExtensions.has(ext)) continue;
503
+ }
504
+ normalizeChunkVector(chunk);
505
+ this.vectorStore.push(chunk);
506
+ }
507
+ const filteredCount = cacheData.length - this.vectorStore.length;
508
+ if (filteredCount > 0 && this.config.verbose) {
509
+ console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
510
+ }
511
+
512
+ if (hasHashData) {
513
+ // Only keep hashes for allowed extensions
514
+ for (const [file, entry] of rawHashes) {
515
+ if (!applyExtensionFilter || allowedExtensions.has(path.extname(file))) {
516
+ const normalized = normalizeFileHashEntry(entry);
517
+ if (normalized) {
518
+ this.fileHashes.set(file, normalized);
519
+ }
520
+ }
521
+ }
522
+ } else {
523
+ console.warn('[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared');
524
+ }
525
+
526
+ assignChunkIndices(this.vectorStore);
527
+
528
+ if (this.config.verbose) {
529
+ console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
530
+ }
531
+
532
+ // ANN index is lazily loaded/built on first query
533
+ this.annDirty = false;
534
+ this.annPersistDirty = false;
535
+ this.annIndex = null;
536
+ this.annMeta = null;
537
+ this.annVectorCache = null;
538
+ } else if (cacheData) {
539
+ console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
540
+ } else if (hasHashData) {
541
+ console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
542
+ }
543
+
544
+ // Load call-graph data if it exists
545
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
546
+ try {
547
+ const callGraphData = await fs.readFile(callGraphFile, 'utf8');
548
+ const parsed = JSON.parse(callGraphData);
549
+ this.fileCallData = new Map(Object.entries(parsed));
550
+ if (this.config.verbose) {
551
+ console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
552
+ }
553
+ } catch {
554
+ // no cache yet, OK
555
+ }
556
+ } catch (error) {
557
+ console.warn('[Cache] Failed to load cache:', error.message);
558
+ }
559
+ }
560
+
561
+ // -------------------- Save (debounced + serialized) --------------------
562
+
563
+ save() {
564
+ if (!this.config.enableCache) return Promise.resolve();
565
+
566
+ this._saveRequested = true;
567
+
568
+ if (this._saveTimer) return this._savePromise ?? Promise.resolve();
569
+
570
+ const debounceMs = Number.isInteger(this.config.saveDebounceMs)
571
+ ? this.config.saveDebounceMs
572
+ : 250;
573
+
574
+ this._savePromise = new Promise((resolve, reject) => {
575
+ this._saveTimer = setTimeout(() => {
576
+ this._saveTimer = null;
577
+
578
+ this.saveQueue = this.saveQueue
579
+ .then(async () => {
580
+ while (this._saveRequested) {
581
+ this._saveRequested = false;
582
+ await this.performSave();
583
+ }
584
+ })
585
+ .then(resolve, reject)
586
+ .finally(() => {
587
+ this._savePromise = null;
588
+ });
589
+ }, debounceMs);
590
+ });
591
+
592
+ return this._savePromise;
593
+ }
594
+
595
+ async performSave() {
596
+ this.isSaving = true;
597
+
598
+ try {
599
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
600
+
601
+ const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
602
+ const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
603
+ const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
604
+
605
+ // Snapshot to avoid race conditions during async write
606
+ const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
607
+
608
+ this.cacheMeta = {
609
+ version: CACHE_META_VERSION,
610
+ embeddingModel: this.config.embeddingModel,
611
+ lastSaveTime: new Date().toISOString(),
612
+ filesIndexed: this.fileHashes.size,
613
+ chunksStored: snapshotStore.length,
614
+ workspace: this.config.searchDirectory || null,
615
+ };
616
+ if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
617
+ this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
618
+ }
619
+ if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
620
+ Object.assign(this.cacheMeta, this.lastIndexStats);
621
+ }
622
+
623
+ const total = snapshotStore.length;
624
+ if (this.config.vectorStoreFormat === 'binary') {
625
+ this.binaryStore = await BinaryVectorStore.write(
626
+ this.config.cacheDirectory,
627
+ snapshotStore,
628
+ {
629
+ contentCacheEntries: this.config.contentCacheEntries,
630
+ getContent: (chunk, index) => this.getChunkContent(chunk, index),
631
+ preRename: async () => {
632
+ if (this.binaryStore) await this.binaryStore.close();
633
+ },
634
+ },
635
+ );
636
+ if (this.binaryStore) {
637
+ this.cacheMeta.chunksStored = this.binaryStore.length;
522
638
  }
639
+ } else {
640
+ const vectorWriter = new StreamingJsonWriter(cacheFile, {
641
+ highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
642
+ floatDigits:
643
+ this.config.cacheVectorFloatDigits === undefined
644
+ ? 6
645
+ : this.config.cacheVectorFloatDigits,
646
+ flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
647
+ indent: '', // set to " " if you prefer pretty formatting
648
+ assumeFinite: this.config.cacheVectorAssumeFinite,
649
+ checkFinite: this.config.cacheVectorCheckFinite,
650
+ noMutation: this.config.cacheVectorNoMutation ?? false,
651
+ joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
652
+ joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
653
+ });
654
+
655
+ await vectorWriter.writeStart();
656
+
657
+ // Optional responsiveness yield (only for huge saves)
658
+ const yieldEvery = total >= 50_000 ? 5000 : 0;
659
+
660
+ try {
661
+ for (let i = 0; i < total; i++) {
662
+ const pending = vectorWriter.writeItem(snapshotStore[i]);
663
+ if (pending) await pending;
664
+ if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
665
+ }
666
+ await vectorWriter.writeEnd();
667
+ } catch (e) {
668
+ vectorWriter.abort(e);
669
+ throw e;
670
+ }
671
+ }
672
+
673
+ const hashEntries = {};
674
+ for (const [file, entry] of this.fileHashes) {
675
+ const serialized = serializeFileHashEntry(entry);
676
+ if (serialized) {
677
+ hashEntries[file] = serialized;
678
+ }
679
+ }
680
+
681
+ await Promise.all([
682
+ fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
683
+ fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
684
+ ]);
685
+
686
+ // Save call-graph data (or remove stale cache)
687
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
688
+ if (this.fileCallData.size > 0) {
689
+ await fs.writeFile(
690
+ callGraphFile,
691
+ JSON.stringify(Object.fromEntries(this.fileCallData), null, 2),
692
+ );
693
+ } else {
694
+ await fs.rm(callGraphFile, { force: true });
695
+ }
696
+
697
+ // Persist ANN index if it exists and changed in memory
698
+ if (
699
+ this.config.annIndexCache !== false &&
700
+ this.annPersistDirty &&
701
+ !this.annDirty &&
702
+ this.annIndex &&
703
+ this.annMeta
704
+ ) {
705
+ try {
706
+ const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
707
+ this.annIndex.writeIndexSync(indexFile);
708
+ await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
709
+ this.annPersistDirty = false;
710
+ if (this.config.verbose) {
711
+ console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
712
+ }
713
+ } catch (error) {
714
+ console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
715
+ }
716
+ }
717
+ } catch (error) {
718
+ console.warn('[Cache] Failed to save cache:', error.message);
719
+ // Attempt to recover binary store if it was closed during failed save
720
+ if (this.config.vectorStoreFormat === 'binary' && this.binaryStore && !this.binaryStore.vectorsBuffer) {
721
+ try {
722
+ console.info('[Cache] Attempting to recover binary store after failed save...');
723
+ this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
724
+ contentCacheEntries: this.config.contentCacheEntries,
725
+ });
726
+ console.info('[Cache] Binary store recovered.');
727
+ } catch (recoverErr) {
728
+ console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
729
+ this.binaryStore = null; // Ensure it's null if unusable
730
+ }
731
+ }
732
+ } finally {
733
+ this.isSaving = false;
734
+ }
735
+ }
736
+
737
+ // -------------------- Vector Store API --------------------
738
+
739
+ getVectorStore() {
740
+ return Array.isArray(this.vectorStore) ? this.vectorStore : [];
741
+ }
742
+
743
+ async setVectorStore(store) {
744
+ const previousBinaryStore = this.binaryStore;
745
+ this.vectorStore = store;
746
+ this.binaryStore = null;
747
+ if (Array.isArray(this.vectorStore)) {
748
+ for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
749
+ assignChunkIndices(this.vectorStore);
750
+ }
751
+ this.invalidateAnnIndex();
752
+ if (previousBinaryStore) {
753
+ try {
754
+ await previousBinaryStore.close();
755
+ } catch {
756
+ // ignore close errors
757
+ }
758
+ }
759
+ }
760
+
761
+ setLastIndexDuration(durationMs) {
762
+ if (Number.isFinite(durationMs) && durationMs >= 0) {
763
+ this.lastIndexDurationMs = durationMs;
764
+ }
765
+ }
766
+
767
+ setLastIndexStats(stats) {
768
+ if (stats && typeof stats === 'object') {
769
+ this.lastIndexStats = { ...stats };
770
+ }
771
+ }
772
+
773
+ getFileHash(file) {
774
+ const entry = this.fileHashes.get(file);
775
+ if (typeof entry === 'string') return entry;
776
+ return entry?.hash;
777
+ }
778
+
779
+ getFileHashKeys() {
780
+ return Array.from(this.fileHashes.keys());
781
+ }
782
+
783
+ getFileHashCount() {
784
+ return this.fileHashes.size;
785
+ }
786
+
787
+ clearFileHashes() {
788
+ this.fileHashes.clear();
789
+ }
790
+
791
+ setFileHashes(entries) {
792
+ this.fileHashes.clear();
793
+ if (!entries) return;
794
+ const iterator =
795
+ entries instanceof Map
796
+ ? entries.entries()
797
+ : typeof entries === 'object'
798
+ ? Object.entries(entries)
799
+ : null;
800
+ if (!iterator) return;
801
+ for (const [file, entry] of iterator) {
802
+ const normalized = normalizeFileHashEntry(entry);
803
+ if (normalized) {
804
+ this.fileHashes.set(file, normalized);
805
+ }
806
+ }
807
+ }
808
+
809
+ setFileHash(file, hash, meta = null) {
810
+ const entry = { hash };
811
+ if (meta && typeof meta === 'object') {
812
+ if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
813
+ if (Number.isFinite(meta.size)) entry.size = meta.size;
814
+ }
815
+ this.fileHashes.set(file, entry);
816
+ }
817
+
818
+ getFileMeta(file) {
819
+ const entry = this.fileHashes.get(file);
820
+ if (!entry) return null;
821
+ if (typeof entry === 'string') return { hash: entry };
822
+ return entry;
823
+ }
824
+
825
+ getChunkVector(chunk, index = null) {
826
+ if (typeof chunk === 'number') {
827
+ const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
828
+ const entry = store ? store[chunk] : null;
829
+ if (entry?.vector) return entry.vector;
830
+ if (this.binaryStore) {
831
+ const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
832
+ return this.binaryStore.getVector(resolved);
523
833
  }
834
+ return null;
524
835
  }
525
- }
526
-
527
- /**
528
- * Remove call-graph entries for files no longer in the codebase
529
- */
530
- pruneCallGraphData(validFiles) {
531
- if (!validFiles || this.fileCallData.size === 0) return 0;
532
-
533
- let pruned = 0;
534
- for (const file of Array.from(this.fileCallData.keys())) {
535
- if (!validFiles.has(file)) {
536
- this.fileCallData.delete(file);
537
- pruned++;
538
- }
539
- }
836
+
837
+ if (chunk?.vector) return chunk.vector;
838
+ const resolved = Number.isInteger(index) ? index : chunk?._index;
839
+ if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
840
+ return this.binaryStore.getVector(chunk._binaryIndex);
841
+ }
842
+ if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
843
+ return this.binaryStore.getVector(resolved);
844
+ }
845
+ return null;
846
+ }
847
+
848
+ async getChunkContent(chunk, index = null) {
849
+ if (typeof chunk === 'number') {
850
+ const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
851
+ const entry = store ? store[chunk] : null;
852
+ if (entry) return await this.getChunkContent(entry, chunk);
853
+ if (!store && this.binaryStore) {
854
+ return await this.binaryStore.getContent(chunk);
855
+ }
856
+ return '';
857
+ }
858
+ if (chunk?.content !== undefined && chunk?.content !== null) {
859
+ return chunk.content;
860
+ }
861
+ if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
862
+ return await this.binaryStore.getContent(chunk._binaryIndex);
863
+ }
864
+ const resolved = Number.isInteger(index) ? index : chunk?._index;
865
+ if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
866
+ return await this.binaryStore.getContent(resolved);
867
+ }
868
+ return '';
869
+ }
870
+
871
+ deleteFileHash(file) {
872
+ this.fileHashes.delete(file);
873
+ }
874
+
875
+ removeFileFromStore(file) {
876
+ if (!Array.isArray(this.vectorStore)) return;
877
+ // In-place compaction to avoid allocating a new large array
878
+ let w = 0;
879
+ for (let r = 0; r < this.vectorStore.length; r++) {
880
+ const chunk = this.vectorStore[r];
881
+ if (chunk.file !== file) {
882
+ chunk._index = w;
883
+ this.vectorStore[w++] = chunk;
884
+ }
885
+ }
886
+ this.vectorStore.length = w;
887
+
888
+ // Removing shifts labels => rebuild ANN
889
+ this.invalidateAnnIndex();
890
+ this.removeFileCallData(file);
891
+ }
892
+
893
+ addToStore(chunk) {
894
+ normalizeChunkVector(chunk);
895
+
896
+ if (!Array.isArray(this.vectorStore)) {
897
+ this.vectorStore = [];
898
+ }
899
+
900
+ const label = this.vectorStore.length;
901
+ chunk._index = label;
902
+ this.vectorStore.push(chunk);
903
+ if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
904
+ this.annVectorCache.push(chunk.vector);
905
+ }
906
+
907
+ // Best-effort incremental ANN append (fast path)
908
+ if (
909
+ this.annIndex &&
910
+ !this.annDirty &&
911
+ this.annMeta &&
912
+ typeof this.annIndex.addPoint === 'function' &&
913
+ this.annMeta.count === label &&
914
+ this.annMeta.maxElements > this.annMeta.count
915
+ ) {
916
+ try {
917
+ this.annIndex.addPoint(chunk.vector, label);
918
+ this.annMeta.count += 1;
919
+ this.annPersistDirty = true;
920
+ return;
921
+ } catch {
922
+ // fall through
923
+ }
924
+ }
925
+
926
+ this.invalidateAnnIndex();
927
+ }
928
+
929
+ invalidateAnnIndex() {
930
+ this.annIndex = null;
931
+ this.annMeta = null;
932
+ this.annDirty = true;
933
+ this.annPersistDirty = false;
934
+ this.annVectorCache = null;
935
+ }
936
+
937
+ getAnnVector(index) {
938
+ if (!Array.isArray(this.vectorStore)) return null;
939
+ const chunk = this.vectorStore[index];
940
+ if (!chunk) return null;
540
941
 
541
- if (pruned > 0) {
542
- this.callGraph = null;
942
+ if (!Array.isArray(this.annVectorCache) || this.annVectorCache.length !== this.vectorStore.length) {
943
+ this.annVectorCache = new Array(this.vectorStore.length);
543
944
  }
544
945
 
545
- return pruned;
546
- }
547
-
548
- /**
549
- * Store call data for a file
550
- */
551
- setFileCallData(file, data) {
552
- this.fileCallData.set(file, data);
553
- this.callGraph = null; // Invalidate cached graph
554
- }
555
-
556
- /**
557
- * Get call data for a file
558
- */
559
- getFileCallData(file) {
560
- return this.fileCallData.get(file);
561
- }
562
-
563
- /**
564
- * Remove call data for a file
565
- */
566
- removeFileCallData(file) {
567
- this.fileCallData.delete(file);
568
- this.callGraph = null; // Invalidate cached graph
569
- }
570
-
571
- /**
572
- * Rebuild the call graph from file data
573
- */
574
- rebuildCallGraph() {
575
- // Lazy import to avoid circular dependencies
576
- import("./call-graph.js").then(({ buildCallGraph }) => {
577
- this.callGraph = buildCallGraph(this.fileCallData);
578
- if (this.config.verbose) {
579
- console.error(`[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`);
580
- }
581
- }).catch(err => {
582
- console.error(`[CallGraph] Failed to build: ${err.message}`);
583
- this.callGraph = null;
584
- });
585
- }
946
+ const cached = this.annVectorCache[index];
947
+ if (cached) return cached;
586
948
 
587
- /**
588
- * Get files related to symbols via call graph
589
- */
590
- async getRelatedFiles(symbols) {
591
- if (!this.config.callGraphEnabled || symbols.length === 0) {
592
- return new Map();
949
+ let vec = null;
950
+ if (chunk.vector) {
951
+ vec = ensureFloat32(chunk.vector);
952
+ } else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
953
+ vec = this.binaryStore.getVector(chunk._binaryIndex);
593
954
  }
594
955
 
595
- // Rebuild graph if needed
596
- if (!this.callGraph && this.fileCallData.size > 0) {
597
- const { buildCallGraph } = await import("./call-graph.js");
598
- this.callGraph = buildCallGraph(this.fileCallData);
599
- }
956
+ if (!vec) return null;
600
957
 
601
- if (!this.callGraph) {
602
- return new Map();
958
+ if (this.config.vectorStoreLoadMode !== 'disk') {
959
+ chunk.vector = vec;
603
960
  }
604
-
605
- const { getRelatedFiles } = await import("./call-graph.js");
606
- return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
607
- }
608
-
609
- /**
610
- * Get call graph statistics
611
- */
612
- getCallGraphStats() {
613
- return {
614
- enabled: this.config.callGraphEnabled ?? false,
615
- filesWithData: this.fileCallData.size,
616
- graphBuilt: this.callGraph !== null,
617
- definitions: this.callGraph?.defines.size ?? 0,
618
- callTargets: this.callGraph?.calledBy.size ?? 0
619
- };
961
+ this.annVectorCache[index] = vec;
962
+ return vec;
620
963
  }
621
- }
964
+
965
+ getAnnIndexPaths() {
966
+ return {
967
+ indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
968
+ metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
969
+ };
970
+ }
971
+
972
+ // -------------------- ANN --------------------
973
+
974
+ async ensureAnnIndex() {
975
+ if (!this.config.annEnabled) return null;
976
+ if (!Array.isArray(this.vectorStore)) return null;
977
+ if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
978
+ if (this.annIndex && !this.annDirty) return this.annIndex;
979
+ if (this.annLoading) return this.annLoading;
980
+
981
+ this.annLoading = (async () => {
982
+ const HierarchicalNSW = await loadHnswlib();
983
+ if (!HierarchicalNSW) {
984
+ if (hnswlibLoadError) {
985
+ this.initErrors.push({
986
+ stage: 'loadHnswlib',
987
+ message: hnswlibLoadError.message,
988
+ stack: hnswlibLoadError.stack
989
+ });
990
+ }
991
+ return null;
992
+ }
993
+
994
+ const dim = this.vectorStore[0]?.vector?.length;
995
+ if (!dim) return null;
996
+
997
+ if (!this.annDirty && this.config.annIndexCache !== false) {
998
+ const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
999
+ if (loaded) return this.annIndex;
1000
+ }
1001
+
1002
+ return await this.buildAnnIndex(HierarchicalNSW, dim);
1003
+ })();
1004
+
1005
+ const index = await this.annLoading;
1006
+ this.annLoading = null;
1007
+ return index;
1008
+ }
1009
+
1010
+ async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
1011
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
1012
+ const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
1013
+ if (!metaData) return false;
1014
+
1015
+ let meta;
1016
+ try {
1017
+ meta = JSON.parse(metaData);
1018
+ } catch {
1019
+ console.warn('[ANN] Invalid ANN metadata, rebuilding');
1020
+ return false;
1021
+ }
1022
+
1023
+ if (meta?.version !== ANN_META_VERSION) {
1024
+ console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
1025
+ return false;
1026
+ }
1027
+
1028
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
1029
+ console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
1030
+ return false;
1031
+ }
1032
+
1033
+ if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
1034
+ console.warn('[ANN] ANN index size mismatch, rebuilding');
1035
+ return false;
1036
+ }
1037
+
1038
+ if (
1039
+ meta?.metric !== this.config.annMetric ||
1040
+ meta?.m !== this.config.annM ||
1041
+ meta?.efConstruction !== this.config.annEfConstruction
1042
+ ) {
1043
+ console.warn('[ANN] ANN index config changed, rebuilding');
1044
+ return false;
1045
+ }
1046
+
1047
+ let maxElements = meta?.maxElements;
1048
+ if (!Number.isInteger(maxElements)) {
1049
+ maxElements = meta.count;
1050
+ } else if (maxElements < meta.count) {
1051
+ console.warn('[ANN] ANN capacity invalid, rebuilding');
1052
+ return false;
1053
+ }
1054
+
1055
+ const index = new HierarchicalNSW(meta.metric, dim);
1056
+ const loaded = readHnswIndex(index, indexFile, maxElements);
1057
+ if (!loaded) {
1058
+ console.warn('[ANN] Failed to load ANN index file, rebuilding');
1059
+ return false;
1060
+ }
1061
+
1062
+ if (typeof index.setEf === 'function') {
1063
+ index.setEf(this.config.annEfSearch);
1064
+ }
1065
+
1066
+ this.annIndex = index;
1067
+ this.annMeta = { ...meta, maxElements };
1068
+ this.annDirty = false;
1069
+ this.annPersistDirty = false;
1070
+
1071
+ if (this.config.verbose) {
1072
+ console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
1073
+ }
1074
+ return true;
1075
+ }
1076
+
1077
+ async buildAnnIndex(HierarchicalNSW, dim) {
1078
+ if (!Array.isArray(this.vectorStore)) return null;
1079
+ const total = this.vectorStore.length;
1080
+ if (total === 0) return null;
1081
+
1082
+ try {
1083
+ const index = new HierarchicalNSW(this.config.annMetric, dim);
1084
+
1085
+ const maxElements = computeAnnCapacity(total, this.config);
1086
+ initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);
1087
+
1088
+ const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
1089
+ ? this.config.annBuildYieldEvery
1090
+ : 1000;
1091
+
1092
+ for (let i = 0; i < total; i++) {
1093
+ const vector = this.getAnnVector(i);
1094
+ if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
1095
+ index.addPoint(vector, i);
1096
+
1097
+ if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
1098
+ await yieldToLoop();
1099
+ }
1100
+ }
1101
+
1102
+ if (typeof index.setEf === 'function') {
1103
+ index.setEf(this.config.annEfSearch);
1104
+ }
1105
+
1106
+ this.annIndex = index;
1107
+ this.annMeta = {
1108
+ version: ANN_META_VERSION,
1109
+ embeddingModel: this.config.embeddingModel,
1110
+ metric: this.config.annMetric,
1111
+ dim,
1112
+ count: total,
1113
+ maxElements,
1114
+ m: this.config.annM,
1115
+ efConstruction: this.config.annEfConstruction,
1116
+ efSearch: this.config.annEfSearch,
1117
+ };
1118
+ this.annDirty = false;
1119
+ this.annPersistDirty = true;
1120
+
1121
+ if (this.config.annIndexCache !== false) {
1122
+ try {
1123
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
1124
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
1125
+ index.writeIndexSync(indexFile);
1126
+ await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
1127
+ this.annPersistDirty = false;
1128
+ if (this.config.verbose) {
1129
+ console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
1130
+ }
1131
+ } catch (error) {
1132
+ console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
1133
+ }
1134
+ }
1135
+
1136
+ return index;
1137
+ } catch (error) {
1138
+ console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
1139
+ this.initErrors.push({
1140
+ stage: 'buildAnnIndex',
1141
+ message: error.message,
1142
+ stack: error.stack
1143
+ });
1144
+ this.annIndex = null;
1145
+ this.annMeta = null;
1146
+ this.annDirty = true;
1147
+ this.annPersistDirty = false;
1148
+ return null;
1149
+ }
1150
+ }
1151
+
1152
+ async queryAnn(queryVector, k) {
1153
+ if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
1154
+ const index = await this.ensureAnnIndex();
1155
+ if (!index) return [];
1156
+
1157
+ const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
1158
+ const results = index.searchKnn(qVec, k);
1159
+ const labels = normalizeLabels(results);
1160
+
1161
+ if (labels.length === 0) return [];
1162
+
1163
+ const filtered = labels.filter(
1164
+ (label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length,
1165
+ );
1166
+
1167
+ return filtered;
1168
+ }
1169
+
1170
+ async clear() {
1171
+ if (!this.config.enableCache) return;
1172
+
1173
+ try {
1174
+ await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
1175
+ this.vectorStore = [];
1176
+ if (this.binaryStore) {
1177
+ try {
1178
+ await this.binaryStore.close();
1179
+ } catch {
1180
+ // ignore close errors
1181
+ }
1182
+ }
1183
+ this.binaryStore = null;
1184
+ this.fileHashes = new Map();
1185
+ this.invalidateAnnIndex();
1186
+ await this.clearCallGraphData();
1187
+ if (this.config.verbose) {
1188
+ console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
1189
+ }
1190
+ } catch (error) {
1191
+ console.error('[Cache] Failed to clear cache:', error.message);
1192
+ throw error;
1193
+ }
1194
+ }
1195
+
1196
+ /**
1197
+ * Adjust efSearch at runtime for speed/accuracy tradeoff.
1198
+ * Higher values = more accurate but slower.
1199
+ * @param {number} efSearch - New efSearch value (typically 16-512)
1200
+ * @returns {object} Result with success status and current config
1201
+ */
1202
+ setEfSearch(efSearch) {
1203
+ if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
1204
+ return {
1205
+ success: false,
1206
+ error: 'efSearch must be a number between 1 and 1000',
1207
+ };
1208
+ }
1209
+
1210
+ this.config.annEfSearch = efSearch;
1211
+
1212
+ if (this.annIndex && typeof this.annIndex.setEf === 'function') {
1213
+ this.annIndex.setEf(efSearch);
1214
+ if (this.annMeta) this.annMeta.efSearch = efSearch;
1215
+ this.annPersistDirty = true;
1216
+ if (this.config.verbose) {
1217
+ console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
1218
+ }
1219
+ return { success: true, applied: true, efSearch };
1220
+ }
1221
+
1222
+ if (this.config.verbose) {
1223
+ console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
1224
+ }
1225
+ return { success: true, applied: false, efSearch };
1226
+ }
1227
+
1228
+ /**
1229
+ * Get current ANN index statistics for diagnostics.
1230
+ * @returns {object} ANN stats including index state, config, and vector count
1231
+ */
1232
+ getAnnStats() {
1233
+ return {
1234
+ enabled: this.config.annEnabled ?? false,
1235
+ indexLoaded: this.annIndex !== null,
1236
+ dirty: this.annDirty,
1237
+ vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
1238
+ minChunksForAnn: this.config.annMinChunks ?? 5000,
1239
+ config: this.annMeta
1240
+ ? {
1241
+ metric: this.annMeta.metric,
1242
+ dim: this.annMeta.dim,
1243
+ count: this.annMeta.count,
1244
+ m: this.annMeta.m,
1245
+ efConstruction: this.annMeta.efConstruction,
1246
+ efSearch: this.config.annEfSearch,
1247
+ }
1248
+ : null,
1249
+ };
1250
+ }
1251
+
1252
+ // -------------------- Call Graph --------------------
1253
+
1254
+ async clearCallGraphData({ removeFile = false } = {}) {
1255
+ this.fileCallData.clear();
1256
+ this.callGraph = null;
1257
+
1258
+ if (removeFile && this.config.enableCache) {
1259
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
1260
+ try {
1261
+ await fs.rm(callGraphFile, { force: true });
1262
+ } catch (error) {
1263
+ if (this.config.verbose) {
1264
+ console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
1265
+ }
1266
+ }
1267
+ }
1268
+ }
1269
+
1270
+ pruneCallGraphData(validFiles) {
1271
+ if (!validFiles || this.fileCallData.size === 0) return 0;
1272
+
1273
+ let pruned = 0;
1274
+ for (const file of Array.from(this.fileCallData.keys())) {
1275
+ if (!validFiles.has(file)) {
1276
+ this.fileCallData.delete(file);
1277
+ pruned++;
1278
+ }
1279
+ }
1280
+
1281
+ if (pruned > 0) this.callGraph = null;
1282
+ return pruned;
1283
+ }
1284
+
1285
+ getFileCallData(file) {
1286
+ return this.fileCallData.get(file);
1287
+ }
1288
+
1289
+ hasFileCallData(file) {
1290
+ return this.fileCallData.has(file);
1291
+ }
1292
+
1293
+ getFileCallDataKeys() {
1294
+ return Array.from(this.fileCallData.keys());
1295
+ }
1296
+
1297
+ getFileCallDataCount() {
1298
+ return this.fileCallData.size;
1299
+ }
1300
+
1301
+ /**
1302
+ * Sets call data for a specific file.
1303
+ * @param {string} file
1304
+ * @param {object} data
1305
+ */
1306
+ setFileCallData(file, data) {
1307
+ this.fileCallData.set(file, data);
1308
+ this.callGraph = null;
1309
+ }
1310
+
1311
+ /**
1312
+ * Sets the entire file call data map.
1313
+ * @param {Map<string, object>|object} entries
1314
+ */
1315
+ setFileCallDataEntries(entries) {
1316
+ if (entries instanceof Map) {
1317
+ this.fileCallData = entries;
1318
+ } else {
1319
+ this.fileCallData.clear();
1320
+ if (entries && typeof entries === 'object') {
1321
+ for (const [file, data] of Object.entries(entries)) {
1322
+ this.fileCallData.set(file, data);
1323
+ }
1324
+ }
1325
+ }
1326
+ this.callGraph = null;
1327
+ }
1328
+
1329
+ clearFileCallData() {
1330
+ this.fileCallData.clear();
1331
+ this.callGraph = null;
1332
+ }
1333
+
1334
+ removeFileCallData(file) {
1335
+ this.fileCallData.delete(file);
1336
+ this.callGraph = null;
1337
+ }
1338
+
1339
+ async rebuildCallGraph() {
1340
+ if (this._callGraphBuild) return this._callGraphBuild;
1341
+
1342
+ this._callGraphBuild = (async () => {
1343
+ try {
1344
+ const { buildCallGraph } = await import('./call-graph.js');
1345
+ this.callGraph = buildCallGraph(this.fileCallData);
1346
+ if (this.config.verbose && this.callGraph) {
1347
+ console.info(
1348
+ `[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`,
1349
+ );
1350
+ }
1351
+ } catch (err) {
1352
+ console.error(`[CallGraph] Failed to build: ${err.message}`);
1353
+ this.callGraph = null;
1354
+ } finally {
1355
+ this._callGraphBuild = null;
1356
+ }
1357
+ })();
1358
+
1359
+ return this._callGraphBuild;
1360
+ }
1361
+
1362
+ async getRelatedFiles(symbols) {
1363
+ if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
1364
+ if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
1365
+ if (!this.callGraph) return new Map();
1366
+
1367
+ const { getRelatedFiles } = await import('./call-graph.js');
1368
+ return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
1369
+ }
1370
+
1371
+ getCallGraphStats() {
1372
+ return {
1373
+ enabled: this.config.callGraphEnabled ?? false,
1374
+ filesWithData: this.fileCallData.size,
1375
+ graphBuilt: this.callGraph !== null,
1376
+ definitions: this.callGraph?.defines.size ?? 0,
1377
+ callTargets: this.callGraph?.calledBy.size ?? 0,
1378
+ };
1379
+ }
1380
+
1381
+ // -------------------- Abstraction Layer --------------------
1382
+
1383
+ /**
1384
+ * Returns the total number of chunks in the store.
1385
+ * @returns {number}
1386
+ */
1387
+ getStoreSize() {
1388
+ if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
1389
+ if (this.binaryStore) return this.binaryStore.length;
1390
+ return 0;
1391
+ }
1392
+
1393
+ /**
1394
+ * Retrieves a vector by its store index.
1395
+ * @param {number} index
1396
+ * @returns {Float32Array|null}
1397
+ */
1398
+ getVector(index) {
1399
+ return this.getChunkVector(index);
1400
+ }
1401
+
1402
+ /**
1403
+ * Retrieves a chunk object by its store index.
1404
+ * @param {number} index
1405
+ * @returns {object|null}
1406
+ */
1407
+ getChunk(index) {
1408
+ if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
1409
+ return this.vectorStore[index];
1410
+ }
1411
+ if (this.binaryStore) {
1412
+ const record = this.binaryStore.getRecord(index);
1413
+ if (record) {
1414
+ return {
1415
+ file: record.file,
1416
+ startLine: record.startLine,
1417
+ endLine: record.endLine,
1418
+ vector: this.binaryStore.getVector(index),
1419
+ _index: index,
1420
+ _binaryIndex: index,
1421
+ };
1422
+ }
1423
+ }
1424
+ return null;
1425
+ }
1426
+ }