@softerist/heuristic-mcp 3.2.2 → 3.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +387 -376
  2. package/config.jsonc +800 -800
  3. package/features/ann-config.js +102 -110
  4. package/features/clear-cache.js +81 -84
  5. package/features/find-similar-code.js +265 -286
  6. package/features/hybrid-search.js +487 -536
  7. package/features/index-codebase.js +3139 -3270
  8. package/features/lifecycle.js +1041 -1063
  9. package/features/package-version.js +277 -291
  10. package/features/register.js +351 -370
  11. package/features/resources.js +115 -130
  12. package/features/set-workspace.js +214 -240
  13. package/index.js +742 -762
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +465 -519
  16. package/lib/cache.js +1699 -1767
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +232 -226
  19. package/lib/config.js +1483 -1495
  20. package/lib/constants.js +511 -492
  21. package/lib/embed-query-process.js +206 -212
  22. package/lib/embedding-process.js +434 -451
  23. package/lib/embedding-worker.js +862 -934
  24. package/lib/ignore-patterns.js +276 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +302 -310
  27. package/lib/logging.js +116 -127
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +188 -193
  30. package/lib/path-utils.js +18 -23
  31. package/lib/project-detector.js +82 -84
  32. package/lib/server-lifecycle.js +133 -145
  33. package/lib/settings-editor.js +738 -739
  34. package/lib/slice-normalize.js +25 -31
  35. package/lib/tokenizer.js +168 -203
  36. package/lib/utils.js +364 -409
  37. package/lib/vector-store-binary.js +811 -591
  38. package/lib/vector-store-sqlite.js +377 -414
  39. package/lib/workspace-env.js +32 -34
  40. package/mcp_config.json +9 -9
  41. package/package.json +86 -86
  42. package/scripts/clear-cache.js +20 -20
  43. package/scripts/download-model.js +43 -43
  44. package/scripts/mcp-launcher.js +49 -49
  45. package/scripts/postinstall.js +12 -12
  46. package/search-configs.js +36 -36
package/lib/cache.js CHANGED
@@ -1,778 +1,761 @@
1
- import fs from 'fs/promises';
2
- import path from 'path';
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
3
  import { Worker } from 'worker_threads';
4
4
  import { StreamingJsonWriter } from './json-writer.js';
5
- import { BinaryVectorStore } from './vector-store-binary.js';
5
+ import {
6
+ BinaryVectorStore,
7
+ BinaryStoreCorruptionError,
8
+ recordBinaryStoreCorruption,
9
+ } from './vector-store-binary.js';
6
10
  import { SqliteVectorStore } from './vector-store-sqlite.js';
7
11
  import { isNonProjectDirectory } from './config.js';
8
12
  import {
9
13
  JSON_WORKER_THRESHOLD_BYTES,
10
14
  ANN_DIMENSION_SAMPLE_SIZE,
11
15
  HNSWLIB_ERROR_RESET_MS,
12
- DEFAULT_READER_WAIT_TIMEOUT_MS,
13
- } from './constants.js';
14
-
15
- const CACHE_META_VERSION = 1;
16
- const CACHE_META_FILE = 'meta.json';
17
-
18
-
19
- const ANN_META_VERSION = 1;
20
- const ANN_INDEX_FILE = 'ann-index.bin';
21
- const ANN_META_FILE = 'ann-meta.json';
22
-
23
- const CALL_GRAPH_FILE = 'call-graph.json';
24
-
25
- const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
26
-
27
-
28
- const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
29
-
30
- let hnswlibPromise = null;
31
- let hnswlibLoadError = null;
32
-
33
- async function parseJsonInWorker(filePath) {
34
- return new Promise((resolve, reject) => {
35
- let settled = false;
36
- const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
37
- workerData: { filePath },
38
- });
39
-
40
-
41
-
42
-
43
- const finish = (handler, value) => {
44
- if (settled) return;
45
- settled = true;
46
- worker.removeAllListeners();
47
- const termination = worker.terminate?.();
48
- if (termination && typeof termination.catch === 'function') termination.catch(() => null);
49
- handler(value);
50
- };
51
-
52
- worker.once('message', (msg) => {
53
- if (msg?.ok) {
54
- finish(resolve, msg.data);
55
- } else {
56
- const err = new Error(msg?.error || 'JSON worker failed');
57
- console.warn(`[Cache] ${err.message}`);
58
- finish(reject, err);
59
- }
60
- });
61
-
62
- worker.once('error', (err) => {
63
- console.error(`[Cache] JSON worker error: ${err.message}`);
64
- finish(reject, err);
65
- });
66
-
67
- worker.once('exit', (code) => {
68
- if (code !== 0) {
69
- const err = new Error(`JSON worker exited with code ${code}`);
70
- console.error(`[Cache] ${err.message}`);
71
- finish(reject, err);
72
- return;
73
- }
74
- if (!settled) {
75
- const err = new Error('JSON worker exited without sending a response');
76
- console.error(`[Cache] ${err.message}`);
77
- finish(reject, err);
78
- }
79
- });
80
- });
81
- }
82
-
83
- async function readJsonFile(
84
- filePath,
85
- { workerThresholdBytes = JSON_WORKER_THRESHOLD_BYTES } = {}
86
- ) {
87
- let stats;
88
- try {
89
- stats = await fs.stat(filePath);
90
- } catch {
91
-
92
- return null;
93
- }
94
-
95
- try {
96
- const canUseWorker = typeof Worker === 'function';
97
- const useWorker =
98
- canUseWorker && stats && typeof stats.size === 'number'
99
- ? stats.size >= workerThresholdBytes
100
- : false;
101
-
102
- if (useWorker) return await parseJsonInWorker(filePath);
103
-
104
- const data = await fs.readFile(filePath, 'utf-8');
105
- return JSON.parse(data);
106
- } catch (error) {
107
- console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
108
- return null;
109
- }
110
- }
111
-
112
- async function loadHnswlib() {
113
-
114
- if (hnswlibLoadError) {
115
- if (hnswlibLoadError._timestamp && Date.now() - hnswlibLoadError._timestamp > HNSWLIB_ERROR_RESET_MS) {
116
- hnswlibLoadError = null;
117
- hnswlibPromise = null;
118
- } else {
119
- return null;
120
- }
121
- }
122
-
123
- if (!hnswlibPromise) {
124
- hnswlibPromise = import('hnswlib-node')
125
- .then((mod) => {
126
- const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
127
- if (!HierarchicalNSW) throw new Error('HierarchicalNSW export not found');
128
- return HierarchicalNSW;
129
- })
130
- .catch((err) => {
131
-
132
- err._timestamp = Date.now();
133
- hnswlibLoadError = err;
134
- console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
135
- return null;
136
- });
137
- }
138
-
139
- return hnswlibPromise;
140
- }
141
-
142
- function initHnswIndex(index, maxElements, m, efConstruction) {
143
- try {
144
- index.initIndex(maxElements, m, efConstruction, 100);
145
- return;
146
- } catch (err) {
147
- console.warn(`[ANN] Standard init failed: ${err.message}`);
148
- }
149
- try {
150
- index.initIndex(maxElements, m, efConstruction);
151
- return;
152
- } catch (err) {
153
- console.warn(`[ANN] Legacy init failed: ${err.message}`);
154
- }
155
- index.initIndex(maxElements);
156
- }
157
-
158
- function readHnswIndex(index, filePath, maxElements) {
159
- try {
160
- index.readIndexSync(filePath, maxElements);
161
- return true;
162
- } catch {
163
-
164
- }
165
- try {
166
- index.readIndexSync(filePath);
167
- return true;
168
- } catch (err) {
169
- console.warn(`[ANN] Read index failed: ${err.message}`);
170
- }
171
- return false;
172
- }
173
-
174
- function normalizeLabels(result) {
175
- if (!result) return [];
176
- if (Array.isArray(result)) return result;
177
- const labels = result.labels || result.neighbors || result.indices;
178
- return labels ? Array.from(labels) : [];
179
- }
180
-
181
- function ensureFloat32(vector) {
182
- if (!vector) return null;
183
- if (vector instanceof Float32Array) return vector;
184
-
185
-
186
- let result;
187
- if (ArrayBuffer.isView(vector)) {
188
- result = Float32Array.from(vector);
189
- } else {
190
- result = new Float32Array(vector);
191
- }
192
-
193
-
194
- if (IS_TEST_ENV && result.length > 0) {
195
- for (let i = 0; i < result.length; i++) {
196
- if (!Number.isFinite(result[i])) {
197
- throw new Error(
198
- `Invalid vector value at index ${i}: ${result[i]}. ` +
199
- 'Vector contains NaN or Infinity, which will corrupt search results.'
200
- );
201
- }
202
- }
203
- }
204
-
205
- return result;
206
- }
207
-
208
- function normalizeChunkVector(chunk) {
209
- if (chunk?.vector) chunk.vector = ensureFloat32(chunk.vector);
210
- }
211
-
212
- function assignChunkIndices(store) {
213
- if (!Array.isArray(store)) return;
214
- for (let i = 0; i < store.length; i += 1) {
215
- const chunk = store[i];
216
- if (chunk) {
217
- chunk._index = i;
218
- }
219
- }
220
- }
221
-
222
- function normalizeFileHashEntry(entry) {
223
- if (!entry) return null;
224
- if (typeof entry === 'string') return { hash: entry };
225
- if (typeof entry !== 'object') return null;
226
- if (typeof entry.hash !== 'string') return null;
227
- const normalized = { hash: entry.hash };
228
- if (Number.isFinite(entry.mtimeMs)) normalized.mtimeMs = entry.mtimeMs;
229
- if (Number.isFinite(entry.size)) normalized.size = entry.size;
230
- return normalized;
231
- }
232
-
233
- function serializeFileHashEntry(entry) {
234
- if (!entry) return null;
235
- if (typeof entry === 'string') return { hash: entry };
236
- if (typeof entry !== 'object') return null;
237
- if (typeof entry.hash !== 'string') return null;
238
- const serialized = { hash: entry.hash };
239
- if (Number.isFinite(entry.mtimeMs)) serialized.mtimeMs = entry.mtimeMs;
240
- if (Number.isFinite(entry.size)) serialized.size = entry.size;
241
- return serialized;
242
- }
243
-
244
- function computeAnnCapacity(total, config) {
245
- const factor = typeof config.annCapacityFactor === 'number' ? config.annCapacityFactor : 1.2;
246
- const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
247
- const byFactor = Math.ceil(total * factor);
248
- const byExtra = total + extra;
249
- return Math.max(total, byFactor, byExtra);
250
- }
251
-
252
- export class EmbeddingsCache {
253
- constructor(config) {
254
- this.config = config;
255
-
256
- this.vectorStore = [];
257
- this.fileHashes = new Map();
258
- this.isSaving = false;
259
- this.lastIndexDurationMs = null;
260
- this.lastIndexStats = null;
261
-
262
- this.cacheMeta = {
263
- version: CACHE_META_VERSION,
264
- embeddingModel: config.embeddingModel,
265
- embeddingDimension: config.embeddingDimension ?? null,
266
- };
267
-
268
-
16
+ DEFAULT_READER_WAIT_TIMEOUT_MS,
17
+ } from './constants.js';
18
+
19
+ const CACHE_META_VERSION = 1;
20
+ const CACHE_META_FILE = 'meta.json';
21
+
22
+ const ANN_META_VERSION = 1;
23
+ const ANN_INDEX_FILE = 'ann-index.bin';
24
+ const ANN_META_FILE = 'ann-meta.json';
25
+
26
+ const CALL_GRAPH_FILE = 'call-graph.json';
27
+
28
+ const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
29
+
30
+ const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
31
+
32
+ let hnswlibPromise = null;
33
+ let hnswlibLoadError = null;
34
+
35
+ async function parseJsonInWorker(filePath) {
36
+ return new Promise((resolve, reject) => {
37
+ let settled = false;
38
+ const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
39
+ workerData: { filePath },
40
+ });
41
+
42
+ const finish = (handler, value) => {
43
+ if (settled) return;
44
+ settled = true;
45
+ worker.removeAllListeners();
46
+ const termination = worker.terminate?.();
47
+ if (termination && typeof termination.catch === 'function') termination.catch(() => null);
48
+ handler(value);
49
+ };
50
+
51
+ worker.once('message', (msg) => {
52
+ if (msg?.ok) {
53
+ finish(resolve, msg.data);
54
+ } else {
55
+ const err = new Error(msg?.error || 'JSON worker failed');
56
+ console.warn(`[Cache] ${err.message}`);
57
+ finish(reject, err);
58
+ }
59
+ });
60
+
61
+ worker.once('error', (err) => {
62
+ console.error(`[Cache] JSON worker error: ${err.message}`);
63
+ finish(reject, err);
64
+ });
65
+
66
+ worker.once('exit', (code) => {
67
+ if (code !== 0) {
68
+ const err = new Error(`JSON worker exited with code ${code}`);
69
+ console.error(`[Cache] ${err.message}`);
70
+ finish(reject, err);
71
+ return;
72
+ }
73
+ if (!settled) {
74
+ const err = new Error('JSON worker exited without sending a response');
75
+ console.error(`[Cache] ${err.message}`);
76
+ finish(reject, err);
77
+ }
78
+ });
79
+ });
80
+ }
81
+
82
+ async function readJsonFile(filePath, { workerThresholdBytes = JSON_WORKER_THRESHOLD_BYTES } = {}) {
83
+ let stats;
84
+ try {
85
+ stats = await fs.stat(filePath);
86
+ } catch {
87
+ return null;
88
+ }
89
+
90
+ try {
91
+ const canUseWorker = typeof Worker === 'function';
92
+ const useWorker =
93
+ canUseWorker && stats && typeof stats.size === 'number'
94
+ ? stats.size >= workerThresholdBytes
95
+ : false;
96
+
97
+ if (useWorker) return await parseJsonInWorker(filePath);
98
+
99
+ const data = await fs.readFile(filePath, 'utf-8');
100
+ return JSON.parse(data);
101
+ } catch (error) {
102
+ console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
103
+ return null;
104
+ }
105
+ }
106
+
107
+ async function loadHnswlib() {
108
+ if (hnswlibLoadError) {
109
+ if (
110
+ hnswlibLoadError._timestamp &&
111
+ Date.now() - hnswlibLoadError._timestamp > HNSWLIB_ERROR_RESET_MS
112
+ ) {
113
+ hnswlibLoadError = null;
114
+ hnswlibPromise = null;
115
+ } else {
116
+ return null;
117
+ }
118
+ }
119
+
120
+ if (!hnswlibPromise) {
121
+ hnswlibPromise = import('hnswlib-node')
122
+ .then((mod) => {
123
+ const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
124
+ if (!HierarchicalNSW) throw new Error('HierarchicalNSW export not found');
125
+ return HierarchicalNSW;
126
+ })
127
+ .catch((err) => {
128
+ err._timestamp = Date.now();
129
+ hnswlibLoadError = err;
130
+ console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
131
+ return null;
132
+ });
133
+ }
134
+
135
+ return hnswlibPromise;
136
+ }
137
+
138
+ function initHnswIndex(index, maxElements, m, efConstruction) {
139
+ try {
140
+ index.initIndex(maxElements, m, efConstruction, 100);
141
+ return;
142
+ } catch (err) {
143
+ console.warn(`[ANN] Standard init failed: ${err.message}`);
144
+ }
145
+ try {
146
+ index.initIndex(maxElements, m, efConstruction);
147
+ return;
148
+ } catch (err) {
149
+ console.warn(`[ANN] Legacy init failed: ${err.message}`);
150
+ }
151
+ index.initIndex(maxElements);
152
+ }
153
+
154
+ function readHnswIndex(index, filePath, maxElements) {
155
+ try {
156
+ index.readIndexSync(filePath, maxElements);
157
+ return true;
158
+ } catch {}
159
+ try {
160
+ index.readIndexSync(filePath);
161
+ return true;
162
+ } catch (err) {
163
+ console.warn(`[ANN] Read index failed: ${err.message}`);
164
+ }
165
+ return false;
166
+ }
167
+
168
+ function normalizeLabels(result) {
169
+ if (!result) return [];
170
+ if (Array.isArray(result)) return result;
171
+ const labels = result.labels || result.neighbors || result.indices;
172
+ return labels ? Array.from(labels) : [];
173
+ }
174
+
175
+ function ensureFloat32(vector) {
176
+ if (!vector) return null;
177
+ if (vector instanceof Float32Array) return vector;
178
+
179
+ let result;
180
+ if (ArrayBuffer.isView(vector)) {
181
+ result = Float32Array.from(vector);
182
+ } else {
183
+ result = new Float32Array(vector);
184
+ }
185
+
186
+ if (IS_TEST_ENV && result.length > 0) {
187
+ for (let i = 0; i < result.length; i++) {
188
+ if (!Number.isFinite(result[i])) {
189
+ throw new Error(
190
+ `Invalid vector value at index ${i}: ${result[i]}. ` +
191
+ 'Vector contains NaN or Infinity, which will corrupt search results.'
192
+ );
193
+ }
194
+ }
195
+ }
196
+
197
+ return result;
198
+ }
199
+
200
+ function normalizeChunkVector(chunk) {
201
+ if (chunk?.vector) chunk.vector = ensureFloat32(chunk.vector);
202
+ }
203
+
204
+ function assignChunkIndices(store) {
205
+ if (!Array.isArray(store)) return;
206
+ for (let i = 0; i < store.length; i += 1) {
207
+ const chunk = store[i];
208
+ if (chunk) {
209
+ chunk._index = i;
210
+ }
211
+ }
212
+ }
213
+
214
+ function normalizeFileHashEntry(entry) {
215
+ if (!entry) return null;
216
+ if (typeof entry === 'string') return { hash: entry };
217
+ if (typeof entry !== 'object') return null;
218
+ if (typeof entry.hash !== 'string') return null;
219
+ const normalized = { hash: entry.hash };
220
+ if (Number.isFinite(entry.mtimeMs)) normalized.mtimeMs = entry.mtimeMs;
221
+ if (Number.isFinite(entry.size)) normalized.size = entry.size;
222
+ return normalized;
223
+ }
224
+
225
+ function serializeFileHashEntry(entry) {
226
+ if (!entry) return null;
227
+ if (typeof entry === 'string') return { hash: entry };
228
+ if (typeof entry !== 'object') return null;
229
+ if (typeof entry.hash !== 'string') return null;
230
+ const serialized = { hash: entry.hash };
231
+ if (Number.isFinite(entry.mtimeMs)) serialized.mtimeMs = entry.mtimeMs;
232
+ if (Number.isFinite(entry.size)) serialized.size = entry.size;
233
+ return serialized;
234
+ }
235
+
236
+ function computeAnnCapacity(total, config) {
237
+ const factor = typeof config.annCapacityFactor === 'number' ? config.annCapacityFactor : 1.2;
238
+ const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
239
+ const byFactor = Math.ceil(total * factor);
240
+ const byExtra = total + extra;
241
+ return Math.max(total, byFactor, byExtra);
242
+ }
243
+
244
+ export class EmbeddingsCache {
245
+ constructor(config) {
246
+ this.config = config;
247
+
248
+ this.vectorStore = [];
249
+ this.fileHashes = new Map();
250
+ this.isSaving = false;
251
+ this.lastIndexDurationMs = null;
252
+ this.lastIndexStats = null;
253
+
254
+ this.cacheMeta = {
255
+ version: CACHE_META_VERSION,
256
+ embeddingModel: config.embeddingModel,
257
+ embeddingDimension: config.embeddingDimension ?? null,
258
+ };
259
+
269
260
  this.saveQueue = Promise.resolve();
270
261
  this._saveTimer = null;
271
262
  this._saveRequested = false;
272
263
  this._savePromise = null;
273
264
  this._saveThrowOnError = false;
274
265
  this.lastSaveError = null;
275
-
276
-
277
- this.annIndex = null;
278
- this.annMeta = null;
279
- this.annDirty = false;
280
- this.annPersistDirty = false;
281
- this.annLoading = null;
282
- this.annVectorCache = null;
283
-
284
-
285
- this.fileCallData = new Map();
286
- this.callGraph = null;
287
- this._callGraphBuild = null;
288
-
289
-
290
- this.binaryStore = null;
291
-
292
-
293
- this.sqliteStore = null;
294
-
295
-
296
- this.initErrors = [];
297
-
298
-
299
- this.activeReads = 0;
300
- this._readWaiters = [];
301
- this._saveInProgress = false;
302
-
303
-
304
- this._clearedAfterIndex = false;
305
- this._loadPromise = null;
306
- }
307
-
308
-
309
- addInitError(stage, error) {
310
- this.initErrors.push({
311
- stage,
312
- message: error instanceof Error ? error.message : String(error),
313
- stack: error instanceof Error ? error.stack : null,
314
- timestamp: Date.now(),
315
- });
316
- }
317
-
318
- clearInMemoryState() {
319
- this.vectorStore = [];
320
- this.fileHashes.clear();
321
- this.invalidateAnnIndex();
322
- this.fileCallData.clear();
323
- this.callGraph = null;
324
- this.initErrors = [];
325
- if (this.binaryStore) {
326
- try {
327
- this.binaryStore.close?.();
328
- } catch {
329
-
330
- }
331
- this.binaryStore = null;
332
- }
333
- if (this.sqliteStore) {
334
- try {
335
- this.sqliteStore.close?.();
336
- } catch {
337
-
338
- }
339
- this.sqliteStore = null;
340
- }
341
- }
342
-
343
- async close() {
344
- if (this.binaryStore) {
345
- await this.binaryStore.close();
346
- this.binaryStore = null;
347
- }
348
- if (this.sqliteStore) {
349
- try {
350
- this.sqliteStore.close();
351
- } catch {
352
-
353
- }
354
- this.sqliteStore = null;
355
- }
356
- }
357
-
358
- async ensureLoaded({ preferDisk = false } = {}) {
359
- if (!this.config.enableCache) return;
360
- if (!this._clearedAfterIndex) return;
361
- if (this._loadPromise) return this._loadPromise;
362
-
363
- this._loadPromise = (async () => {
364
- if (preferDisk && this.config.verbose) {
365
- console.info('[Cache] ensureLoaded: forcing disk vector mode for incremental low-RAM reload');
366
- }
367
- await this.load({
368
- forceVectorLoadMode: preferDisk ? 'disk' : undefined,
369
- });
370
- this._clearedAfterIndex = false;
371
- })().finally(() => {
372
- this._loadPromise = null;
373
- });
374
-
375
- return this._loadPromise;
376
- }
377
-
378
- async dropInMemoryVectors() {
379
- if (!this.config.enableCache) return;
380
-
381
- if (this.activeReads > 0) {
382
- await this.waitForReaders();
383
- }
384
-
385
- this.vectorStore = [];
386
- this.annVectorCache = null;
387
- this.annIndex = null;
388
- this.annMeta = null;
389
- this.annDirty = true;
390
- this.annPersistDirty = false;
391
-
392
- if (this.binaryStore) {
393
- try {
394
- await this.binaryStore.close();
395
- } catch {
396
-
397
- }
398
- this.binaryStore = null;
399
- }
400
-
401
- if (this.sqliteStore) {
402
- try {
403
- this.sqliteStore.close();
404
- } catch {
405
-
406
- }
407
- this.sqliteStore = null;
408
- }
409
-
410
- this._clearedAfterIndex = true;
411
- }
412
-
413
-
414
-
415
- startRead() {
416
-
417
- if (this._saveInProgress) {
418
- throw new Error('Cache save in progress, try again shortly');
419
- }
420
- this.activeReads++;
421
- }
422
-
423
- endRead() {
424
- if (this.activeReads > 0) {
425
- this.activeReads--;
426
- if (this.activeReads === 0 && this._readWaiters.length > 0) {
427
- const waiters = this._readWaiters;
428
- this._readWaiters = [];
429
- for (const resolve of waiters) {
430
- resolve();
431
- }
432
- }
433
- }
434
- }
435
-
436
- async waitForReaders() {
437
- if (this.activeReads === 0) return;
438
- await new Promise((resolve) => {
439
- this._readWaiters.push(resolve);
440
- });
441
- }
442
-
443
- async waitForReadersWithTimeout(timeoutMs = 5000) {
444
- if (this.activeReads === 0) return true;
445
- let timedOut = false;
446
- let resolved = false;
447
- let waiterResolve;
448
- const waiterPromise = new Promise((resolve) => {
449
- waiterResolve = () => {
450
- if (!resolved) {
451
- resolved = true;
452
- resolve();
453
- }
454
- };
455
- this._readWaiters.push(waiterResolve);
456
- });
457
- await Promise.race([
458
- waiterPromise,
459
- new Promise((resolve) => {
460
- setTimeout(() => {
461
- if (!resolved) {
462
- resolved = true;
463
- timedOut = true;
464
-
465
- const idx = this._readWaiters.indexOf(waiterResolve);
466
- if (idx >= 0) this._readWaiters.splice(idx, 1);
467
- resolve();
468
- }
469
- }, timeoutMs);
470
- }),
471
- ]);
472
- if (timedOut) {
473
-
474
- console.warn(
475
- `[Cache] Timed out waiting for ${this.activeReads} active reader(s); proceeding with save anyway. ` +
476
- 'This may cause data inconsistency if readers access the store during write.'
477
- );
478
- }
479
- return !timedOut;
480
- }
481
-
482
-
483
-
484
-
485
- async reset() {
486
- this.vectorStore = [];
487
- if (this.binaryStore) {
488
- try {
489
- await this.binaryStore.close();
490
- } catch {
491
-
492
- }
493
- this.binaryStore = null;
494
- }
495
- if (this.sqliteStore) {
496
- try {
497
- this.sqliteStore.close();
498
- } catch {
499
-
500
- }
501
- this.sqliteStore = null;
502
- }
503
- this.fileHashes.clear();
504
- this.invalidateAnnIndex();
505
- await this.clearCallGraphData({ removeFile: true });
506
- this.initErrors = [];
507
- }
508
-
509
-
510
-
511
- async load({ forceVectorLoadMode } = {}) {
512
- if (!this.config.enableCache) return;
513
-
514
- try {
515
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
516
-
517
- const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
518
- const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
519
- const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
520
-
521
- const workerThresholdBytes =
522
- Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
523
- this.config.jsonWorkerThresholdBytes > 0
524
- ? this.config.jsonWorkerThresholdBytes
525
- : JSON_WORKER_THRESHOLD_BYTES;
526
-
527
- const useBinary = this.config.vectorStoreFormat === 'binary';
528
- const useSqlite = this.config.vectorStoreFormat === 'sqlite';
529
-
530
- const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
531
- this.config.cacheDirectory
532
- );
533
- const pathExists = async (targetPath) => {
534
- try {
535
- await fs.access(targetPath);
536
- return true;
537
- } catch {
538
- return false;
539
- }
540
- };
541
-
542
-
543
- let cacheData = null;
544
- let hashData = null;
545
- let prefetched = false;
546
- if (IS_TEST_ENV) {
547
- prefetched = true;
548
- const cachePromise = useBinary || useSqlite
549
- ? Promise.resolve(null)
550
- : readJsonFile(cacheFile, { workerThresholdBytes });
551
- [cacheData, hashData] = await Promise.all([
552
- cachePromise,
553
- readJsonFile(hashFile, { workerThresholdBytes }),
554
- ]);
555
- }
556
-
557
-
558
- const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
559
- if (!metaData) {
560
- console.warn('[Cache] Missing cache metadata, ignoring cache');
561
- this.clearInMemoryState();
562
- return;
563
- }
564
-
565
- let meta;
566
- try {
567
- meta = JSON.parse(metaData);
568
- } catch {
569
- console.warn('[Cache] Invalid cache metadata, ignoring cache');
570
- this.clearInMemoryState();
571
- return;
572
- }
573
-
574
- if (meta?.version !== CACHE_META_VERSION) {
575
- console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
576
- this.clearInMemoryState();
577
- return;
578
- }
579
-
580
- if (meta?.embeddingModel !== this.config.embeddingModel) {
581
- console.warn(
582
- `[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`
583
- );
584
- this.clearInMemoryState();
585
- return;
586
- }
587
- const expectedDimension = this.config.embeddingDimension ?? null;
588
- const metaDimension = meta?.embeddingDimension ?? null;
589
- if (metaDimension !== expectedDimension) {
590
- console.warn(
591
- `[Cache] Embedding dimension changed, ignoring cache (${metaDimension} -> ${expectedDimension})`
592
- );
593
- this.clearInMemoryState();
594
- return;
595
- }
596
-
597
- if (!prefetched) {
598
- [cacheData, hashData] = await Promise.all([
599
- useBinary || useSqlite ? Promise.resolve(null) : readJsonFile(cacheFile, { workerThresholdBytes }),
600
- readJsonFile(hashFile, { workerThresholdBytes }),
601
- ]);
602
- }
603
-
604
- this.cacheMeta = meta;
605
-
606
- const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
607
- (async () => {
608
- const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
609
- pathExists(vectorsPath),
610
- pathExists(recordsPath),
611
- pathExists(contentPath),
612
- pathExists(filesPath),
613
- ]);
614
- return vectorsOk && recordsOk && contentOk && filesOk;
615
- })(),
616
- pathExists(cacheFile),
617
- ]);
618
-
619
- if (useBinary && !binaryFilesPresent) {
620
- if (jsonCachePresent) {
621
- console.warn(
622
- '[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.'
623
- );
624
- } else {
625
- console.warn(
626
- '[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.'
627
- );
628
- }
629
- } else if (!useBinary && !useSqlite && !jsonCachePresent) {
630
- if (binaryFilesPresent) {
631
- console.warn(
632
- '[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.'
633
- );
634
- } else {
635
- console.warn(
636
- '[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.'
637
- );
638
- }
639
- }
640
-
641
- const configuredVectorLoadMode =
642
- typeof this.config.vectorStoreLoadMode === 'string'
643
- ? this.config.vectorStoreLoadMode.toLowerCase()
644
- : 'memory';
645
- const effectiveVectorLoadMode =
646
- forceVectorLoadMode === 'disk' || forceVectorLoadMode === 'memory'
647
- ? forceVectorLoadMode
648
- : configuredVectorLoadMode;
649
-
650
- if (useBinary) {
651
- try {
652
- this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
653
- contentCacheEntries: this.config.contentCacheEntries,
654
- vectorCacheEntries: this.config.vectorCacheEntries,
655
- vectorLoadMode: effectiveVectorLoadMode,
656
- });
657
- cacheData = await this.binaryStore.toChunkViews({
658
- includeContent: this.config.vectorStoreContentMode === 'inline',
659
- includeVector: effectiveVectorLoadMode !== 'disk',
660
- });
661
- } catch (err) {
662
- this.binaryStore = null;
663
- console.warn(`[Cache] Failed to load binary vector store: ${err.message}`);
664
- }
665
- }
666
-
667
-
668
- if (useSqlite) {
669
- try {
670
- this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
671
- if (this.sqliteStore) {
672
- cacheData = this.sqliteStore.toChunkViews({
673
- includeContent: this.config.vectorStoreContentMode === 'inline',
674
- includeVector: effectiveVectorLoadMode !== 'disk',
675
- });
676
- } else {
677
-
678
- console.warn('[Cache] vectorStoreFormat=sqlite but vectors.sqlite is missing. Reindex to regenerate the cache.');
679
- }
680
- } catch (err) {
681
- this.sqliteStore = null;
682
- console.warn(`[Cache] Failed to load SQLite vector store: ${err.message}`);
683
- }
684
- }
685
-
686
- if (!cacheData) {
687
- cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
688
- }
689
-
690
- const hasCacheData = Array.isArray(cacheData);
691
- const hasHashData = hashData && typeof hashData === 'object';
692
-
693
- if (hasCacheData) {
694
- const allowedExtensions = new Set(
695
- (this.config.fileExtensions || []).map((ext) => `.${ext}`)
696
- );
697
- const allowedFileNames = new Set(this.config.fileNames || []);
698
- const applyExtensionFilter = !this.binaryStore;
699
- const shouldKeepFile = (filePath) => {
700
- const ext = path.extname(filePath);
701
- if (allowedExtensions.has(ext)) return true;
702
- return allowedFileNames.has(path.basename(filePath));
703
- };
704
-
705
- const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
706
- this.vectorStore = [];
707
- this.fileHashes.clear();
708
-
709
-
710
- for (const chunk of cacheData) {
711
- if (applyExtensionFilter) {
712
- if (!shouldKeepFile(chunk.file)) continue;
713
- }
714
- normalizeChunkVector(chunk);
715
- this.vectorStore.push(chunk);
716
- }
717
- const filteredCount = cacheData.length - this.vectorStore.length;
718
- if (filteredCount > 0 && this.config.verbose) {
719
- console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
720
- }
721
-
722
- if (hasHashData) {
723
-
724
- for (const [file, entry] of rawHashes) {
725
- if (!applyExtensionFilter || shouldKeepFile(file)) {
726
- const normalized = normalizeFileHashEntry(entry);
727
- if (normalized) {
728
- this.fileHashes.set(file, normalized);
729
- }
730
- }
731
- }
732
- } else {
733
- console.warn(
734
- '[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared'
735
- );
736
- }
737
-
738
- assignChunkIndices(this.vectorStore);
739
-
740
- if (this.config.verbose) {
741
- console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
742
- }
743
-
744
-
745
- this.annDirty = false;
746
- this.annPersistDirty = false;
747
- this.annIndex = null;
748
- this.annMeta = null;
749
- this.annVectorCache = null;
750
- } else if (cacheData) {
751
- console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
752
- } else if (hasHashData) {
753
- console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
754
- }
755
-
756
-
757
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
758
- try {
759
- const callGraphData = await fs.readFile(callGraphFile, 'utf8');
760
- const parsed = JSON.parse(callGraphData);
761
- this.fileCallData = new Map(Object.entries(parsed));
762
- if (this.config.verbose) {
763
- console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
764
- }
765
- } catch {
766
-
767
- }
768
- } catch (error) {
769
- console.warn('[Cache] Failed to load cache:', error.message);
770
- this.clearInMemoryState();
771
- }
772
- }
773
-
774
-
775
-
266
+
267
+ this.annIndex = null;
268
+ this.annMeta = null;
269
+ this.annDirty = false;
270
+ this.annPersistDirty = false;
271
+ this.annLoading = null;
272
+ this.annVectorCache = null;
273
+
274
+ this.fileCallData = new Map();
275
+ this.callGraph = null;
276
+ this._callGraphBuild = null;
277
+
278
+ this.binaryStore = null;
279
+
280
+ this.sqliteStore = null;
281
+
282
+ this.initErrors = [];
283
+
284
+ this.activeReads = 0;
285
+ this._readWaiters = [];
286
+ this._saveInProgress = false;
287
+
288
+ this._clearedAfterIndex = false;
289
+ this._loadPromise = null;
290
+ this._corruptionDetected = false;
291
+ }
292
+
293
+ /**
294
+ * Returns true if the last load() detected binary store corruption.
295
+ * Used by the server to decide whether to trigger an automatic re-index.
296
+ */
297
+ shouldAutoReindex() {
298
+ return this._corruptionDetected === true;
299
+ }
300
+
301
+ consumeAutoReindex() {
302
+ const should = this._corruptionDetected === true;
303
+ this._corruptionDetected = false;
304
+ return should;
305
+ }
306
+
307
+ addInitError(stage, error) {
308
+ this.initErrors.push({
309
+ stage,
310
+ message: error instanceof Error ? error.message : String(error),
311
+ stack: error instanceof Error ? error.stack : null,
312
+ timestamp: Date.now(),
313
+ });
314
+ }
315
+
316
+ clearInMemoryState() {
317
+ this.vectorStore = [];
318
+ this.fileHashes.clear();
319
+ this.invalidateAnnIndex();
320
+ this.fileCallData.clear();
321
+ this.callGraph = null;
322
+ this.initErrors = [];
323
+ if (this.binaryStore) {
324
+ try {
325
+ this.binaryStore.close?.();
326
+ } catch {}
327
+ this.binaryStore = null;
328
+ }
329
+ if (this.sqliteStore) {
330
+ try {
331
+ this.sqliteStore.close?.();
332
+ } catch {}
333
+ this.sqliteStore = null;
334
+ }
335
+ }
336
+
337
+ async close() {
338
+ if (this.binaryStore) {
339
+ await this.binaryStore.close();
340
+ this.binaryStore = null;
341
+ }
342
+ if (this.sqliteStore) {
343
+ try {
344
+ this.sqliteStore.close();
345
+ } catch {}
346
+ this.sqliteStore = null;
347
+ }
348
+ }
349
+
350
+ async ensureLoaded({ preferDisk = false } = {}) {
351
+ if (!this.config.enableCache) return;
352
+ if (!this._clearedAfterIndex) return;
353
+ if (this._loadPromise) return this._loadPromise;
354
+
355
+ this._loadPromise = (async () => {
356
+ if (preferDisk && this.config.verbose) {
357
+ console.info(
358
+ '[Cache] ensureLoaded: forcing disk vector mode for incremental low-RAM reload'
359
+ );
360
+ }
361
+ await this.load({
362
+ forceVectorLoadMode: preferDisk ? 'disk' : undefined,
363
+ });
364
+ this._clearedAfterIndex = false;
365
+ })().finally(() => {
366
+ this._loadPromise = null;
367
+ });
368
+
369
+ return this._loadPromise;
370
+ }
371
+
372
+ async dropInMemoryVectors() {
373
+ if (!this.config.enableCache) return;
374
+
375
+ if (this.activeReads > 0) {
376
+ await this.waitForReaders();
377
+ }
378
+
379
+ this.vectorStore = [];
380
+ this.annVectorCache = null;
381
+ this.annIndex = null;
382
+ this.annMeta = null;
383
+ this.annDirty = true;
384
+ this.annPersistDirty = false;
385
+
386
+ if (this.binaryStore) {
387
+ try {
388
+ await this.binaryStore.close();
389
+ } catch {}
390
+ this.binaryStore = null;
391
+ }
392
+
393
+ if (this.sqliteStore) {
394
+ try {
395
+ this.sqliteStore.close();
396
+ } catch {}
397
+ this.sqliteStore = null;
398
+ }
399
+
400
+ this._clearedAfterIndex = true;
401
+ }
402
+
403
+ startRead() {
404
+ if (this._saveInProgress) {
405
+ throw new Error('Cache save in progress, try again shortly');
406
+ }
407
+ this.activeReads++;
408
+ }
409
+
410
+ endRead() {
411
+ if (this.activeReads > 0) {
412
+ this.activeReads--;
413
+ if (this.activeReads === 0 && this._readWaiters.length > 0) {
414
+ const waiters = this._readWaiters;
415
+ this._readWaiters = [];
416
+ for (const resolve of waiters) {
417
+ resolve();
418
+ }
419
+ }
420
+ }
421
+ }
422
+
423
+ async waitForReaders() {
424
+ if (this.activeReads === 0) return;
425
+ await new Promise((resolve) => {
426
+ this._readWaiters.push(resolve);
427
+ });
428
+ }
429
+
430
+ async waitForReadersWithTimeout(timeoutMs = 5000) {
431
+ if (this.activeReads === 0) return true;
432
+ let timedOut = false;
433
+ let resolved = false;
434
+ let waiterResolve;
435
+ const waiterPromise = new Promise((resolve) => {
436
+ waiterResolve = () => {
437
+ if (!resolved) {
438
+ resolved = true;
439
+ resolve();
440
+ }
441
+ };
442
+ this._readWaiters.push(waiterResolve);
443
+ });
444
+ await Promise.race([
445
+ waiterPromise,
446
+ new Promise((resolve) => {
447
+ setTimeout(() => {
448
+ if (!resolved) {
449
+ resolved = true;
450
+ timedOut = true;
451
+
452
+ const idx = this._readWaiters.indexOf(waiterResolve);
453
+ if (idx >= 0) this._readWaiters.splice(idx, 1);
454
+ resolve();
455
+ }
456
+ }, timeoutMs);
457
+ }),
458
+ ]);
459
+ if (timedOut) {
460
+ console.warn(
461
+ `[Cache] Timed out waiting for ${this.activeReads} active reader(s); proceeding with save anyway. ` +
462
+ 'This may cause data inconsistency if readers access the store during write.'
463
+ );
464
+ }
465
+ return !timedOut;
466
+ }
467
+
468
+ async reset() {
469
+ this.vectorStore = [];
470
+ if (this.binaryStore) {
471
+ try {
472
+ await this.binaryStore.close();
473
+ } catch {}
474
+ this.binaryStore = null;
475
+ }
476
+ if (this.sqliteStore) {
477
+ try {
478
+ this.sqliteStore.close();
479
+ } catch {}
480
+ this.sqliteStore = null;
481
+ }
482
+ this.fileHashes.clear();
483
+ this.invalidateAnnIndex();
484
+ await this.clearCallGraphData({ removeFile: true });
485
+ this.initErrors = [];
486
+ }
487
+
488
+ async load({ forceVectorLoadMode } = {}) {
489
+ if (!this.config.enableCache) return;
490
+ this._corruptionDetected = false;
491
+
492
+ try {
493
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
494
+
495
+ const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
496
+ const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
497
+ const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
498
+
499
+ const workerThresholdBytes =
500
+ Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
501
+ this.config.jsonWorkerThresholdBytes > 0
502
+ ? this.config.jsonWorkerThresholdBytes
503
+ : JSON_WORKER_THRESHOLD_BYTES;
504
+
505
+ const useBinary = this.config.vectorStoreFormat === 'binary';
506
+ const useSqlite = this.config.vectorStoreFormat === 'sqlite';
507
+
508
+ const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
509
+ this.config.cacheDirectory
510
+ );
511
+ const pathExists = async (targetPath) => {
512
+ try {
513
+ await fs.access(targetPath);
514
+ return true;
515
+ } catch {
516
+ return false;
517
+ }
518
+ };
519
+
520
+ let cacheData = null;
521
+ let hashData = null;
522
+ let prefetched = false;
523
+ if (IS_TEST_ENV) {
524
+ prefetched = true;
525
+ const cachePromise =
526
+ useBinary || useSqlite
527
+ ? Promise.resolve(null)
528
+ : readJsonFile(cacheFile, { workerThresholdBytes });
529
+ [cacheData, hashData] = await Promise.all([
530
+ cachePromise,
531
+ readJsonFile(hashFile, { workerThresholdBytes }),
532
+ ]);
533
+ }
534
+
535
+ const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
536
+ if (!metaData) {
537
+ console.warn('[Cache] Missing cache metadata, ignoring cache');
538
+ this.clearInMemoryState();
539
+ return;
540
+ }
541
+
542
+ let meta;
543
+ try {
544
+ meta = JSON.parse(metaData);
545
+ } catch {
546
+ console.warn('[Cache] Invalid cache metadata, ignoring cache');
547
+ this.clearInMemoryState();
548
+ return;
549
+ }
550
+
551
+ if (meta?.version !== CACHE_META_VERSION) {
552
+ console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
553
+ this.clearInMemoryState();
554
+ return;
555
+ }
556
+
557
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
558
+ console.warn(
559
+ `[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`
560
+ );
561
+ this.clearInMemoryState();
562
+ return;
563
+ }
564
+ const expectedDimension = this.config.embeddingDimension ?? null;
565
+ const metaDimension = meta?.embeddingDimension ?? null;
566
+ if (metaDimension !== expectedDimension) {
567
+ console.warn(
568
+ `[Cache] Embedding dimension changed, ignoring cache (${metaDimension} -> ${expectedDimension})`
569
+ );
570
+ this.clearInMemoryState();
571
+ return;
572
+ }
573
+
574
+ if (!prefetched) {
575
+ [cacheData, hashData] = await Promise.all([
576
+ useBinary || useSqlite
577
+ ? Promise.resolve(null)
578
+ : readJsonFile(cacheFile, { workerThresholdBytes }),
579
+ readJsonFile(hashFile, { workerThresholdBytes }),
580
+ ]);
581
+ }
582
+
583
+ this.cacheMeta = meta;
584
+
585
+ const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
586
+ (async () => {
587
+ const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
588
+ pathExists(vectorsPath),
589
+ pathExists(recordsPath),
590
+ pathExists(contentPath),
591
+ pathExists(filesPath),
592
+ ]);
593
+ return vectorsOk && recordsOk && contentOk && filesOk;
594
+ })(),
595
+ pathExists(cacheFile),
596
+ ]);
597
+
598
+ if (useBinary && !binaryFilesPresent) {
599
+ if (jsonCachePresent) {
600
+ console.warn(
601
+ '[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.'
602
+ );
603
+ } else {
604
+ console.warn(
605
+ '[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.'
606
+ );
607
+ }
608
+ } else if (!useBinary && !useSqlite && !jsonCachePresent) {
609
+ if (binaryFilesPresent) {
610
+ console.warn(
611
+ '[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.'
612
+ );
613
+ } else {
614
+ console.warn(
615
+ '[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.'
616
+ );
617
+ }
618
+ }
619
+
620
+ const configuredVectorLoadMode =
621
+ typeof this.config.vectorStoreLoadMode === 'string'
622
+ ? this.config.vectorStoreLoadMode.toLowerCase()
623
+ : 'memory';
624
+ const effectiveVectorLoadMode =
625
+ forceVectorLoadMode === 'disk' || forceVectorLoadMode === 'memory'
626
+ ? forceVectorLoadMode
627
+ : configuredVectorLoadMode;
628
+
629
+ if (useBinary) {
630
+ try {
631
+ this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
632
+ contentCacheEntries: this.config.contentCacheEntries,
633
+ vectorCacheEntries: this.config.vectorCacheEntries,
634
+ vectorLoadMode: effectiveVectorLoadMode,
635
+ });
636
+ cacheData = await this.binaryStore.toChunkViews({
637
+ includeContent: this.config.vectorStoreContentMode === 'inline',
638
+ includeVector: effectiveVectorLoadMode !== 'disk',
639
+ });
640
+ } catch (err) {
641
+ this.binaryStore = null;
642
+ const isCorruption =
643
+ err instanceof BinaryStoreCorruptionError || err?.name === 'BinaryStoreCorruptionError';
644
+ if (isCorruption) {
645
+ console.warn(`[Cache] Binary store corruption detected: ${err.message}`);
646
+ this._corruptionDetected = true;
647
+ await recordBinaryStoreCorruption(this.config.cacheDirectory, {
648
+ message: err.message,
649
+ context: 'cache.load binary store',
650
+ action: 'detected',
651
+ });
652
+ } else {
653
+ console.warn(`[Cache] Failed to load binary vector store: ${err.message}`);
654
+ }
655
+ }
656
+ }
657
+
658
+ if (useSqlite) {
659
+ try {
660
+ this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
661
+ if (this.sqliteStore) {
662
+ cacheData = this.sqliteStore.toChunkViews({
663
+ includeContent: this.config.vectorStoreContentMode === 'inline',
664
+ includeVector: effectiveVectorLoadMode !== 'disk',
665
+ });
666
+ } else {
667
+ console.warn(
668
+ '[Cache] vectorStoreFormat=sqlite but vectors.sqlite is missing. Reindex to regenerate the cache.'
669
+ );
670
+ }
671
+ } catch (err) {
672
+ this.sqliteStore = null;
673
+ console.warn(`[Cache] Failed to load SQLite vector store: ${err.message}`);
674
+ }
675
+ }
676
+
677
+ if (!cacheData) {
678
+ cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
679
+ }
680
+
681
+ const hasCacheData = Array.isArray(cacheData);
682
+ const hasHashData = hashData && typeof hashData === 'object';
683
+
684
+ if (hasCacheData) {
685
+ const allowedExtensions = new Set(
686
+ (this.config.fileExtensions || []).map((ext) => `.${ext}`)
687
+ );
688
+ const allowedFileNames = new Set(this.config.fileNames || []);
689
+ const applyExtensionFilter = !this.binaryStore;
690
+ const shouldKeepFile = (filePath) => {
691
+ const ext = path.extname(filePath);
692
+ if (allowedExtensions.has(ext)) return true;
693
+ return allowedFileNames.has(path.basename(filePath));
694
+ };
695
+
696
+ const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
697
+ this.vectorStore = [];
698
+ this.fileHashes.clear();
699
+
700
+ for (const chunk of cacheData) {
701
+ if (applyExtensionFilter) {
702
+ if (!shouldKeepFile(chunk.file)) continue;
703
+ }
704
+ normalizeChunkVector(chunk);
705
+ this.vectorStore.push(chunk);
706
+ }
707
+ const filteredCount = cacheData.length - this.vectorStore.length;
708
+ if (filteredCount > 0 && this.config.verbose) {
709
+ console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
710
+ }
711
+
712
+ if (hasHashData) {
713
+ for (const [file, entry] of rawHashes) {
714
+ if (!applyExtensionFilter || shouldKeepFile(file)) {
715
+ const normalized = normalizeFileHashEntry(entry);
716
+ if (normalized) {
717
+ this.fileHashes.set(file, normalized);
718
+ }
719
+ }
720
+ }
721
+ } else {
722
+ console.warn(
723
+ '[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared'
724
+ );
725
+ }
726
+
727
+ assignChunkIndices(this.vectorStore);
728
+
729
+ if (this.config.verbose) {
730
+ console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
731
+ }
732
+
733
+ this.annDirty = false;
734
+ this.annPersistDirty = false;
735
+ this.annIndex = null;
736
+ this.annMeta = null;
737
+ this.annVectorCache = null;
738
+ } else if (cacheData) {
739
+ console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
740
+ } else if (hasHashData) {
741
+ console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
742
+ }
743
+
744
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
745
+ try {
746
+ const callGraphData = await fs.readFile(callGraphFile, 'utf8');
747
+ const parsed = JSON.parse(callGraphData);
748
+ this.fileCallData = new Map(Object.entries(parsed));
749
+ if (this.config.verbose) {
750
+ console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
751
+ }
752
+ } catch {}
753
+ } catch (error) {
754
+ console.warn('[Cache] Failed to load cache:', error.message);
755
+ this.clearInMemoryState();
756
+ }
757
+ }
758
+
776
759
  save({ throwOnError = false } = {}) {
777
760
  if (!this.config.enableCache) return Promise.resolve();
778
761
 
@@ -782,11 +765,11 @@ export class EmbeddingsCache {
782
765
  }
783
766
 
784
767
  if (this._saveTimer) return this._savePromise ?? Promise.resolve();
785
-
786
- const debounceMs = Number.isInteger(this.config.saveDebounceMs)
787
- ? this.config.saveDebounceMs
788
- : 250;
789
-
768
+
769
+ const debounceMs = Number.isInteger(this.config.saveDebounceMs)
770
+ ? this.config.saveDebounceMs
771
+ : 250;
772
+
790
773
  this._savePromise = new Promise((resolve, reject) => {
791
774
  this._saveTimer = setTimeout(() => {
792
775
  this._saveTimer = null;
@@ -794,9 +777,7 @@ export class EmbeddingsCache {
794
777
  this._saveThrowOnError = false;
795
778
 
796
779
  this.saveQueue = this.saveQueue
797
- .catch(() => {
798
-
799
- })
780
+ .catch(() => {})
800
781
  .then(async () => {
801
782
  while (this._saveRequested) {
802
783
  this._saveRequested = false;
@@ -806,15 +787,14 @@ export class EmbeddingsCache {
806
787
  .then(resolve, reject)
807
788
  .finally(() => {
808
789
  this._savePromise = null;
809
- });
810
- }, debounceMs);
811
- });
812
-
813
- return this._savePromise;
814
- }
815
-
790
+ });
791
+ }, debounceMs);
792
+ });
793
+
794
+ return this._savePromise;
795
+ }
796
+
816
797
  async performSave({ throwOnError = false } = {}) {
817
-
818
798
  this._saveInProgress = true;
819
799
  if (
820
800
  this.config.allowSystemWorkspaceCache !== true &&
@@ -829,232 +809,219 @@ export class EmbeddingsCache {
829
809
  return;
830
810
  }
831
811
 
832
-
833
812
  if (this.activeReads > 0) {
834
- const timeoutMs = this.config.saveReaderWaitTimeoutMs ?? DEFAULT_READER_WAIT_TIMEOUT_MS;
835
- const allReadersFinished = await this.waitForReadersWithTimeout(timeoutMs);
836
- if (!allReadersFinished && !this.config.forceSaveWithActiveReaders) {
837
- console.warn('[Cache] Aborting save - active readers still present after timeout');
838
- this._saveInProgress = false;
839
- return;
840
- }
841
- }
842
-
843
- this.isSaving = true;
844
-
845
- try {
846
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
847
-
848
- const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
849
- const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
850
- const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
851
-
852
-
853
-
854
- const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
855
- const supportsBackendVectorResolve =
856
- this.config.vectorStoreFormat === 'binary' || this.config.vectorStoreFormat === 'sqlite';
857
- const hasMissingVectors = snapshotStore.some(
858
- (chunk) => chunk && (chunk.vector === undefined || chunk.vector === null)
859
- );
860
- const useDiskVectors =
861
- supportsBackendVectorResolve &&
862
- (this.config.vectorStoreLoadMode === 'disk' || hasMissingVectors);
863
- if (hasMissingVectors && !useDiskVectors) {
864
- throw new Error(
865
- 'Missing vector data for cache write and backend vector resolution is unavailable'
866
- );
867
- }
868
-
869
- this.cacheMeta = {
870
- version: CACHE_META_VERSION,
871
- embeddingModel: this.config.embeddingModel,
872
- embeddingDimension: this.config.embeddingDimension ?? null,
873
- lastSaveTime: new Date().toISOString(),
874
- filesIndexed: this.fileHashes.size,
875
- chunksStored: snapshotStore.length,
876
- workspace: this.config.searchDirectory || null,
877
- };
878
- if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
879
- this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
880
- }
881
- if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
882
- Object.assign(this.cacheMeta, this.lastIndexStats);
883
- }
884
-
885
- const total = snapshotStore.length;
886
- if (this.config.vectorStoreFormat === 'binary') {
887
- this.binaryStore = await BinaryVectorStore.write(
888
- this.config.cacheDirectory,
889
- snapshotStore,
890
- {
891
- contentCacheEntries: this.config.contentCacheEntries,
892
- vectorCacheEntries: this.config.vectorCacheEntries,
893
- vectorLoadMode: useDiskVectors ? 'disk' : this.config.vectorStoreLoadMode,
894
- getContent: (chunk, index) => this.getChunkContent(chunk, index),
895
- getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
896
- preRename: async () => {
897
- if (this.activeReads > 0) {
898
- await this.waitForReadersWithTimeout(
899
- Number.isInteger(this.config.saveReaderWaitTimeoutMs)
900
- ? this.config.saveReaderWaitTimeoutMs
901
- : 5000
902
- );
903
- }
904
- if (this.binaryStore) {
905
- await this.binaryStore.close();
906
- this.binaryStore = null;
907
- }
908
- },
909
- }
910
- );
911
- if (this.binaryStore) {
912
- this.cacheMeta.chunksStored = this.binaryStore.length;
913
- }
914
- } else if (this.config.vectorStoreFormat === 'sqlite') {
915
-
916
- if (this.sqliteStore) {
917
- try {
918
- this.sqliteStore.close();
919
- } catch {
920
-
921
- }
922
- this.sqliteStore = null;
923
- }
924
- this.sqliteStore = await SqliteVectorStore.write(
925
- this.config.cacheDirectory,
926
- snapshotStore,
927
- {
928
- getContent: (chunk, index) => this.getChunkContent(chunk, index),
929
- getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
930
- preRename: async () => {
931
- if (this.activeReads > 0) {
932
- await this.waitForReadersWithTimeout(
933
- Number.isInteger(this.config.saveReaderWaitTimeoutMs)
934
- ? this.config.saveReaderWaitTimeoutMs
935
- : 5000
936
- );
937
- }
938
- },
939
- }
940
- );
941
- if (this.sqliteStore) {
942
- this.cacheMeta.chunksStored = this.sqliteStore.length();
943
- }
944
- } else {
945
- const vectorWriter = new StreamingJsonWriter(cacheFile, {
946
- highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
947
- floatDigits: this.config.cacheVectorFloatDigits ?? 6,
948
- flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
949
- indent: '',
950
- assumeFinite: this.config.cacheVectorAssumeFinite,
951
- checkFinite: this.config.cacheVectorCheckFinite,
952
- noMutation: this.config.cacheVectorNoMutation ?? false,
953
- joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
954
- joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
955
- });
956
-
957
- await vectorWriter.writeStart();
958
-
959
-
960
- const yieldEvery = total >= 50_000 ? 5000 : 0;
961
-
962
- try {
963
- for (let i = 0; i < total; i++) {
964
- const pending = vectorWriter.writeItem(snapshotStore[i]);
965
- if (pending) await pending;
966
- if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
967
- }
968
- await vectorWriter.writeEnd();
969
- } catch (e) {
970
- vectorWriter.abort(e);
971
- throw e;
972
- }
973
- }
974
-
975
- const hashEntries = {};
976
- for (const [file, entry] of this.fileHashes) {
977
- const serialized = serializeFileHashEntry(entry);
978
- if (serialized) {
979
- hashEntries[file] = serialized;
980
- }
981
- }
982
-
983
- await Promise.all([
984
- fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
985
- fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
986
- ]);
987
-
988
-
989
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
990
- if (this.fileCallData.size > 0) {
991
- await fs.writeFile(
992
- callGraphFile,
993
- JSON.stringify(Object.fromEntries(this.fileCallData), null, 2)
994
- );
995
- } else {
996
- await fs.rm(callGraphFile, { force: true });
997
- }
998
-
999
-
1000
-
813
+ const timeoutMs = this.config.saveReaderWaitTimeoutMs ?? DEFAULT_READER_WAIT_TIMEOUT_MS;
814
+ const allReadersFinished = await this.waitForReadersWithTimeout(timeoutMs);
815
+ if (!allReadersFinished && !this.config.forceSaveWithActiveReaders) {
816
+ console.warn('[Cache] Aborting save - active readers still present after timeout');
817
+ this._saveInProgress = false;
818
+ return;
819
+ }
820
+ }
821
+
822
+ this.isSaving = true;
823
+
824
+ try {
825
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
826
+
827
+ const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
828
+ const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
829
+ const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
830
+
831
+ const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
832
+ const supportsBackendVectorResolve =
833
+ this.config.vectorStoreFormat === 'binary' || this.config.vectorStoreFormat === 'sqlite';
834
+ const hasMissingVectors = snapshotStore.some(
835
+ (chunk) => chunk && (chunk.vector === undefined || chunk.vector === null)
836
+ );
837
+ const useDiskVectors =
838
+ supportsBackendVectorResolve &&
839
+ (this.config.vectorStoreLoadMode === 'disk' || hasMissingVectors);
840
+ if (hasMissingVectors && !useDiskVectors) {
841
+ throw new Error(
842
+ 'Missing vector data for cache write and backend vector resolution is unavailable'
843
+ );
844
+ }
845
+
846
+ this.cacheMeta = {
847
+ version: CACHE_META_VERSION,
848
+ embeddingModel: this.config.embeddingModel,
849
+ embeddingDimension: this.config.embeddingDimension ?? null,
850
+ lastSaveTime: new Date().toISOString(),
851
+ filesIndexed: this.fileHashes.size,
852
+ chunksStored: snapshotStore.length,
853
+ workspace: this.config.searchDirectory || null,
854
+ };
855
+ if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
856
+ this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
857
+ }
858
+ if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
859
+ Object.assign(this.cacheMeta, this.lastIndexStats);
860
+ }
861
+
862
+ const total = snapshotStore.length;
863
+ if (this.config.vectorStoreFormat === 'binary') {
864
+ this.binaryStore = await BinaryVectorStore.write(
865
+ this.config.cacheDirectory,
866
+ snapshotStore,
867
+ {
868
+ contentCacheEntries: this.config.contentCacheEntries,
869
+ vectorCacheEntries: this.config.vectorCacheEntries,
870
+ vectorLoadMode: useDiskVectors ? 'disk' : this.config.vectorStoreLoadMode,
871
+ getContent: (chunk, index) => this.getChunkContent(chunk, index),
872
+ getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
873
+ preRename: async () => {
874
+ if (this.activeReads > 0) {
875
+ await this.waitForReadersWithTimeout(
876
+ Number.isInteger(this.config.saveReaderWaitTimeoutMs)
877
+ ? this.config.saveReaderWaitTimeoutMs
878
+ : 5000
879
+ );
880
+ }
881
+ if (this.binaryStore) {
882
+ await this.binaryStore.close();
883
+ this.binaryStore = null;
884
+ }
885
+ },
886
+ }
887
+ );
888
+ if (this.binaryStore) {
889
+ this.cacheMeta.chunksStored = this.binaryStore.length;
890
+ }
891
+ } else if (this.config.vectorStoreFormat === 'sqlite') {
892
+ if (this.sqliteStore) {
893
+ try {
894
+ this.sqliteStore.close();
895
+ } catch {}
896
+ this.sqliteStore = null;
897
+ }
898
+ this.sqliteStore = await SqliteVectorStore.write(
899
+ this.config.cacheDirectory,
900
+ snapshotStore,
901
+ {
902
+ getContent: (chunk, index) => this.getChunkContent(chunk, index),
903
+ getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
904
+ preRename: async () => {
905
+ if (this.activeReads > 0) {
906
+ await this.waitForReadersWithTimeout(
907
+ Number.isInteger(this.config.saveReaderWaitTimeoutMs)
908
+ ? this.config.saveReaderWaitTimeoutMs
909
+ : 5000
910
+ );
911
+ }
912
+ },
913
+ }
914
+ );
915
+ if (this.sqliteStore) {
916
+ this.cacheMeta.chunksStored = this.sqliteStore.length();
917
+ }
918
+ } else {
919
+ const vectorWriter = new StreamingJsonWriter(cacheFile, {
920
+ highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
921
+ floatDigits: this.config.cacheVectorFloatDigits ?? 6,
922
+ flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
923
+ indent: '',
924
+ assumeFinite: this.config.cacheVectorAssumeFinite,
925
+ checkFinite: this.config.cacheVectorCheckFinite,
926
+ noMutation: this.config.cacheVectorNoMutation ?? false,
927
+ joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
928
+ joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
929
+ });
930
+
931
+ await vectorWriter.writeStart();
932
+
933
+ const yieldEvery = total >= 50_000 ? 5000 : 0;
934
+
935
+ try {
936
+ for (let i = 0; i < total; i++) {
937
+ const pending = vectorWriter.writeItem(snapshotStore[i]);
938
+ if (pending) await pending;
939
+ if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
940
+ }
941
+ await vectorWriter.writeEnd();
942
+ } catch (e) {
943
+ vectorWriter.abort(e);
944
+ throw e;
945
+ }
946
+ }
947
+
948
+ const hashEntries = {};
949
+ for (const [file, entry] of this.fileHashes) {
950
+ const serialized = serializeFileHashEntry(entry);
951
+ if (serialized) {
952
+ hashEntries[file] = serialized;
953
+ }
954
+ }
955
+
956
+ await Promise.all([
957
+ fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
958
+ fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
959
+ ]);
960
+
961
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
962
+ if (this.fileCallData.size > 0) {
963
+ await fs.writeFile(
964
+ callGraphFile,
965
+ JSON.stringify(Object.fromEntries(this.fileCallData), null, 2)
966
+ );
967
+ } else {
968
+ await fs.rm(callGraphFile, { force: true });
969
+ }
970
+
1001
971
  if (
1002
972
  this.config.annIndexCache !== false &&
1003
973
  this.annPersistDirty &&
1004
- !this.annDirty &&
1005
- !this._annWriting &&
1006
- this.annIndex &&
1007
- this.annMeta
1008
- ) {
1009
- this._annWriting = true;
1010
- try {
1011
- const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
1012
- this.annIndex.writeIndexSync(indexFile);
1013
- await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
1014
- this.annPersistDirty = false;
1015
- if (this.config.verbose) {
1016
- console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
1017
- }
1018
- } catch (error) {
1019
- console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
1020
- } finally {
1021
- this._annWriting = false;
974
+ !this.annDirty &&
975
+ !this._annWriting &&
976
+ this.annIndex &&
977
+ this.annMeta
978
+ ) {
979
+ this._annWriting = true;
980
+ try {
981
+ const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
982
+ this.annIndex.writeIndexSync(indexFile);
983
+ await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
984
+ this.annPersistDirty = false;
985
+ if (this.config.verbose) {
986
+ console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
987
+ }
988
+ } catch (error) {
989
+ console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
990
+ } finally {
991
+ this._annWriting = false;
1022
992
  }
1023
993
  }
1024
994
  this.lastSaveError = null;
1025
995
  } catch (error) {
1026
996
  this.lastSaveError = error instanceof Error ? error : new Error(String(error));
1027
997
  console.warn('[Cache] Failed to save cache:', this.lastSaveError.message);
1028
-
1029
- if (
1030
- this.config.vectorStoreFormat === 'binary' &&
1031
- this.binaryStore &&
1032
- !this.binaryStore.vectorsBuffer
1033
- ) {
1034
- try {
1035
- console.info('[Cache] Attempting to recover binary store after failed save...');
1036
- this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
1037
- contentCacheEntries: this.config.contentCacheEntries,
1038
- });
1039
- console.info('[Cache] Binary store recovered.');
1040
- } catch (recoverErr) {
1041
- console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
1042
- this.binaryStore = null;
1043
- }
1044
- }
1045
-
998
+
1046
999
  if (
1047
- this.config.vectorStoreFormat === 'sqlite' &&
1048
- !this.sqliteStore
1049
- ) {
1050
- try {
1051
- console.info('[Cache] Attempting to recover SQLite store after failed save...');
1052
- this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
1053
- if (this.sqliteStore) {
1054
- console.info('[Cache] SQLite store recovered.');
1055
- }
1056
- } catch (recoverErr) {
1057
- console.warn(`[Cache] Failed to recover SQLite store: ${recoverErr.message}`);
1000
+ this.config.vectorStoreFormat === 'binary' &&
1001
+ this.binaryStore &&
1002
+ !this.binaryStore.vectorsBuffer
1003
+ ) {
1004
+ try {
1005
+ console.info('[Cache] Attempting to recover binary store after failed save...');
1006
+ this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
1007
+ contentCacheEntries: this.config.contentCacheEntries,
1008
+ });
1009
+ console.info('[Cache] Binary store recovered.');
1010
+ } catch (recoverErr) {
1011
+ console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
1012
+ this.binaryStore = null;
1013
+ }
1014
+ }
1015
+
1016
+ if (this.config.vectorStoreFormat === 'sqlite' && !this.sqliteStore) {
1017
+ try {
1018
+ console.info('[Cache] Attempting to recover SQLite store after failed save...');
1019
+ this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
1020
+ if (this.sqliteStore) {
1021
+ console.info('[Cache] SQLite store recovered.');
1022
+ }
1023
+ } catch (recoverErr) {
1024
+ console.warn(`[Cache] Failed to recover SQLite store: ${recoverErr.message}`);
1058
1025
  this.sqliteStore = null;
1059
1026
  }
1060
1027
  }
@@ -1065,778 +1032,743 @@ export class EmbeddingsCache {
1065
1032
  }
1066
1033
  } finally {
1067
1034
  this.isSaving = false;
1068
- this._saveInProgress = false;
1069
- }
1070
- }
1071
-
1072
-
1073
-
1074
- getVectorStore() {
1075
- return Array.isArray(this.vectorStore) ? this.vectorStore : [];
1076
- }
1077
-
1078
- async setVectorStore(store) {
1079
- const previousBinaryStore = this.binaryStore;
1080
- const previousSqliteStore = this.sqliteStore;
1081
- this.vectorStore = store;
1082
- this.binaryStore = null;
1083
- this.sqliteStore = null;
1084
- if (Array.isArray(this.vectorStore)) {
1085
- for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
1086
- assignChunkIndices(this.vectorStore);
1087
- }
1088
- this.invalidateAnnIndex();
1089
- if (previousBinaryStore) {
1090
- try {
1091
- await previousBinaryStore.close();
1092
- } catch {
1093
-
1094
- }
1095
- }
1096
- if (previousSqliteStore) {
1097
- try {
1098
- previousSqliteStore.close();
1099
- } catch {
1100
-
1101
- }
1102
- }
1103
- }
1104
-
1105
- setLastIndexDuration(durationMs) {
1106
- if (Number.isFinite(durationMs) && durationMs >= 0) {
1107
- this.lastIndexDurationMs = durationMs;
1108
- }
1109
- }
1110
-
1111
- setLastIndexStats(stats) {
1112
- if (stats && typeof stats === 'object') {
1113
- this.lastIndexStats = { ...stats };
1114
- }
1115
- }
1116
-
1117
- getFileHash(file) {
1118
- const entry = this.fileHashes.get(file);
1119
- if (typeof entry === 'string') return entry;
1120
- return entry?.hash;
1121
- }
1122
-
1123
- getFileHashKeys() {
1124
- return Array.from(this.fileHashes.keys());
1125
- }
1126
-
1127
- getFileHashCount() {
1128
- return this.fileHashes.size;
1129
- }
1130
-
1131
- clearFileHashes() {
1132
- this.fileHashes.clear();
1133
- }
1134
-
1135
- setFileHashes(entries) {
1136
- this.fileHashes.clear();
1137
- if (!entries || typeof entries !== 'object') return;
1138
- const iterator =
1139
- entries instanceof Map
1140
- ? entries.entries()
1141
- : Object.entries(entries);
1142
- if (!iterator) return;
1143
- for (const [file, entry] of iterator) {
1144
- const normalized = normalizeFileHashEntry(entry);
1145
- if (normalized) {
1146
- this.fileHashes.set(file, normalized);
1147
- }
1148
- }
1149
- }
1150
-
1151
- setFileHash(file, hash, meta = null) {
1152
- const entry = { hash };
1153
- if (meta && typeof meta === 'object') {
1154
- if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
1155
- if (Number.isFinite(meta.size)) entry.size = meta.size;
1156
- }
1157
- this.fileHashes.set(file, entry);
1158
- }
1159
-
1160
- getFileMeta(file) {
1161
- const entry = this.fileHashes.get(file);
1162
- if (!entry) return null;
1163
- if (typeof entry === 'string') return { hash: entry };
1164
- return entry;
1165
- }
1166
-
1167
- getChunkVector(chunk, index = null) {
1168
- if (typeof chunk === 'number') {
1169
- const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1170
- const entry = store ? store[chunk] : null;
1171
- if (entry?.vector) return entry.vector;
1172
- if (this.binaryStore) {
1173
- const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
1174
- return this.binaryStore.getVector(resolved);
1175
- }
1176
- if (this.sqliteStore) {
1177
- const resolved = Number.isInteger(entry?._sqliteIndex) ? entry._sqliteIndex : chunk;
1178
- return this.sqliteStore.getVector(resolved);
1179
- }
1180
- return null;
1181
- }
1182
-
1183
- if (chunk?.vector) return chunk.vector;
1184
- const resolved = Number.isInteger(index) ? index : chunk?._index;
1185
- if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1186
- return this.binaryStore.getVector(chunk._binaryIndex);
1187
- }
1188
- if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1189
- return this.binaryStore.getVector(resolved);
1190
- }
1191
- if (this.sqliteStore) {
1192
- const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1193
- ? chunk._sqliteIndex
1194
- : Number.isInteger(chunk?.index)
1195
- ? chunk.index
1196
- : resolved;
1197
- if (Number.isInteger(sqliteIndex)) {
1198
- return this.sqliteStore.getVector(sqliteIndex);
1199
- }
1200
- }
1201
- return null;
1202
- }
1203
-
1204
- async getChunkContent(chunk, index = null) {
1205
- if (typeof chunk === 'number') {
1206
- const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1207
- const entry = store ? store[chunk] : null;
1208
- if (entry) return await this.getChunkContent(entry, chunk);
1209
- if (!store && this.binaryStore) {
1210
- const content = await this.binaryStore.getContent(chunk);
1211
- return content ?? '';
1212
- }
1213
- if (!store && this.sqliteStore) {
1214
- return this.sqliteStore.getContent(chunk) ?? '';
1215
- }
1216
- return '';
1217
- }
1218
- if (chunk?.content !== undefined && chunk?.content !== null) {
1219
- return chunk.content;
1220
- }
1221
- if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1222
- const content = await this.binaryStore.getContent(chunk._binaryIndex);
1223
- return content ?? '';
1224
- }
1225
- const resolved = Number.isInteger(index) ? index : chunk?._index;
1226
- if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1227
- const content = await this.binaryStore.getContent(resolved);
1228
- return content ?? '';
1229
- }
1230
- if (this.sqliteStore) {
1231
- const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1232
- ? chunk._sqliteIndex
1233
- : Number.isInteger(chunk?.index)
1234
- ? chunk.index
1235
- : resolved;
1236
- if (Number.isInteger(sqliteIndex)) {
1237
- return this.sqliteStore.getContent(sqliteIndex) ?? '';
1238
- }
1239
- }
1240
- return '';
1241
- }
1242
-
1243
- deleteFileHash(file) {
1244
- this.fileHashes.delete(file);
1245
- }
1246
-
1247
-
1248
- async removeFileFromStore(file) {
1249
- if (!Array.isArray(this.vectorStore)) return;
1250
-
1251
- let w = 0;
1252
- for (let r = 0; r < this.vectorStore.length; r++) {
1253
- const chunk = this.vectorStore[r];
1254
- if (chunk.file !== file) {
1255
- chunk._index = w;
1256
- this.vectorStore[w++] = chunk;
1257
- }
1258
- }
1259
- this.vectorStore.length = w;
1260
-
1261
-
1262
- this.invalidateAnnIndex();
1263
- this.removeFileCallData(file);
1264
-
1265
- this.fileHashes.delete(file);
1266
- }
1267
-
1268
- addToStore(chunk) {
1269
- normalizeChunkVector(chunk);
1270
-
1271
- if (!Array.isArray(this.vectorStore)) {
1272
- this.vectorStore = [];
1273
- }
1274
-
1275
- const label = this.vectorStore.length;
1276
- chunk._index = label;
1277
- this.vectorStore.push(chunk);
1278
- if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
1279
- this.annVectorCache.push(chunk.vector);
1280
- }
1281
-
1282
-
1283
- if (
1284
- this.annIndex &&
1285
- !this.annDirty &&
1286
- this.annMeta &&
1287
- typeof this.annIndex.addPoint === 'function' &&
1288
- this.annMeta.count === label &&
1289
- this.annMeta.maxElements > this.annMeta.count
1290
- ) {
1291
- try {
1292
- this.annIndex.addPoint(chunk.vector, label);
1293
- this.annMeta.count += 1;
1294
- this.annPersistDirty = true;
1295
- return;
1296
- } catch {
1297
-
1298
- }
1299
- }
1300
-
1301
- this.invalidateAnnIndex();
1302
- }
1303
-
1304
- invalidateAnnIndex() {
1305
- this.annIndex = null;
1306
- this.annMeta = null;
1307
- this.annDirty = true;
1308
- this.annPersistDirty = false;
1309
- this.annVectorCache = null;
1310
- }
1311
-
1312
- getAnnVector(index) {
1313
- if (!Array.isArray(this.vectorStore)) return null;
1314
- const chunk = this.vectorStore[index];
1315
- if (!chunk) return null;
1316
-
1317
- if (
1318
- !Array.isArray(this.annVectorCache) ||
1319
- this.annVectorCache.length !== this.vectorStore.length
1320
- ) {
1321
- this.annVectorCache = new Array(this.vectorStore.length);
1322
- }
1323
-
1324
- const cached = this.annVectorCache[index];
1325
- if (cached) return cached;
1326
-
1327
- let vec = null;
1328
- if (chunk.vector) {
1329
- vec = ensureFloat32(chunk.vector);
1330
- } else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
1331
- vec = this.binaryStore.getVector(chunk._binaryIndex);
1332
- } else if (this.sqliteStore) {
1333
- const sqliteIndex = Number.isInteger(chunk._sqliteIndex)
1334
- ? chunk._sqliteIndex
1335
- : Number.isInteger(chunk.index)
1336
- ? chunk.index
1337
- : index;
1338
- if (Number.isInteger(sqliteIndex)) {
1339
- vec = this.sqliteStore.getVector(sqliteIndex);
1340
- }
1341
- }
1342
-
1343
- if (!vec) return null;
1344
-
1345
- if (this.config.vectorStoreLoadMode !== 'disk') {
1346
- chunk.vector = vec;
1347
- }
1348
- this.annVectorCache[index] = vec;
1349
- return vec;
1350
- }
1351
-
1352
- getAnnIndexPaths() {
1353
- return {
1354
- indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
1355
- metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
1356
- };
1357
- }
1358
-
1359
-
1360
-
1361
-
1362
- async ensureAnnIndex() {
1363
- if (!this.config.annEnabled) return null;
1364
- if (!Array.isArray(this.vectorStore)) return null;
1365
- if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
1366
- if (this.annIndex && !this.annDirty) return this.annIndex;
1367
- if (this.annLoading) return this.annLoading;
1368
-
1369
- this.annLoading = (async () => {
1370
- try {
1371
- const HierarchicalNSW = await loadHnswlib();
1372
- if (!HierarchicalNSW) {
1373
- if (hnswlibLoadError) {
1374
- this.addInitError('loadHnswlib', hnswlibLoadError);
1375
- }
1376
- return null;
1377
- }
1378
-
1379
- const dim =
1380
- this.vectorStore[0]?.vector?.length ||
1381
- this.binaryStore?.dim ||
1382
- this.sqliteStore?.dim;
1383
- if (!dim) return null;
1384
-
1385
-
1386
-
1387
- let dimensionMismatch = false;
1388
- const sampleSize = Math.min(ANN_DIMENSION_SAMPLE_SIZE, this.vectorStore.length);
1389
- const step = Math.max(1, Math.floor(this.vectorStore.length / sampleSize));
1390
- for (let i = step; i < this.vectorStore.length; i += step) {
1391
- const v = this.vectorStore[i]?.vector;
1392
- if (v && v.length !== dim) {
1393
- dimensionMismatch = true;
1394
- console.warn(
1395
- `[ANN] Dimension mismatch at index ${i}: expected ${dim}, got ${v.length}. ` +
1396
- 'This may indicate a config change mid-index. Consider full reindex.'
1397
- );
1398
- break;
1399
- }
1400
- }
1401
-
1402
- if (dimensionMismatch) {
1403
- this.addInitError('ensureAnnIndex', `Vector dimension inconsistency detected. Expected ${dim}. Full reindex required.`);
1404
- return null;
1405
- }
1406
-
1407
- if (!this.annDirty && this.config.annIndexCache !== false) {
1408
- const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
1409
- if (loaded) return this.annIndex;
1410
- }
1411
-
1412
- return await this.buildAnnIndex(HierarchicalNSW, dim);
1413
- } finally {
1414
- this.annLoading = null;
1415
- }
1416
- })();
1417
-
1418
- return this.annLoading;
1419
- }
1420
-
1421
- async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
1422
- const { indexFile, metaFile } = this.getAnnIndexPaths();
1423
- const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
1424
- if (!metaData) return false;
1425
-
1426
- let meta;
1427
- try {
1428
- meta = JSON.parse(metaData);
1429
- } catch {
1430
- console.warn('[ANN] Invalid ANN metadata, rebuilding');
1431
- return false;
1432
- }
1433
-
1434
- if (meta?.version !== ANN_META_VERSION) {
1435
- console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
1436
- return false;
1437
- }
1438
-
1439
- if (meta?.embeddingModel !== this.config.embeddingModel) {
1440
- console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
1441
- return false;
1442
- }
1443
-
1444
- if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
1445
- console.warn('[ANN] ANN index size mismatch, rebuilding');
1446
- return false;
1447
- }
1448
-
1449
- if (
1450
- meta?.metric !== this.config.annMetric ||
1451
- meta?.m !== this.config.annM ||
1452
- meta?.efConstruction !== this.config.annEfConstruction
1453
- ) {
1454
- console.warn('[ANN] ANN index config changed, rebuilding');
1455
- return false;
1456
- }
1457
-
1458
- let maxElements = meta?.maxElements;
1459
- if (!Number.isInteger(maxElements)) {
1460
- maxElements = meta.count;
1461
- } else if (maxElements < meta.count) {
1462
- console.warn('[ANN] ANN capacity invalid, rebuilding');
1463
- return false;
1464
- }
1465
-
1466
- const index = new HierarchicalNSW(meta.metric, dim);
1467
- const loaded = readHnswIndex(index, indexFile, maxElements);
1468
- if (!loaded) {
1469
- console.warn('[ANN] Failed to load ANN index file, rebuilding');
1470
- return false;
1471
- }
1472
-
1473
- if (typeof index.setEf === 'function') {
1474
- index.setEf(this.config.annEfSearch);
1475
- }
1476
-
1477
- this.annIndex = index;
1478
- this.annMeta = { ...meta, maxElements };
1479
- this.annDirty = false;
1480
- this.annPersistDirty = false;
1481
-
1482
- if (this.config.verbose) {
1483
- console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
1484
- }
1485
- return true;
1486
- }
1487
-
1488
- async buildAnnIndex(HierarchicalNSW, dim) {
1489
- if (!Array.isArray(this.vectorStore)) return null;
1490
- const total = this.vectorStore.length;
1491
- if (total === 0) return null;
1492
-
1493
- try {
1494
- const index = new HierarchicalNSW(this.config.annMetric, dim);
1495
-
1496
- const maxElements = computeAnnCapacity(total, this.config);
1497
- initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);
1498
-
1499
- const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
1500
- ? this.config.annBuildYieldEvery
1501
- : 1000;
1502
-
1503
- for (let i = 0; i < total; i++) {
1504
- const vector = this.getAnnVector(i);
1505
- if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
1506
- index.addPoint(vector, i);
1507
-
1508
- if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
1509
- await yieldToLoop();
1510
- }
1511
- }
1512
-
1513
- if (typeof index.setEf === 'function') {
1514
- index.setEf(this.config.annEfSearch);
1515
- }
1516
-
1517
- this.annIndex = index;
1518
- this.annMeta = {
1519
- version: ANN_META_VERSION,
1520
- embeddingModel: this.config.embeddingModel,
1521
- metric: this.config.annMetric,
1522
- dim,
1523
- count: total,
1524
- maxElements,
1525
- m: this.config.annM,
1526
- efConstruction: this.config.annEfConstruction,
1527
- efSearch: this.config.annEfSearch,
1528
- };
1529
- this.annDirty = false;
1530
- this.annPersistDirty = true;
1531
-
1532
- if (this.config.annIndexCache !== false) {
1533
- try {
1534
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
1535
- const { indexFile, metaFile } = this.getAnnIndexPaths();
1536
- index.writeIndexSync(indexFile);
1537
- await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
1538
- this.annPersistDirty = false;
1539
- if (this.config.verbose) {
1540
- console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
1541
- }
1542
- } catch (error) {
1543
- console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
1544
- }
1545
- }
1546
-
1547
- return index;
1548
- } catch (error) {
1549
- console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
1550
- this.addInitError('buildAnnIndex', error);
1551
- this.annIndex = null;
1552
- this.annMeta = null;
1553
- this.annDirty = true;
1554
- this.annPersistDirty = false;
1555
- return null;
1556
- }
1557
- }
1558
-
1559
-
1560
- async queryAnn(queryVector, k) {
1561
- if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
1562
- const index = await this.ensureAnnIndex();
1563
- if (!index) return [];
1564
-
1565
- const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
1566
-
1567
-
1568
- let results;
1569
- try {
1570
- results = index.searchKnn(qVec, k);
1571
- } catch (err) {
1572
- console.warn(`[ANN] searchKnn failed: ${err.message}. Falling back to linear search.`);
1573
- this.addInitError('queryAnn', err);
1574
-
1575
- this.invalidateAnnIndex();
1576
- return [];
1577
- }
1578
-
1579
- const labels = normalizeLabels(results);
1580
-
1581
- if (labels.length === 0) return [];
1582
-
1583
- const filtered = labels.filter(
1584
- (label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
1585
- );
1586
-
1587
- return filtered;
1588
- }
1589
-
1590
- async clear() {
1591
- if (!this.config.enableCache) return;
1592
-
1593
- try {
1594
- await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
1595
- this.vectorStore = [];
1596
- if (this.binaryStore) {
1597
- try {
1598
- await this.binaryStore.close();
1599
- } catch {
1600
-
1601
- }
1602
- }
1603
- this.binaryStore = null;
1604
- if (this.sqliteStore) {
1605
- try {
1606
- this.sqliteStore.close();
1607
- } catch {
1608
-
1609
- }
1610
- }
1611
- this.sqliteStore = null;
1612
- this.fileHashes = new Map();
1613
- this.invalidateAnnIndex();
1614
- await this.clearCallGraphData();
1615
- if (this.config.verbose) {
1616
- console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
1617
- }
1618
- } catch (error) {
1619
- console.error('[Cache] Failed to clear cache:', error.message);
1620
- throw error;
1621
- }
1622
- }
1623
-
1624
-
1625
- setEfSearch(efSearch) {
1626
- if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
1627
- return {
1628
- success: false,
1629
- error: 'efSearch must be a number between 1 and 1000',
1630
- };
1631
- }
1632
-
1633
- this.config.annEfSearch = efSearch;
1634
-
1635
- if (this.annIndex && typeof this.annIndex.setEf === 'function') {
1636
- this.annIndex.setEf(efSearch);
1637
- if (this.annMeta) this.annMeta.efSearch = efSearch;
1638
- this.annPersistDirty = true;
1639
- if (this.config.verbose) {
1640
- console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
1641
- }
1642
- return { success: true, applied: true, efSearch };
1643
- }
1644
-
1645
- if (this.config.verbose) {
1646
- console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
1647
- }
1648
- return { success: true, applied: false, efSearch };
1649
- }
1650
-
1651
-
1652
- getAnnStats() {
1653
- return {
1654
- enabled: this.config.annEnabled ?? false,
1655
- indexLoaded: this.annIndex !== null,
1656
- dirty: this.annDirty,
1657
- vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
1658
- minChunksForAnn: this.config.annMinChunks ?? 5000,
1659
- config: this.annMeta
1660
- ? {
1661
- metric: this.annMeta.metric,
1662
- dim: this.annMeta.dim,
1663
- count: this.annMeta.count,
1664
- m: this.annMeta.m,
1665
- efConstruction: this.annMeta.efConstruction,
1666
- efSearch: this.config.annEfSearch,
1667
- }
1668
- : null,
1669
- };
1670
- }
1671
-
1672
-
1673
-
1674
- async clearCallGraphData({ removeFile = false } = {}) {
1675
- this.fileCallData.clear();
1676
- this.callGraph = null;
1677
-
1678
- if (removeFile && this.config.enableCache) {
1679
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
1680
- try {
1681
- await fs.rm(callGraphFile, { force: true });
1682
- } catch (error) {
1683
- if (this.config.verbose) {
1684
- console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
1685
- }
1686
- }
1687
- }
1688
- }
1689
-
1690
- pruneCallGraphData(validFiles) {
1691
- if (!validFiles || this.fileCallData.size === 0) return 0;
1692
-
1693
- let pruned = 0;
1694
- for (const file of Array.from(this.fileCallData.keys())) {
1695
- if (!validFiles.has(file)) {
1696
- this.fileCallData.delete(file);
1697
- pruned++;
1698
- }
1699
- }
1700
-
1701
- if (pruned > 0) this.callGraph = null;
1702
- return pruned;
1703
- }
1704
-
1705
- getFileCallData(file) {
1706
- return this.fileCallData.get(file);
1707
- }
1708
-
1709
- hasFileCallData(file) {
1710
- return this.fileCallData.has(file);
1711
- }
1712
-
1713
- getFileCallDataKeys() {
1714
- return Array.from(this.fileCallData.keys());
1715
- }
1716
-
1717
- getFileCallDataCount() {
1718
- return this.fileCallData.size;
1719
- }
1720
-
1721
-
1722
- setFileCallData(file, data) {
1723
- this.fileCallData.set(file, data);
1724
- this.callGraph = null;
1725
- }
1726
-
1727
-
1728
- setFileCallDataEntries(entries) {
1729
- if (entries instanceof Map) {
1730
- this.fileCallData = entries;
1731
- } else {
1732
- this.fileCallData.clear();
1733
- if (entries && typeof entries === 'object') {
1734
- for (const [file, data] of Object.entries(entries)) {
1735
- this.fileCallData.set(file, data);
1736
- }
1737
- }
1738
- }
1739
- this.callGraph = null;
1740
- }
1741
-
1742
- clearFileCallData() {
1743
- this.fileCallData.clear();
1744
- this.callGraph = null;
1745
- }
1746
-
1747
- removeFileCallData(file) {
1748
- this.fileCallData.delete(file);
1749
- this.callGraph = null;
1750
- }
1751
-
1752
- async rebuildCallGraph() {
1753
- if (this._callGraphBuild) return this._callGraphBuild;
1754
-
1755
- this._callGraphBuild = (async () => {
1756
- try {
1757
- const { buildCallGraph } = await import('./call-graph.js');
1758
- this.callGraph = buildCallGraph(this.fileCallData);
1759
- if (this.config.verbose && this.callGraph) {
1760
- console.info(
1761
- `[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`
1762
- );
1763
- }
1764
- } catch (err) {
1765
- console.error(`[CallGraph] Failed to build: ${err.message}`);
1766
- this.callGraph = null;
1767
- } finally {
1768
- this._callGraphBuild = null;
1769
- }
1770
- })();
1771
-
1772
- return this._callGraphBuild;
1773
- }
1774
-
1775
- async getRelatedFiles(symbols) {
1776
- if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
1777
- if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
1778
- if (!this.callGraph) return new Map();
1779
-
1780
- const { getRelatedFiles } = await import('./call-graph.js');
1781
- return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
1782
- }
1783
-
1784
- getCallGraphStats() {
1785
- return {
1786
- enabled: this.config.callGraphEnabled ?? false,
1787
- filesWithData: this.fileCallData.size,
1788
- graphBuilt: this.callGraph !== null,
1789
- definitions: this.callGraph?.defines.size ?? 0,
1790
- callTargets: this.callGraph?.calledBy.size ?? 0,
1791
- };
1792
- }
1793
-
1794
-
1795
-
1796
-
1797
- getStoreSize() {
1798
- if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
1799
- if (this.binaryStore) return this.binaryStore.length;
1800
- if (this.sqliteStore) return this.sqliteStore.length();
1801
- return 0;
1802
- }
1803
-
1804
-
1805
- getVector(index) {
1806
- return this.getChunkVector(index);
1807
- }
1808
-
1809
-
1810
- getChunk(index) {
1811
- if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
1812
- return this.vectorStore[index];
1813
- }
1814
- if (this.binaryStore) {
1815
- const record = this.binaryStore.getRecord(index);
1816
- if (record) {
1817
- return {
1818
- file: record.file,
1819
- startLine: record.startLine,
1820
- endLine: record.endLine,
1821
- vector: this.binaryStore.getVector(index),
1822
- _index: index,
1823
- _binaryIndex: index,
1824
- };
1825
- }
1826
- }
1827
- if (this.sqliteStore) {
1828
- const record = this.sqliteStore.getRecord(index);
1829
- if (record) {
1830
- return {
1831
- file: record.file,
1832
- startLine: record.startLine,
1833
- endLine: record.endLine,
1834
- vector: this.sqliteStore.getVector(index),
1835
- _index: index,
1836
- _sqliteIndex: index,
1837
- };
1838
- }
1839
- }
1840
- return null;
1841
- }
1842
- }
1035
+ this._saveInProgress = false;
1036
+ }
1037
+ }
1038
+
1039
+ getVectorStore() {
1040
+ return Array.isArray(this.vectorStore) ? this.vectorStore : [];
1041
+ }
1042
+
1043
+ async setVectorStore(store) {
1044
+ const previousBinaryStore = this.binaryStore;
1045
+ const previousSqliteStore = this.sqliteStore;
1046
+ this.vectorStore = store;
1047
+ this.binaryStore = null;
1048
+ this.sqliteStore = null;
1049
+ if (Array.isArray(this.vectorStore)) {
1050
+ for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
1051
+ assignChunkIndices(this.vectorStore);
1052
+ }
1053
+ this.invalidateAnnIndex();
1054
+ if (previousBinaryStore) {
1055
+ try {
1056
+ await previousBinaryStore.close();
1057
+ } catch {}
1058
+ }
1059
+ if (previousSqliteStore) {
1060
+ try {
1061
+ previousSqliteStore.close();
1062
+ } catch {}
1063
+ }
1064
+ }
1065
+
1066
+ setLastIndexDuration(durationMs) {
1067
+ if (Number.isFinite(durationMs) && durationMs >= 0) {
1068
+ this.lastIndexDurationMs = durationMs;
1069
+ }
1070
+ }
1071
+
1072
+ setLastIndexStats(stats) {
1073
+ if (stats && typeof stats === 'object') {
1074
+ this.lastIndexStats = { ...stats };
1075
+ }
1076
+ }
1077
+
1078
+ getFileHash(file) {
1079
+ const entry = this.fileHashes.get(file);
1080
+ if (typeof entry === 'string') return entry;
1081
+ return entry?.hash;
1082
+ }
1083
+
1084
+ getFileHashKeys() {
1085
+ return Array.from(this.fileHashes.keys());
1086
+ }
1087
+
1088
+ getFileHashCount() {
1089
+ return this.fileHashes.size;
1090
+ }
1091
+
1092
+ clearFileHashes() {
1093
+ this.fileHashes.clear();
1094
+ }
1095
+
1096
+ setFileHashes(entries) {
1097
+ this.fileHashes.clear();
1098
+ if (!entries || typeof entries !== 'object') return;
1099
+ const iterator = entries instanceof Map ? entries.entries() : Object.entries(entries);
1100
+ if (!iterator) return;
1101
+ for (const [file, entry] of iterator) {
1102
+ const normalized = normalizeFileHashEntry(entry);
1103
+ if (normalized) {
1104
+ this.fileHashes.set(file, normalized);
1105
+ }
1106
+ }
1107
+ }
1108
+
1109
+ setFileHash(file, hash, meta = null) {
1110
+ const entry = { hash };
1111
+ if (meta && typeof meta === 'object') {
1112
+ if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
1113
+ if (Number.isFinite(meta.size)) entry.size = meta.size;
1114
+ }
1115
+ this.fileHashes.set(file, entry);
1116
+ }
1117
+
1118
+ getFileMeta(file) {
1119
+ const entry = this.fileHashes.get(file);
1120
+ if (!entry) return null;
1121
+ if (typeof entry === 'string') return { hash: entry };
1122
+ return entry;
1123
+ }
1124
+
1125
+ getChunkVector(chunk, index = null) {
1126
+ if (typeof chunk === 'number') {
1127
+ const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1128
+ const entry = store ? store[chunk] : null;
1129
+ if (entry?.vector) return entry.vector;
1130
+ if (this.binaryStore) {
1131
+ const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
1132
+ return this.binaryStore.getVector(resolved);
1133
+ }
1134
+ if (this.sqliteStore) {
1135
+ const resolved = Number.isInteger(entry?._sqliteIndex) ? entry._sqliteIndex : chunk;
1136
+ return this.sqliteStore.getVector(resolved);
1137
+ }
1138
+ return null;
1139
+ }
1140
+
1141
+ if (chunk?.vector) return chunk.vector;
1142
+ const resolved = Number.isInteger(index) ? index : chunk?._index;
1143
+ if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1144
+ return this.binaryStore.getVector(chunk._binaryIndex);
1145
+ }
1146
+ if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1147
+ return this.binaryStore.getVector(resolved);
1148
+ }
1149
+ if (this.sqliteStore) {
1150
+ const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1151
+ ? chunk._sqliteIndex
1152
+ : Number.isInteger(chunk?.index)
1153
+ ? chunk.index
1154
+ : resolved;
1155
+ if (Number.isInteger(sqliteIndex)) {
1156
+ return this.sqliteStore.getVector(sqliteIndex);
1157
+ }
1158
+ }
1159
+ return null;
1160
+ }
1161
+
1162
+ async getChunkContent(chunk, index = null) {
1163
+ if (typeof chunk === 'number') {
1164
+ const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1165
+ const entry = store ? store[chunk] : null;
1166
+ if (entry) return await this.getChunkContent(entry, chunk);
1167
+ if (!store && this.binaryStore) {
1168
+ const content = await this.binaryStore.getContent(chunk);
1169
+ return content ?? '';
1170
+ }
1171
+ if (!store && this.sqliteStore) {
1172
+ return this.sqliteStore.getContent(chunk) ?? '';
1173
+ }
1174
+ return '';
1175
+ }
1176
+ if (chunk?.content !== undefined && chunk?.content !== null) {
1177
+ return chunk.content;
1178
+ }
1179
+ if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1180
+ const content = await this.binaryStore.getContent(chunk._binaryIndex);
1181
+ return content ?? '';
1182
+ }
1183
+ const resolved = Number.isInteger(index) ? index : chunk?._index;
1184
+ if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1185
+ const content = await this.binaryStore.getContent(resolved);
1186
+ return content ?? '';
1187
+ }
1188
+ if (this.sqliteStore) {
1189
+ const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1190
+ ? chunk._sqliteIndex
1191
+ : Number.isInteger(chunk?.index)
1192
+ ? chunk.index
1193
+ : resolved;
1194
+ if (Number.isInteger(sqliteIndex)) {
1195
+ return this.sqliteStore.getContent(sqliteIndex) ?? '';
1196
+ }
1197
+ }
1198
+ return '';
1199
+ }
1200
+
1201
+ deleteFileHash(file) {
1202
+ this.fileHashes.delete(file);
1203
+ }
1204
+
1205
+ async removeFileFromStore(file) {
1206
+ if (!Array.isArray(this.vectorStore)) return;
1207
+
1208
+ let w = 0;
1209
+ for (let r = 0; r < this.vectorStore.length; r++) {
1210
+ const chunk = this.vectorStore[r];
1211
+ if (chunk.file !== file) {
1212
+ chunk._index = w;
1213
+ this.vectorStore[w++] = chunk;
1214
+ }
1215
+ }
1216
+ this.vectorStore.length = w;
1217
+
1218
+ this.invalidateAnnIndex();
1219
+ this.removeFileCallData(file);
1220
+
1221
+ this.fileHashes.delete(file);
1222
+ }
1223
+
1224
+ addToStore(chunk) {
1225
+ normalizeChunkVector(chunk);
1226
+
1227
+ if (!Array.isArray(this.vectorStore)) {
1228
+ this.vectorStore = [];
1229
+ }
1230
+
1231
+ const label = this.vectorStore.length;
1232
+ chunk._index = label;
1233
+ this.vectorStore.push(chunk);
1234
+ if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
1235
+ this.annVectorCache.push(chunk.vector);
1236
+ }
1237
+
1238
+ if (
1239
+ this.annIndex &&
1240
+ !this.annDirty &&
1241
+ this.annMeta &&
1242
+ typeof this.annIndex.addPoint === 'function' &&
1243
+ this.annMeta.count === label &&
1244
+ this.annMeta.maxElements > this.annMeta.count
1245
+ ) {
1246
+ try {
1247
+ this.annIndex.addPoint(chunk.vector, label);
1248
+ this.annMeta.count += 1;
1249
+ this.annPersistDirty = true;
1250
+ return;
1251
+ } catch {}
1252
+ }
1253
+
1254
+ this.invalidateAnnIndex();
1255
+ }
1256
+
1257
+ invalidateAnnIndex() {
1258
+ this.annIndex = null;
1259
+ this.annMeta = null;
1260
+ this.annDirty = true;
1261
+ this.annPersistDirty = false;
1262
+ this.annVectorCache = null;
1263
+ }
1264
+
1265
+ getAnnVector(index) {
1266
+ if (!Array.isArray(this.vectorStore)) return null;
1267
+ const chunk = this.vectorStore[index];
1268
+ if (!chunk) return null;
1269
+
1270
+ if (
1271
+ !Array.isArray(this.annVectorCache) ||
1272
+ this.annVectorCache.length !== this.vectorStore.length
1273
+ ) {
1274
+ this.annVectorCache = new Array(this.vectorStore.length);
1275
+ }
1276
+
1277
+ const cached = this.annVectorCache[index];
1278
+ if (cached) return cached;
1279
+
1280
+ let vec = null;
1281
+ if (chunk.vector) {
1282
+ vec = ensureFloat32(chunk.vector);
1283
+ } else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
1284
+ vec = this.binaryStore.getVector(chunk._binaryIndex);
1285
+ } else if (this.sqliteStore) {
1286
+ const sqliteIndex = Number.isInteger(chunk._sqliteIndex)
1287
+ ? chunk._sqliteIndex
1288
+ : Number.isInteger(chunk.index)
1289
+ ? chunk.index
1290
+ : index;
1291
+ if (Number.isInteger(sqliteIndex)) {
1292
+ vec = this.sqliteStore.getVector(sqliteIndex);
1293
+ }
1294
+ }
1295
+
1296
+ if (!vec) return null;
1297
+
1298
+ if (this.config.vectorStoreLoadMode !== 'disk') {
1299
+ chunk.vector = vec;
1300
+ }
1301
+ this.annVectorCache[index] = vec;
1302
+ return vec;
1303
+ }
1304
+
1305
+ getAnnIndexPaths() {
1306
+ return {
1307
+ indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
1308
+ metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
1309
+ };
1310
+ }
1311
+
1312
+ async ensureAnnIndex() {
1313
+ if (!this.config.annEnabled) return null;
1314
+ if (!Array.isArray(this.vectorStore)) return null;
1315
+ if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
1316
+ if (this.annIndex && !this.annDirty) return this.annIndex;
1317
+ if (this.annLoading) return this.annLoading;
1318
+
1319
+ this.annLoading = (async () => {
1320
+ try {
1321
+ const HierarchicalNSW = await loadHnswlib();
1322
+ if (!HierarchicalNSW) {
1323
+ if (hnswlibLoadError) {
1324
+ this.addInitError('loadHnswlib', hnswlibLoadError);
1325
+ }
1326
+ return null;
1327
+ }
1328
+
1329
+ const dim =
1330
+ this.vectorStore[0]?.vector?.length || this.binaryStore?.dim || this.sqliteStore?.dim;
1331
+ if (!dim) return null;
1332
+
1333
+ let dimensionMismatch = false;
1334
+ const sampleSize = Math.min(ANN_DIMENSION_SAMPLE_SIZE, this.vectorStore.length);
1335
+ const step = Math.max(1, Math.floor(this.vectorStore.length / sampleSize));
1336
+ for (let i = step; i < this.vectorStore.length; i += step) {
1337
+ const v = this.vectorStore[i]?.vector;
1338
+ if (v && v.length !== dim) {
1339
+ dimensionMismatch = true;
1340
+ console.warn(
1341
+ `[ANN] Dimension mismatch at index ${i}: expected ${dim}, got ${v.length}. ` +
1342
+ 'This may indicate a config change mid-index. Consider full reindex.'
1343
+ );
1344
+ break;
1345
+ }
1346
+ }
1347
+
1348
+ if (dimensionMismatch) {
1349
+ this.addInitError(
1350
+ 'ensureAnnIndex',
1351
+ `Vector dimension inconsistency detected. Expected ${dim}. Full reindex required.`
1352
+ );
1353
+ return null;
1354
+ }
1355
+
1356
+ if (!this.annDirty && this.config.annIndexCache !== false) {
1357
+ const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
1358
+ if (loaded) return this.annIndex;
1359
+ }
1360
+
1361
+ return await this.buildAnnIndex(HierarchicalNSW, dim);
1362
+ } finally {
1363
+ this.annLoading = null;
1364
+ }
1365
+ })();
1366
+
1367
+ return this.annLoading;
1368
+ }
1369
+
1370
+ async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
1371
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
1372
+ const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
1373
+ if (!metaData) return false;
1374
+
1375
+ let meta;
1376
+ try {
1377
+ meta = JSON.parse(metaData);
1378
+ } catch {
1379
+ console.warn('[ANN] Invalid ANN metadata, rebuilding');
1380
+ return false;
1381
+ }
1382
+
1383
+ if (meta?.version !== ANN_META_VERSION) {
1384
+ console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
1385
+ return false;
1386
+ }
1387
+
1388
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
1389
+ console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
1390
+ return false;
1391
+ }
1392
+
1393
+ if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
1394
+ console.warn('[ANN] ANN index size mismatch, rebuilding');
1395
+ return false;
1396
+ }
1397
+
1398
+ if (
1399
+ meta?.metric !== this.config.annMetric ||
1400
+ meta?.m !== this.config.annM ||
1401
+ meta?.efConstruction !== this.config.annEfConstruction
1402
+ ) {
1403
+ console.warn('[ANN] ANN index config changed, rebuilding');
1404
+ return false;
1405
+ }
1406
+
1407
+ let maxElements = meta?.maxElements;
1408
+ if (!Number.isInteger(maxElements)) {
1409
+ maxElements = meta.count;
1410
+ } else if (maxElements < meta.count) {
1411
+ console.warn('[ANN] ANN capacity invalid, rebuilding');
1412
+ return false;
1413
+ }
1414
+
1415
+ const index = new HierarchicalNSW(meta.metric, dim);
1416
+ const loaded = readHnswIndex(index, indexFile, maxElements);
1417
+ if (!loaded) {
1418
+ console.warn('[ANN] Failed to load ANN index file, rebuilding');
1419
+ return false;
1420
+ }
1421
+
1422
+ if (typeof index.setEf === 'function') {
1423
+ index.setEf(this.config.annEfSearch);
1424
+ }
1425
+
1426
+ this.annIndex = index;
1427
+ this.annMeta = { ...meta, maxElements };
1428
+ this.annDirty = false;
1429
+ this.annPersistDirty = false;
1430
+
1431
+ if (this.config.verbose) {
1432
+ console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
1433
+ }
1434
+ return true;
1435
+ }
1436
+
1437
+ async buildAnnIndex(HierarchicalNSW, dim) {
1438
+ if (!Array.isArray(this.vectorStore)) return null;
1439
+ const total = this.vectorStore.length;
1440
+ if (total === 0) return null;
1441
+
1442
+ try {
1443
+ const index = new HierarchicalNSW(this.config.annMetric, dim);
1444
+
1445
+ const maxElements = computeAnnCapacity(total, this.config);
1446
+ initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);
1447
+
1448
+ const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
1449
+ ? this.config.annBuildYieldEvery
1450
+ : 1000;
1451
+
1452
+ for (let i = 0; i < total; i++) {
1453
+ const vector = this.getAnnVector(i);
1454
+ if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
1455
+ index.addPoint(vector, i);
1456
+
1457
+ if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
1458
+ await yieldToLoop();
1459
+ }
1460
+ }
1461
+
1462
+ if (typeof index.setEf === 'function') {
1463
+ index.setEf(this.config.annEfSearch);
1464
+ }
1465
+
1466
+ this.annIndex = index;
1467
+ this.annMeta = {
1468
+ version: ANN_META_VERSION,
1469
+ embeddingModel: this.config.embeddingModel,
1470
+ metric: this.config.annMetric,
1471
+ dim,
1472
+ count: total,
1473
+ maxElements,
1474
+ m: this.config.annM,
1475
+ efConstruction: this.config.annEfConstruction,
1476
+ efSearch: this.config.annEfSearch,
1477
+ };
1478
+ this.annDirty = false;
1479
+ this.annPersistDirty = true;
1480
+
1481
+ if (this.config.annIndexCache !== false) {
1482
+ try {
1483
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
1484
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
1485
+ index.writeIndexSync(indexFile);
1486
+ await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
1487
+ this.annPersistDirty = false;
1488
+ if (this.config.verbose) {
1489
+ console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
1490
+ }
1491
+ } catch (error) {
1492
+ console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
1493
+ }
1494
+ }
1495
+
1496
+ return index;
1497
+ } catch (error) {
1498
+ console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
1499
+ this.addInitError('buildAnnIndex', error);
1500
+ this.annIndex = null;
1501
+ this.annMeta = null;
1502
+ this.annDirty = true;
1503
+ this.annPersistDirty = false;
1504
+ return null;
1505
+ }
1506
+ }
1507
+
1508
+ async queryAnn(queryVector, k) {
1509
+ if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
1510
+ const index = await this.ensureAnnIndex();
1511
+ if (!index) return [];
1512
+
1513
+ const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
1514
+
1515
+ let results;
1516
+ try {
1517
+ results = index.searchKnn(qVec, k);
1518
+ } catch (err) {
1519
+ console.warn(`[ANN] searchKnn failed: ${err.message}. Falling back to linear search.`);
1520
+ this.addInitError('queryAnn', err);
1521
+
1522
+ this.invalidateAnnIndex();
1523
+ return [];
1524
+ }
1525
+
1526
+ const labels = normalizeLabels(results);
1527
+
1528
+ if (labels.length === 0) return [];
1529
+
1530
+ const filtered = labels.filter(
1531
+ (label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
1532
+ );
1533
+
1534
+ return filtered;
1535
+ }
1536
+
1537
+ async clear() {
1538
+ if (!this.config.enableCache) return;
1539
+
1540
+ try {
1541
+ await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
1542
+ this.vectorStore = [];
1543
+ if (this.binaryStore) {
1544
+ try {
1545
+ await this.binaryStore.close();
1546
+ } catch {}
1547
+ }
1548
+ this.binaryStore = null;
1549
+ if (this.sqliteStore) {
1550
+ try {
1551
+ this.sqliteStore.close();
1552
+ } catch {}
1553
+ }
1554
+ this.sqliteStore = null;
1555
+ this.fileHashes = new Map();
1556
+ this.invalidateAnnIndex();
1557
+ await this.clearCallGraphData();
1558
+ if (this.config.verbose) {
1559
+ console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
1560
+ }
1561
+ } catch (error) {
1562
+ console.error('[Cache] Failed to clear cache:', error.message);
1563
+ throw error;
1564
+ }
1565
+ }
1566
+
1567
+ setEfSearch(efSearch) {
1568
+ if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
1569
+ return {
1570
+ success: false,
1571
+ error: 'efSearch must be a number between 1 and 1000',
1572
+ };
1573
+ }
1574
+
1575
+ this.config.annEfSearch = efSearch;
1576
+
1577
+ if (this.annIndex && typeof this.annIndex.setEf === 'function') {
1578
+ this.annIndex.setEf(efSearch);
1579
+ if (this.annMeta) this.annMeta.efSearch = efSearch;
1580
+ this.annPersistDirty = true;
1581
+ if (this.config.verbose) {
1582
+ console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
1583
+ }
1584
+ return { success: true, applied: true, efSearch };
1585
+ }
1586
+
1587
+ if (this.config.verbose) {
1588
+ console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
1589
+ }
1590
+ return { success: true, applied: false, efSearch };
1591
+ }
1592
+
1593
+ getAnnStats() {
1594
+ return {
1595
+ enabled: this.config.annEnabled ?? false,
1596
+ indexLoaded: this.annIndex !== null,
1597
+ dirty: this.annDirty,
1598
+ vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
1599
+ minChunksForAnn: this.config.annMinChunks ?? 5000,
1600
+ config: this.annMeta
1601
+ ? {
1602
+ metric: this.annMeta.metric,
1603
+ dim: this.annMeta.dim,
1604
+ count: this.annMeta.count,
1605
+ m: this.annMeta.m,
1606
+ efConstruction: this.annMeta.efConstruction,
1607
+ efSearch: this.config.annEfSearch,
1608
+ }
1609
+ : null,
1610
+ };
1611
+ }
1612
+
1613
+ async clearCallGraphData({ removeFile = false } = {}) {
1614
+ this.fileCallData.clear();
1615
+ this.callGraph = null;
1616
+
1617
+ if (removeFile && this.config.enableCache) {
1618
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
1619
+ try {
1620
+ await fs.rm(callGraphFile, { force: true });
1621
+ } catch (error) {
1622
+ if (this.config.verbose) {
1623
+ console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
1624
+ }
1625
+ }
1626
+ }
1627
+ }
1628
+
1629
+ pruneCallGraphData(validFiles) {
1630
+ if (!validFiles || this.fileCallData.size === 0) return 0;
1631
+
1632
+ let pruned = 0;
1633
+ for (const file of Array.from(this.fileCallData.keys())) {
1634
+ if (!validFiles.has(file)) {
1635
+ this.fileCallData.delete(file);
1636
+ pruned++;
1637
+ }
1638
+ }
1639
+
1640
+ if (pruned > 0) this.callGraph = null;
1641
+ return pruned;
1642
+ }
1643
+
1644
+ getFileCallData(file) {
1645
+ return this.fileCallData.get(file);
1646
+ }
1647
+
1648
+ hasFileCallData(file) {
1649
+ return this.fileCallData.has(file);
1650
+ }
1651
+
1652
+ getFileCallDataKeys() {
1653
+ return Array.from(this.fileCallData.keys());
1654
+ }
1655
+
1656
+ getFileCallDataCount() {
1657
+ return this.fileCallData.size;
1658
+ }
1659
+
1660
+ setFileCallData(file, data) {
1661
+ this.fileCallData.set(file, data);
1662
+ this.callGraph = null;
1663
+ }
1664
+
1665
+ setFileCallDataEntries(entries) {
1666
+ if (entries instanceof Map) {
1667
+ this.fileCallData = entries;
1668
+ } else {
1669
+ this.fileCallData.clear();
1670
+ if (entries && typeof entries === 'object') {
1671
+ for (const [file, data] of Object.entries(entries)) {
1672
+ this.fileCallData.set(file, data);
1673
+ }
1674
+ }
1675
+ }
1676
+ this.callGraph = null;
1677
+ }
1678
+
1679
+ clearFileCallData() {
1680
+ this.fileCallData.clear();
1681
+ this.callGraph = null;
1682
+ }
1683
+
1684
+ removeFileCallData(file) {
1685
+ this.fileCallData.delete(file);
1686
+ this.callGraph = null;
1687
+ }
1688
+
1689
+ async rebuildCallGraph() {
1690
+ if (this._callGraphBuild) return this._callGraphBuild;
1691
+
1692
+ this._callGraphBuild = (async () => {
1693
+ try {
1694
+ const { buildCallGraph } = await import('./call-graph.js');
1695
+ this.callGraph = buildCallGraph(this.fileCallData);
1696
+ if (this.config.verbose && this.callGraph) {
1697
+ console.info(
1698
+ `[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`
1699
+ );
1700
+ }
1701
+ } catch (err) {
1702
+ console.error(`[CallGraph] Failed to build: ${err.message}`);
1703
+ this.callGraph = null;
1704
+ } finally {
1705
+ this._callGraphBuild = null;
1706
+ }
1707
+ })();
1708
+
1709
+ return this._callGraphBuild;
1710
+ }
1711
+
1712
+ async getRelatedFiles(symbols) {
1713
+ if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
1714
+ if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
1715
+ if (!this.callGraph) return new Map();
1716
+
1717
+ const { getRelatedFiles } = await import('./call-graph.js');
1718
+ return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
1719
+ }
1720
+
1721
+ getCallGraphStats() {
1722
+ return {
1723
+ enabled: this.config.callGraphEnabled ?? false,
1724
+ filesWithData: this.fileCallData.size,
1725
+ graphBuilt: this.callGraph !== null,
1726
+ definitions: this.callGraph?.defines.size ?? 0,
1727
+ callTargets: this.callGraph?.calledBy.size ?? 0,
1728
+ };
1729
+ }
1730
+
1731
+ getStoreSize() {
1732
+ if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
1733
+ if (this.binaryStore) return this.binaryStore.length;
1734
+ if (this.sqliteStore) return this.sqliteStore.length();
1735
+ return 0;
1736
+ }
1737
+
1738
+ getVector(index) {
1739
+ return this.getChunkVector(index);
1740
+ }
1741
+
1742
+ getChunk(index) {
1743
+ if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
1744
+ return this.vectorStore[index];
1745
+ }
1746
+ if (this.binaryStore) {
1747
+ const record = this.binaryStore.getRecord(index);
1748
+ if (record) {
1749
+ return {
1750
+ file: record.file,
1751
+ startLine: record.startLine,
1752
+ endLine: record.endLine,
1753
+ vector: this.binaryStore.getVector(index),
1754
+ _index: index,
1755
+ _binaryIndex: index,
1756
+ };
1757
+ }
1758
+ }
1759
+ if (this.sqliteStore) {
1760
+ const record = this.sqliteStore.getRecord(index);
1761
+ if (record) {
1762
+ return {
1763
+ file: record.file,
1764
+ startLine: record.startLine,
1765
+ endLine: record.endLine,
1766
+ vector: this.sqliteStore.getVector(index),
1767
+ _index: index,
1768
+ _sqliteIndex: index,
1769
+ };
1770
+ }
1771
+ }
1772
+ return null;
1773
+ }
1774
+ }