@softerist/heuristic-mcp 3.2.3 → 3.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +387 -376
  2. package/config.jsonc +800 -800
  3. package/features/ann-config.js +102 -110
  4. package/features/clear-cache.js +81 -84
  5. package/features/find-similar-code.js +265 -286
  6. package/features/hybrid-search.js +487 -536
  7. package/features/index-codebase.js +3139 -3270
  8. package/features/lifecycle.js +1011 -1063
  9. package/features/package-version.js +277 -291
  10. package/features/register.js +351 -370
  11. package/features/resources.js +115 -130
  12. package/features/set-workspace.js +214 -240
  13. package/index.js +693 -758
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +465 -519
  16. package/lib/cache.js +1749 -1849
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +232 -226
  19. package/lib/config.js +1483 -1495
  20. package/lib/constants.js +511 -493
  21. package/lib/embed-query-process.js +206 -212
  22. package/lib/embedding-process.js +434 -451
  23. package/lib/embedding-worker.js +862 -934
  24. package/lib/ignore-patterns.js +276 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +302 -310
  27. package/lib/logging.js +116 -127
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +188 -193
  30. package/lib/path-utils.js +18 -23
  31. package/lib/project-detector.js +82 -84
  32. package/lib/server-lifecycle.js +133 -145
  33. package/lib/settings-editor.js +738 -739
  34. package/lib/slice-normalize.js +25 -31
  35. package/lib/tokenizer.js +168 -203
  36. package/lib/utils.js +364 -409
  37. package/lib/vector-store-binary.js +973 -991
  38. package/lib/vector-store-sqlite.js +377 -414
  39. package/lib/workspace-env.js +32 -34
  40. package/mcp_config.json +9 -9
  41. package/package.json +86 -86
  42. package/scripts/clear-cache.js +20 -20
  43. package/scripts/download-model.js +43 -43
  44. package/scripts/mcp-launcher.js +49 -49
  45. package/scripts/postinstall.js +12 -12
  46. package/search-configs.js +36 -36
package/lib/cache.js CHANGED
@@ -1,319 +1,299 @@
1
- import fs from 'fs/promises';
2
- import path from 'path';
3
- import { Worker } from 'worker_threads';
4
- import { StreamingJsonWriter } from './json-writer.js';
1
+ import fs from 'fs/promises';
2
+ import path from 'path';
3
+ import { Worker } from 'worker_threads';
4
+ import { StreamingJsonWriter } from './json-writer.js';
5
5
  import {
6
6
  BinaryVectorStore,
7
7
  BinaryStoreCorruptionError,
8
8
  recordBinaryStoreCorruption,
9
9
  } from './vector-store-binary.js';
10
- import { SqliteVectorStore } from './vector-store-sqlite.js';
11
- import { isNonProjectDirectory } from './config.js';
12
- import {
13
- JSON_WORKER_THRESHOLD_BYTES,
14
- ANN_DIMENSION_SAMPLE_SIZE,
15
- HNSWLIB_ERROR_RESET_MS,
16
- DEFAULT_READER_WAIT_TIMEOUT_MS,
17
- } from './constants.js';
18
-
19
- const CACHE_META_VERSION = 1;
20
- const CACHE_META_FILE = 'meta.json';
21
-
22
-
23
- const ANN_META_VERSION = 1;
24
- const ANN_INDEX_FILE = 'ann-index.bin';
25
- const ANN_META_FILE = 'ann-meta.json';
26
-
27
- const CALL_GRAPH_FILE = 'call-graph.json';
28
-
29
- const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
30
-
31
-
32
- const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
33
-
34
- let hnswlibPromise = null;
35
- let hnswlibLoadError = null;
36
-
37
- async function parseJsonInWorker(filePath) {
38
- return new Promise((resolve, reject) => {
39
- let settled = false;
40
- const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
41
- workerData: { filePath },
42
- });
43
-
44
-
45
-
46
-
47
- const finish = (handler, value) => {
48
- if (settled) return;
49
- settled = true;
50
- worker.removeAllListeners();
51
- const termination = worker.terminate?.();
52
- if (termination && typeof termination.catch === 'function') termination.catch(() => null);
53
- handler(value);
54
- };
55
-
56
- worker.once('message', (msg) => {
57
- if (msg?.ok) {
58
- finish(resolve, msg.data);
59
- } else {
60
- const err = new Error(msg?.error || 'JSON worker failed');
61
- console.warn(`[Cache] ${err.message}`);
62
- finish(reject, err);
63
- }
64
- });
65
-
66
- worker.once('error', (err) => {
67
- console.error(`[Cache] JSON worker error: ${err.message}`);
68
- finish(reject, err);
69
- });
70
-
71
- worker.once('exit', (code) => {
72
- if (code !== 0) {
73
- const err = new Error(`JSON worker exited with code ${code}`);
74
- console.error(`[Cache] ${err.message}`);
75
- finish(reject, err);
76
- return;
77
- }
78
- if (!settled) {
79
- const err = new Error('JSON worker exited without sending a response');
80
- console.error(`[Cache] ${err.message}`);
81
- finish(reject, err);
82
- }
83
- });
84
- });
85
- }
86
-
87
- async function readJsonFile(
88
- filePath,
89
- { workerThresholdBytes = JSON_WORKER_THRESHOLD_BYTES } = {}
90
- ) {
91
- let stats;
92
- try {
93
- stats = await fs.stat(filePath);
94
- } catch {
95
-
96
- return null;
97
- }
98
-
99
- try {
100
- const canUseWorker = typeof Worker === 'function';
101
- const useWorker =
102
- canUseWorker && stats && typeof stats.size === 'number'
103
- ? stats.size >= workerThresholdBytes
104
- : false;
105
-
106
- if (useWorker) return await parseJsonInWorker(filePath);
107
-
108
- const data = await fs.readFile(filePath, 'utf-8');
109
- return JSON.parse(data);
110
- } catch (error) {
111
- console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
112
- return null;
113
- }
114
- }
115
-
116
- async function loadHnswlib() {
117
-
118
- if (hnswlibLoadError) {
119
- if (hnswlibLoadError._timestamp && Date.now() - hnswlibLoadError._timestamp > HNSWLIB_ERROR_RESET_MS) {
120
- hnswlibLoadError = null;
121
- hnswlibPromise = null;
122
- } else {
123
- return null;
124
- }
125
- }
126
-
127
- if (!hnswlibPromise) {
128
- hnswlibPromise = import('hnswlib-node')
129
- .then((mod) => {
130
- const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
131
- if (!HierarchicalNSW) throw new Error('HierarchicalNSW export not found');
132
- return HierarchicalNSW;
133
- })
134
- .catch((err) => {
135
-
136
- err._timestamp = Date.now();
137
- hnswlibLoadError = err;
138
- console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
139
- return null;
140
- });
141
- }
142
-
143
- return hnswlibPromise;
144
- }
145
-
146
- function initHnswIndex(index, maxElements, m, efConstruction) {
147
- try {
148
- index.initIndex(maxElements, m, efConstruction, 100);
149
- return;
150
- } catch (err) {
151
- console.warn(`[ANN] Standard init failed: ${err.message}`);
152
- }
153
- try {
154
- index.initIndex(maxElements, m, efConstruction);
155
- return;
156
- } catch (err) {
157
- console.warn(`[ANN] Legacy init failed: ${err.message}`);
158
- }
159
- index.initIndex(maxElements);
160
- }
161
-
162
- function readHnswIndex(index, filePath, maxElements) {
163
- try {
164
- index.readIndexSync(filePath, maxElements);
165
- return true;
166
- } catch {
167
-
168
- }
169
- try {
170
- index.readIndexSync(filePath);
171
- return true;
172
- } catch (err) {
173
- console.warn(`[ANN] Read index failed: ${err.message}`);
174
- }
175
- return false;
176
- }
177
-
178
- function normalizeLabels(result) {
179
- if (!result) return [];
180
- if (Array.isArray(result)) return result;
181
- const labels = result.labels || result.neighbors || result.indices;
182
- return labels ? Array.from(labels) : [];
183
- }
184
-
185
- function ensureFloat32(vector) {
186
- if (!vector) return null;
187
- if (vector instanceof Float32Array) return vector;
188
-
189
-
190
- let result;
191
- if (ArrayBuffer.isView(vector)) {
192
- result = Float32Array.from(vector);
193
- } else {
194
- result = new Float32Array(vector);
195
- }
196
-
197
-
198
- if (IS_TEST_ENV && result.length > 0) {
199
- for (let i = 0; i < result.length; i++) {
200
- if (!Number.isFinite(result[i])) {
201
- throw new Error(
202
- `Invalid vector value at index ${i}: ${result[i]}. ` +
203
- 'Vector contains NaN or Infinity, which will corrupt search results.'
204
- );
205
- }
206
- }
207
- }
208
-
209
- return result;
210
- }
211
-
212
- function normalizeChunkVector(chunk) {
213
- if (chunk?.vector) chunk.vector = ensureFloat32(chunk.vector);
214
- }
215
-
216
- function assignChunkIndices(store) {
217
- if (!Array.isArray(store)) return;
218
- for (let i = 0; i < store.length; i += 1) {
219
- const chunk = store[i];
220
- if (chunk) {
221
- chunk._index = i;
222
- }
223
- }
224
- }
225
-
226
- function normalizeFileHashEntry(entry) {
227
- if (!entry) return null;
228
- if (typeof entry === 'string') return { hash: entry };
229
- if (typeof entry !== 'object') return null;
230
- if (typeof entry.hash !== 'string') return null;
231
- const normalized = { hash: entry.hash };
232
- if (Number.isFinite(entry.mtimeMs)) normalized.mtimeMs = entry.mtimeMs;
233
- if (Number.isFinite(entry.size)) normalized.size = entry.size;
234
- return normalized;
235
- }
236
-
237
- function serializeFileHashEntry(entry) {
238
- if (!entry) return null;
239
- if (typeof entry === 'string') return { hash: entry };
240
- if (typeof entry !== 'object') return null;
241
- if (typeof entry.hash !== 'string') return null;
242
- const serialized = { hash: entry.hash };
243
- if (Number.isFinite(entry.mtimeMs)) serialized.mtimeMs = entry.mtimeMs;
244
- if (Number.isFinite(entry.size)) serialized.size = entry.size;
245
- return serialized;
246
- }
247
-
248
- function computeAnnCapacity(total, config) {
249
- const factor = typeof config.annCapacityFactor === 'number' ? config.annCapacityFactor : 1.2;
250
- const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
251
- const byFactor = Math.ceil(total * factor);
252
- const byExtra = total + extra;
253
- return Math.max(total, byFactor, byExtra);
254
- }
255
-
256
- export class EmbeddingsCache {
257
- constructor(config) {
258
- this.config = config;
259
-
260
- this.vectorStore = [];
261
- this.fileHashes = new Map();
262
- this.isSaving = false;
263
- this.lastIndexDurationMs = null;
264
- this.lastIndexStats = null;
265
-
266
- this.cacheMeta = {
267
- version: CACHE_META_VERSION,
268
- embeddingModel: config.embeddingModel,
269
- embeddingDimension: config.embeddingDimension ?? null,
270
- };
271
-
272
-
273
- this.saveQueue = Promise.resolve();
274
- this._saveTimer = null;
275
- this._saveRequested = false;
276
- this._savePromise = null;
277
- this._saveThrowOnError = false;
278
- this.lastSaveError = null;
279
-
280
-
281
- this.annIndex = null;
282
- this.annMeta = null;
283
- this.annDirty = false;
284
- this.annPersistDirty = false;
285
- this.annLoading = null;
286
- this.annVectorCache = null;
287
-
288
-
289
- this.fileCallData = new Map();
290
- this.callGraph = null;
291
- this._callGraphBuild = null;
292
-
293
-
294
- this.binaryStore = null;
295
-
296
-
297
- this.sqliteStore = null;
298
-
299
-
300
- this.initErrors = [];
301
-
302
-
303
- this.activeReads = 0;
304
- this._readWaiters = [];
305
- this._saveInProgress = false;
306
-
307
-
308
- this._clearedAfterIndex = false;
309
- this._loadPromise = null;
310
- this._corruptionDetected = false;
311
- }
312
-
313
- /**
314
- * Returns true if the last load() detected binary store corruption.
315
- * Used by the server to decide whether to trigger an automatic re-index.
316
- */
10
+ import { SqliteVectorStore } from './vector-store-sqlite.js';
11
+ import { isNonProjectDirectory } from './config.js';
12
+ import {
13
+ JSON_WORKER_THRESHOLD_BYTES,
14
+ ANN_DIMENSION_SAMPLE_SIZE,
15
+ HNSWLIB_ERROR_RESET_MS,
16
+ DEFAULT_READER_WAIT_TIMEOUT_MS,
17
+ } from './constants.js';
18
+
19
+ const CACHE_META_VERSION = 1;
20
+ const CACHE_META_FILE = 'meta.json';
21
+
22
+ const ANN_META_VERSION = 1;
23
+ const ANN_INDEX_FILE = 'ann-index.bin';
24
+ const ANN_META_FILE = 'ann-meta.json';
25
+
26
+ const CALL_GRAPH_FILE = 'call-graph.json';
27
+
28
+ const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
29
+
30
+ const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
31
+
32
+ let hnswlibPromise = null;
33
+ let hnswlibLoadError = null;
34
+
35
+ async function parseJsonInWorker(filePath) {
36
+ return new Promise((resolve, reject) => {
37
+ let settled = false;
38
+ const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
39
+ workerData: { filePath },
40
+ });
41
+
42
+ const finish = (handler, value) => {
43
+ if (settled) return;
44
+ settled = true;
45
+ worker.removeAllListeners();
46
+ const termination = worker.terminate?.();
47
+ if (termination && typeof termination.catch === 'function') termination.catch(() => null);
48
+ handler(value);
49
+ };
50
+
51
+ worker.once('message', (msg) => {
52
+ if (msg?.ok) {
53
+ finish(resolve, msg.data);
54
+ } else {
55
+ const err = new Error(msg?.error || 'JSON worker failed');
56
+ console.warn(`[Cache] ${err.message}`);
57
+ finish(reject, err);
58
+ }
59
+ });
60
+
61
+ worker.once('error', (err) => {
62
+ console.error(`[Cache] JSON worker error: ${err.message}`);
63
+ finish(reject, err);
64
+ });
65
+
66
+ worker.once('exit', (code) => {
67
+ if (code !== 0) {
68
+ const err = new Error(`JSON worker exited with code ${code}`);
69
+ console.error(`[Cache] ${err.message}`);
70
+ finish(reject, err);
71
+ return;
72
+ }
73
+ if (!settled) {
74
+ const err = new Error('JSON worker exited without sending a response');
75
+ console.error(`[Cache] ${err.message}`);
76
+ finish(reject, err);
77
+ }
78
+ });
79
+ });
80
+ }
81
+
82
+ async function readJsonFile(filePath, { workerThresholdBytes = JSON_WORKER_THRESHOLD_BYTES } = {}) {
83
+ let stats;
84
+ try {
85
+ stats = await fs.stat(filePath);
86
+ } catch {
87
+ return null;
88
+ }
89
+
90
+ try {
91
+ const canUseWorker = typeof Worker === 'function';
92
+ const useWorker =
93
+ canUseWorker && stats && typeof stats.size === 'number'
94
+ ? stats.size >= workerThresholdBytes
95
+ : false;
96
+
97
+ if (useWorker) return await parseJsonInWorker(filePath);
98
+
99
+ const data = await fs.readFile(filePath, 'utf-8');
100
+ return JSON.parse(data);
101
+ } catch (error) {
102
+ console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
103
+ return null;
104
+ }
105
+ }
106
+
107
+ async function loadHnswlib() {
108
+ if (hnswlibLoadError) {
109
+ if (
110
+ hnswlibLoadError._timestamp &&
111
+ Date.now() - hnswlibLoadError._timestamp > HNSWLIB_ERROR_RESET_MS
112
+ ) {
113
+ hnswlibLoadError = null;
114
+ hnswlibPromise = null;
115
+ } else {
116
+ return null;
117
+ }
118
+ }
119
+
120
+ if (!hnswlibPromise) {
121
+ hnswlibPromise = import('hnswlib-node')
122
+ .then((mod) => {
123
+ const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
124
+ if (!HierarchicalNSW) throw new Error('HierarchicalNSW export not found');
125
+ return HierarchicalNSW;
126
+ })
127
+ .catch((err) => {
128
+ err._timestamp = Date.now();
129
+ hnswlibLoadError = err;
130
+ console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
131
+ return null;
132
+ });
133
+ }
134
+
135
+ return hnswlibPromise;
136
+ }
137
+
138
+ function initHnswIndex(index, maxElements, m, efConstruction) {
139
+ try {
140
+ index.initIndex(maxElements, m, efConstruction, 100);
141
+ return;
142
+ } catch (err) {
143
+ console.warn(`[ANN] Standard init failed: ${err.message}`);
144
+ }
145
+ try {
146
+ index.initIndex(maxElements, m, efConstruction);
147
+ return;
148
+ } catch (err) {
149
+ console.warn(`[ANN] Legacy init failed: ${err.message}`);
150
+ }
151
+ index.initIndex(maxElements);
152
+ }
153
+
154
+ function readHnswIndex(index, filePath, maxElements) {
155
+ try {
156
+ index.readIndexSync(filePath, maxElements);
157
+ return true;
158
+ } catch {}
159
+ try {
160
+ index.readIndexSync(filePath);
161
+ return true;
162
+ } catch (err) {
163
+ console.warn(`[ANN] Read index failed: ${err.message}`);
164
+ }
165
+ return false;
166
+ }
167
+
168
+ function normalizeLabels(result) {
169
+ if (!result) return [];
170
+ if (Array.isArray(result)) return result;
171
+ const labels = result.labels || result.neighbors || result.indices;
172
+ return labels ? Array.from(labels) : [];
173
+ }
174
+
175
+ function ensureFloat32(vector) {
176
+ if (!vector) return null;
177
+ if (vector instanceof Float32Array) return vector;
178
+
179
+ let result;
180
+ if (ArrayBuffer.isView(vector)) {
181
+ result = Float32Array.from(vector);
182
+ } else {
183
+ result = new Float32Array(vector);
184
+ }
185
+
186
+ if (IS_TEST_ENV && result.length > 0) {
187
+ for (let i = 0; i < result.length; i++) {
188
+ if (!Number.isFinite(result[i])) {
189
+ throw new Error(
190
+ `Invalid vector value at index ${i}: ${result[i]}. ` +
191
+ 'Vector contains NaN or Infinity, which will corrupt search results.'
192
+ );
193
+ }
194
+ }
195
+ }
196
+
197
+ return result;
198
+ }
199
+
200
+ function normalizeChunkVector(chunk) {
201
+ if (chunk?.vector) chunk.vector = ensureFloat32(chunk.vector);
202
+ }
203
+
204
+ function assignChunkIndices(store) {
205
+ if (!Array.isArray(store)) return;
206
+ for (let i = 0; i < store.length; i += 1) {
207
+ const chunk = store[i];
208
+ if (chunk) {
209
+ chunk._index = i;
210
+ }
211
+ }
212
+ }
213
+
214
+ function normalizeFileHashEntry(entry) {
215
+ if (!entry) return null;
216
+ if (typeof entry === 'string') return { hash: entry };
217
+ if (typeof entry !== 'object') return null;
218
+ if (typeof entry.hash !== 'string') return null;
219
+ const normalized = { hash: entry.hash };
220
+ if (Number.isFinite(entry.mtimeMs)) normalized.mtimeMs = entry.mtimeMs;
221
+ if (Number.isFinite(entry.size)) normalized.size = entry.size;
222
+ return normalized;
223
+ }
224
+
225
+ function serializeFileHashEntry(entry) {
226
+ if (!entry) return null;
227
+ if (typeof entry === 'string') return { hash: entry };
228
+ if (typeof entry !== 'object') return null;
229
+ if (typeof entry.hash !== 'string') return null;
230
+ const serialized = { hash: entry.hash };
231
+ if (Number.isFinite(entry.mtimeMs)) serialized.mtimeMs = entry.mtimeMs;
232
+ if (Number.isFinite(entry.size)) serialized.size = entry.size;
233
+ return serialized;
234
+ }
235
+
236
+ function computeAnnCapacity(total, config) {
237
+ const factor = typeof config.annCapacityFactor === 'number' ? config.annCapacityFactor : 1.2;
238
+ const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
239
+ const byFactor = Math.ceil(total * factor);
240
+ const byExtra = total + extra;
241
+ return Math.max(total, byFactor, byExtra);
242
+ }
243
+
244
+ export class EmbeddingsCache {
245
+ constructor(config) {
246
+ this.config = config;
247
+
248
+ this.vectorStore = [];
249
+ this.fileHashes = new Map();
250
+ this.isSaving = false;
251
+ this.lastIndexDurationMs = null;
252
+ this.lastIndexStats = null;
253
+
254
+ this.cacheMeta = {
255
+ version: CACHE_META_VERSION,
256
+ embeddingModel: config.embeddingModel,
257
+ embeddingDimension: config.embeddingDimension ?? null,
258
+ };
259
+
260
+ this.saveQueue = Promise.resolve();
261
+ this._saveTimer = null;
262
+ this._saveRequested = false;
263
+ this._savePromise = null;
264
+ this._saveThrowOnError = false;
265
+ this.lastSaveError = null;
266
+
267
+ this.annIndex = null;
268
+ this.annMeta = null;
269
+ this.annDirty = false;
270
+ this.annPersistDirty = false;
271
+ this.annLoading = null;
272
+ this.annVectorCache = null;
273
+
274
+ this.fileCallData = new Map();
275
+ this.callGraph = null;
276
+ this._callGraphBuild = null;
277
+
278
+ this.binaryStore = null;
279
+
280
+ this.sqliteStore = null;
281
+
282
+ this.initErrors = [];
283
+
284
+ this.activeReads = 0;
285
+ this._readWaiters = [];
286
+ this._saveInProgress = false;
287
+
288
+ this._clearedAfterIndex = false;
289
+ this._loadPromise = null;
290
+ this._corruptionDetected = false;
291
+ }
292
+
293
+ /**
294
+ * Returns true if the last load() detected binary store corruption.
295
+ * Used by the server to decide whether to trigger an automatic re-index.
296
+ */
317
297
  shouldAutoReindex() {
318
298
  return this._corruptionDetected === true;
319
299
  }
@@ -323,365 +303,344 @@ export class EmbeddingsCache {
323
303
  this._corruptionDetected = false;
324
304
  return should;
325
305
  }
326
-
327
-
328
- addInitError(stage, error) {
329
- this.initErrors.push({
330
- stage,
331
- message: error instanceof Error ? error.message : String(error),
332
- stack: error instanceof Error ? error.stack : null,
333
- timestamp: Date.now(),
334
- });
335
- }
336
-
337
- clearInMemoryState() {
338
- this.vectorStore = [];
339
- this.fileHashes.clear();
340
- this.invalidateAnnIndex();
341
- this.fileCallData.clear();
342
- this.callGraph = null;
343
- this.initErrors = [];
344
- if (this.binaryStore) {
345
- try {
346
- this.binaryStore.close?.();
347
- } catch {
348
-
349
- }
350
- this.binaryStore = null;
351
- }
352
- if (this.sqliteStore) {
353
- try {
354
- this.sqliteStore.close?.();
355
- } catch {
356
-
357
- }
358
- this.sqliteStore = null;
359
- }
360
- }
361
-
362
- async close() {
363
- if (this.binaryStore) {
364
- await this.binaryStore.close();
365
- this.binaryStore = null;
366
- }
367
- if (this.sqliteStore) {
368
- try {
369
- this.sqliteStore.close();
370
- } catch {
371
-
372
- }
373
- this.sqliteStore = null;
374
- }
375
- }
376
-
377
- async ensureLoaded({ preferDisk = false } = {}) {
378
- if (!this.config.enableCache) return;
379
- if (!this._clearedAfterIndex) return;
380
- if (this._loadPromise) return this._loadPromise;
381
-
382
- this._loadPromise = (async () => {
383
- if (preferDisk && this.config.verbose) {
384
- console.info('[Cache] ensureLoaded: forcing disk vector mode for incremental low-RAM reload');
385
- }
386
- await this.load({
387
- forceVectorLoadMode: preferDisk ? 'disk' : undefined,
388
- });
389
- this._clearedAfterIndex = false;
390
- })().finally(() => {
391
- this._loadPromise = null;
392
- });
393
-
394
- return this._loadPromise;
395
- }
396
-
397
- async dropInMemoryVectors() {
398
- if (!this.config.enableCache) return;
399
-
400
- if (this.activeReads > 0) {
401
- await this.waitForReaders();
402
- }
403
-
404
- this.vectorStore = [];
405
- this.annVectorCache = null;
406
- this.annIndex = null;
407
- this.annMeta = null;
408
- this.annDirty = true;
409
- this.annPersistDirty = false;
410
-
411
- if (this.binaryStore) {
412
- try {
413
- await this.binaryStore.close();
414
- } catch {
415
-
416
- }
417
- this.binaryStore = null;
418
- }
419
-
420
- if (this.sqliteStore) {
421
- try {
422
- this.sqliteStore.close();
423
- } catch {
424
-
425
- }
426
- this.sqliteStore = null;
427
- }
428
-
429
- this._clearedAfterIndex = true;
430
- }
431
-
432
-
433
-
434
- startRead() {
435
-
436
- if (this._saveInProgress) {
437
- throw new Error('Cache save in progress, try again shortly');
438
- }
439
- this.activeReads++;
440
- }
441
-
442
- endRead() {
443
- if (this.activeReads > 0) {
444
- this.activeReads--;
445
- if (this.activeReads === 0 && this._readWaiters.length > 0) {
446
- const waiters = this._readWaiters;
447
- this._readWaiters = [];
448
- for (const resolve of waiters) {
449
- resolve();
450
- }
451
- }
452
- }
453
- }
454
-
455
- async waitForReaders() {
456
- if (this.activeReads === 0) return;
457
- await new Promise((resolve) => {
458
- this._readWaiters.push(resolve);
459
- });
460
- }
461
-
462
- async waitForReadersWithTimeout(timeoutMs = 5000) {
463
- if (this.activeReads === 0) return true;
464
- let timedOut = false;
465
- let resolved = false;
466
- let waiterResolve;
467
- const waiterPromise = new Promise((resolve) => {
468
- waiterResolve = () => {
469
- if (!resolved) {
470
- resolved = true;
471
- resolve();
472
- }
473
- };
474
- this._readWaiters.push(waiterResolve);
475
- });
476
- await Promise.race([
477
- waiterPromise,
478
- new Promise((resolve) => {
479
- setTimeout(() => {
480
- if (!resolved) {
481
- resolved = true;
482
- timedOut = true;
483
-
484
- const idx = this._readWaiters.indexOf(waiterResolve);
485
- if (idx >= 0) this._readWaiters.splice(idx, 1);
486
- resolve();
487
- }
488
- }, timeoutMs);
489
- }),
490
- ]);
491
- if (timedOut) {
492
-
493
- console.warn(
494
- `[Cache] Timed out waiting for ${this.activeReads} active reader(s); proceeding with save anyway. ` +
495
- 'This may cause data inconsistency if readers access the store during write.'
496
- );
497
- }
498
- return !timedOut;
499
- }
500
-
501
-
502
-
503
-
504
- async reset() {
505
- this.vectorStore = [];
506
- if (this.binaryStore) {
507
- try {
508
- await this.binaryStore.close();
509
- } catch {
510
-
511
- }
512
- this.binaryStore = null;
513
- }
514
- if (this.sqliteStore) {
515
- try {
516
- this.sqliteStore.close();
517
- } catch {
518
-
519
- }
520
- this.sqliteStore = null;
521
- }
522
- this.fileHashes.clear();
523
- this.invalidateAnnIndex();
524
- await this.clearCallGraphData({ removeFile: true });
525
- this.initErrors = [];
526
- }
527
-
528
-
529
-
530
- async load({ forceVectorLoadMode } = {}) {
531
- if (!this.config.enableCache) return;
532
- this._corruptionDetected = false;
533
-
534
- try {
535
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
536
-
537
- const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
538
- const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
539
- const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
540
-
541
- const workerThresholdBytes =
542
- Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
543
- this.config.jsonWorkerThresholdBytes > 0
544
- ? this.config.jsonWorkerThresholdBytes
545
- : JSON_WORKER_THRESHOLD_BYTES;
546
-
547
- const useBinary = this.config.vectorStoreFormat === 'binary';
548
- const useSqlite = this.config.vectorStoreFormat === 'sqlite';
549
-
550
- const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
551
- this.config.cacheDirectory
552
- );
553
- const pathExists = async (targetPath) => {
554
- try {
555
- await fs.access(targetPath);
556
- return true;
557
- } catch {
558
- return false;
559
- }
560
- };
561
-
562
-
563
- let cacheData = null;
564
- let hashData = null;
565
- let prefetched = false;
566
- if (IS_TEST_ENV) {
567
- prefetched = true;
568
- const cachePromise = useBinary || useSqlite
569
- ? Promise.resolve(null)
570
- : readJsonFile(cacheFile, { workerThresholdBytes });
571
- [cacheData, hashData] = await Promise.all([
572
- cachePromise,
573
- readJsonFile(hashFile, { workerThresholdBytes }),
574
- ]);
575
- }
576
-
577
-
578
- const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
579
- if (!metaData) {
580
- console.warn('[Cache] Missing cache metadata, ignoring cache');
581
- this.clearInMemoryState();
582
- return;
583
- }
584
-
585
- let meta;
586
- try {
587
- meta = JSON.parse(metaData);
588
- } catch {
589
- console.warn('[Cache] Invalid cache metadata, ignoring cache');
590
- this.clearInMemoryState();
591
- return;
592
- }
593
-
594
- if (meta?.version !== CACHE_META_VERSION) {
595
- console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
596
- this.clearInMemoryState();
597
- return;
598
- }
599
-
600
- if (meta?.embeddingModel !== this.config.embeddingModel) {
601
- console.warn(
602
- `[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`
603
- );
604
- this.clearInMemoryState();
605
- return;
606
- }
607
- const expectedDimension = this.config.embeddingDimension ?? null;
608
- const metaDimension = meta?.embeddingDimension ?? null;
609
- if (metaDimension !== expectedDimension) {
610
- console.warn(
611
- `[Cache] Embedding dimension changed, ignoring cache (${metaDimension} -> ${expectedDimension})`
612
- );
613
- this.clearInMemoryState();
614
- return;
615
- }
616
-
617
- if (!prefetched) {
618
- [cacheData, hashData] = await Promise.all([
619
- useBinary || useSqlite ? Promise.resolve(null) : readJsonFile(cacheFile, { workerThresholdBytes }),
620
- readJsonFile(hashFile, { workerThresholdBytes }),
621
- ]);
622
- }
623
-
624
- this.cacheMeta = meta;
625
-
626
- const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
627
- (async () => {
628
- const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
629
- pathExists(vectorsPath),
630
- pathExists(recordsPath),
631
- pathExists(contentPath),
632
- pathExists(filesPath),
633
- ]);
634
- return vectorsOk && recordsOk && contentOk && filesOk;
635
- })(),
636
- pathExists(cacheFile),
637
- ]);
638
-
639
- if (useBinary && !binaryFilesPresent) {
640
- if (jsonCachePresent) {
641
- console.warn(
642
- '[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.'
643
- );
644
- } else {
645
- console.warn(
646
- '[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.'
647
- );
648
- }
649
- } else if (!useBinary && !useSqlite && !jsonCachePresent) {
650
- if (binaryFilesPresent) {
651
- console.warn(
652
- '[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.'
653
- );
654
- } else {
655
- console.warn(
656
- '[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.'
657
- );
658
- }
659
- }
660
-
661
- const configuredVectorLoadMode =
662
- typeof this.config.vectorStoreLoadMode === 'string'
663
- ? this.config.vectorStoreLoadMode.toLowerCase()
664
- : 'memory';
665
- const effectiveVectorLoadMode =
666
- forceVectorLoadMode === 'disk' || forceVectorLoadMode === 'memory'
667
- ? forceVectorLoadMode
668
- : configuredVectorLoadMode;
669
-
670
- if (useBinary) {
671
- try {
672
- this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
673
- contentCacheEntries: this.config.contentCacheEntries,
674
- vectorCacheEntries: this.config.vectorCacheEntries,
675
- vectorLoadMode: effectiveVectorLoadMode,
676
- });
677
- cacheData = await this.binaryStore.toChunkViews({
678
- includeContent: this.config.vectorStoreContentMode === 'inline',
679
- includeVector: effectiveVectorLoadMode !== 'disk',
680
- });
681
- } catch (err) {
682
- this.binaryStore = null;
683
- const isCorruption = err instanceof BinaryStoreCorruptionError ||
684
- err?.name === 'BinaryStoreCorruptionError';
306
+
307
+ addInitError(stage, error) {
308
+ this.initErrors.push({
309
+ stage,
310
+ message: error instanceof Error ? error.message : String(error),
311
+ stack: error instanceof Error ? error.stack : null,
312
+ timestamp: Date.now(),
313
+ });
314
+ }
315
+
316
+ clearInMemoryState() {
317
+ this.vectorStore = [];
318
+ this.fileHashes.clear();
319
+ this.invalidateAnnIndex();
320
+ this.fileCallData.clear();
321
+ this.callGraph = null;
322
+ this.initErrors = [];
323
+ if (this.binaryStore) {
324
+ try {
325
+ this.binaryStore.close?.();
326
+ } catch {}
327
+ this.binaryStore = null;
328
+ }
329
+ if (this.sqliteStore) {
330
+ try {
331
+ this.sqliteStore.close?.();
332
+ } catch {}
333
+ this.sqliteStore = null;
334
+ }
335
+ }
336
+
337
+ async close() {
338
+ if (this.binaryStore) {
339
+ await this.binaryStore.close();
340
+ this.binaryStore = null;
341
+ }
342
+ if (this.sqliteStore) {
343
+ try {
344
+ this.sqliteStore.close();
345
+ } catch {}
346
+ this.sqliteStore = null;
347
+ }
348
+ }
349
+
350
+ async ensureLoaded({ preferDisk = false } = {}) {
351
+ if (!this.config.enableCache) return;
352
+ if (!this._clearedAfterIndex) return;
353
+ if (this._loadPromise) return this._loadPromise;
354
+
355
+ this._loadPromise = (async () => {
356
+ if (preferDisk && this.config.verbose) {
357
+ console.info(
358
+ '[Cache] ensureLoaded: forcing disk vector mode for incremental low-RAM reload'
359
+ );
360
+ }
361
+ await this.load({
362
+ forceVectorLoadMode: preferDisk ? 'disk' : undefined,
363
+ });
364
+ this._clearedAfterIndex = false;
365
+ })().finally(() => {
366
+ this._loadPromise = null;
367
+ });
368
+
369
+ return this._loadPromise;
370
+ }
371
+
372
+ async dropInMemoryVectors() {
373
+ if (!this.config.enableCache) return;
374
+
375
+ if (this.activeReads > 0) {
376
+ await this.waitForReaders();
377
+ }
378
+
379
+ this.vectorStore = [];
380
+ this.annVectorCache = null;
381
+ this.annIndex = null;
382
+ this.annMeta = null;
383
+ this.annDirty = true;
384
+ this.annPersistDirty = false;
385
+
386
+ if (this.binaryStore) {
387
+ try {
388
+ await this.binaryStore.close();
389
+ } catch {}
390
+ this.binaryStore = null;
391
+ }
392
+
393
+ if (this.sqliteStore) {
394
+ try {
395
+ this.sqliteStore.close();
396
+ } catch {}
397
+ this.sqliteStore = null;
398
+ }
399
+
400
+ this._clearedAfterIndex = true;
401
+ }
402
+
403
+ startRead() {
404
+ if (this._saveInProgress) {
405
+ throw new Error('Cache save in progress, try again shortly');
406
+ }
407
+ this.activeReads++;
408
+ }
409
+
410
+ endRead() {
411
+ if (this.activeReads > 0) {
412
+ this.activeReads--;
413
+ if (this.activeReads === 0 && this._readWaiters.length > 0) {
414
+ const waiters = this._readWaiters;
415
+ this._readWaiters = [];
416
+ for (const resolve of waiters) {
417
+ resolve();
418
+ }
419
+ }
420
+ }
421
+ }
422
+
423
+ async waitForReaders() {
424
+ if (this.activeReads === 0) return;
425
+ await new Promise((resolve) => {
426
+ this._readWaiters.push(resolve);
427
+ });
428
+ }
429
+
430
+ async waitForReadersWithTimeout(timeoutMs = 5000) {
431
+ if (this.activeReads === 0) return true;
432
+ let timedOut = false;
433
+ let resolved = false;
434
+ let waiterResolve;
435
+ const waiterPromise = new Promise((resolve) => {
436
+ waiterResolve = () => {
437
+ if (!resolved) {
438
+ resolved = true;
439
+ resolve();
440
+ }
441
+ };
442
+ this._readWaiters.push(waiterResolve);
443
+ });
444
+ await Promise.race([
445
+ waiterPromise,
446
+ new Promise((resolve) => {
447
+ setTimeout(() => {
448
+ if (!resolved) {
449
+ resolved = true;
450
+ timedOut = true;
451
+
452
+ const idx = this._readWaiters.indexOf(waiterResolve);
453
+ if (idx >= 0) this._readWaiters.splice(idx, 1);
454
+ resolve();
455
+ }
456
+ }, timeoutMs);
457
+ }),
458
+ ]);
459
+ if (timedOut) {
460
+ console.warn(
461
+ `[Cache] Timed out waiting for ${this.activeReads} active reader(s); proceeding with save anyway. ` +
462
+ 'This may cause data inconsistency if readers access the store during write.'
463
+ );
464
+ }
465
+ return !timedOut;
466
+ }
467
+
468
+ async reset() {
469
+ this.vectorStore = [];
470
+ if (this.binaryStore) {
471
+ try {
472
+ await this.binaryStore.close();
473
+ } catch {}
474
+ this.binaryStore = null;
475
+ }
476
+ if (this.sqliteStore) {
477
+ try {
478
+ this.sqliteStore.close();
479
+ } catch {}
480
+ this.sqliteStore = null;
481
+ }
482
+ this.fileHashes.clear();
483
+ this.invalidateAnnIndex();
484
+ await this.clearCallGraphData({ removeFile: true });
485
+ this.initErrors = [];
486
+ }
487
+
488
+ async load({ forceVectorLoadMode } = {}) {
489
+ if (!this.config.enableCache) return;
490
+ this._corruptionDetected = false;
491
+
492
+ try {
493
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
494
+
495
+ const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
496
+ const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
497
+ const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
498
+
499
+ const workerThresholdBytes =
500
+ Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
501
+ this.config.jsonWorkerThresholdBytes > 0
502
+ ? this.config.jsonWorkerThresholdBytes
503
+ : JSON_WORKER_THRESHOLD_BYTES;
504
+
505
+ const useBinary = this.config.vectorStoreFormat === 'binary';
506
+ const useSqlite = this.config.vectorStoreFormat === 'sqlite';
507
+
508
+ const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
509
+ this.config.cacheDirectory
510
+ );
511
+ const pathExists = async (targetPath) => {
512
+ try {
513
+ await fs.access(targetPath);
514
+ return true;
515
+ } catch {
516
+ return false;
517
+ }
518
+ };
519
+
520
+ let cacheData = null;
521
+ let hashData = null;
522
+ let prefetched = false;
523
+ if (IS_TEST_ENV) {
524
+ prefetched = true;
525
+ const cachePromise =
526
+ useBinary || useSqlite
527
+ ? Promise.resolve(null)
528
+ : readJsonFile(cacheFile, { workerThresholdBytes });
529
+ [cacheData, hashData] = await Promise.all([
530
+ cachePromise,
531
+ readJsonFile(hashFile, { workerThresholdBytes }),
532
+ ]);
533
+ }
534
+
535
+ const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
536
+ if (!metaData) {
537
+ console.warn('[Cache] Missing cache metadata, ignoring cache');
538
+ this.clearInMemoryState();
539
+ return;
540
+ }
541
+
542
+ let meta;
543
+ try {
544
+ meta = JSON.parse(metaData);
545
+ } catch {
546
+ console.warn('[Cache] Invalid cache metadata, ignoring cache');
547
+ this.clearInMemoryState();
548
+ return;
549
+ }
550
+
551
+ if (meta?.version !== CACHE_META_VERSION) {
552
+ console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
553
+ this.clearInMemoryState();
554
+ return;
555
+ }
556
+
557
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
558
+ console.warn(
559
+ `[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`
560
+ );
561
+ this.clearInMemoryState();
562
+ return;
563
+ }
564
+ const expectedDimension = this.config.embeddingDimension ?? null;
565
+ const metaDimension = meta?.embeddingDimension ?? null;
566
+ if (metaDimension !== expectedDimension) {
567
+ console.warn(
568
+ `[Cache] Embedding dimension changed, ignoring cache (${metaDimension} -> ${expectedDimension})`
569
+ );
570
+ this.clearInMemoryState();
571
+ return;
572
+ }
573
+
574
+ if (!prefetched) {
575
+ [cacheData, hashData] = await Promise.all([
576
+ useBinary || useSqlite
577
+ ? Promise.resolve(null)
578
+ : readJsonFile(cacheFile, { workerThresholdBytes }),
579
+ readJsonFile(hashFile, { workerThresholdBytes }),
580
+ ]);
581
+ }
582
+
583
+ this.cacheMeta = meta;
584
+
585
+ const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
586
+ (async () => {
587
+ const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
588
+ pathExists(vectorsPath),
589
+ pathExists(recordsPath),
590
+ pathExists(contentPath),
591
+ pathExists(filesPath),
592
+ ]);
593
+ return vectorsOk && recordsOk && contentOk && filesOk;
594
+ })(),
595
+ pathExists(cacheFile),
596
+ ]);
597
+
598
+ if (useBinary && !binaryFilesPresent) {
599
+ if (jsonCachePresent) {
600
+ console.warn(
601
+ '[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.'
602
+ );
603
+ } else {
604
+ console.warn(
605
+ '[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.'
606
+ );
607
+ }
608
+ } else if (!useBinary && !useSqlite && !jsonCachePresent) {
609
+ if (binaryFilesPresent) {
610
+ console.warn(
611
+ '[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.'
612
+ );
613
+ } else {
614
+ console.warn(
615
+ '[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.'
616
+ );
617
+ }
618
+ }
619
+
620
+ const configuredVectorLoadMode =
621
+ typeof this.config.vectorStoreLoadMode === 'string'
622
+ ? this.config.vectorStoreLoadMode.toLowerCase()
623
+ : 'memory';
624
+ const effectiveVectorLoadMode =
625
+ forceVectorLoadMode === 'disk' || forceVectorLoadMode === 'memory'
626
+ ? forceVectorLoadMode
627
+ : configuredVectorLoadMode;
628
+
629
+ if (useBinary) {
630
+ try {
631
+ this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
632
+ contentCacheEntries: this.config.contentCacheEntries,
633
+ vectorCacheEntries: this.config.vectorCacheEntries,
634
+ vectorLoadMode: effectiveVectorLoadMode,
635
+ });
636
+ cacheData = await this.binaryStore.toChunkViews({
637
+ includeContent: this.config.vectorStoreContentMode === 'inline',
638
+ includeVector: effectiveVectorLoadMode !== 'disk',
639
+ });
640
+ } catch (err) {
641
+ this.binaryStore = null;
642
+ const isCorruption =
643
+ err instanceof BinaryStoreCorruptionError || err?.name === 'BinaryStoreCorruptionError';
685
644
  if (isCorruption) {
686
645
  console.warn(`[Cache] Binary store corruption detected: ${err.message}`);
687
646
  this._corruptionDetected = true;
@@ -693,1182 +652,1123 @@ export class EmbeddingsCache {
693
652
  } else {
694
653
  console.warn(`[Cache] Failed to load binary vector store: ${err.message}`);
695
654
  }
696
- }
697
- }
698
-
699
-
700
- if (useSqlite) {
701
- try {
702
- this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
703
- if (this.sqliteStore) {
704
- cacheData = this.sqliteStore.toChunkViews({
705
- includeContent: this.config.vectorStoreContentMode === 'inline',
706
- includeVector: effectiveVectorLoadMode !== 'disk',
707
- });
708
- } else {
709
-
710
- console.warn('[Cache] vectorStoreFormat=sqlite but vectors.sqlite is missing. Reindex to regenerate the cache.');
711
- }
712
- } catch (err) {
713
- this.sqliteStore = null;
714
- console.warn(`[Cache] Failed to load SQLite vector store: ${err.message}`);
715
- }
716
- }
717
-
718
- if (!cacheData) {
719
- cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
720
- }
721
-
722
- const hasCacheData = Array.isArray(cacheData);
723
- const hasHashData = hashData && typeof hashData === 'object';
724
-
725
- if (hasCacheData) {
726
- const allowedExtensions = new Set(
727
- (this.config.fileExtensions || []).map((ext) => `.${ext}`)
728
- );
729
- const allowedFileNames = new Set(this.config.fileNames || []);
730
- const applyExtensionFilter = !this.binaryStore;
731
- const shouldKeepFile = (filePath) => {
732
- const ext = path.extname(filePath);
733
- if (allowedExtensions.has(ext)) return true;
734
- return allowedFileNames.has(path.basename(filePath));
735
- };
736
-
737
- const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
738
- this.vectorStore = [];
739
- this.fileHashes.clear();
740
-
741
-
742
- for (const chunk of cacheData) {
743
- if (applyExtensionFilter) {
744
- if (!shouldKeepFile(chunk.file)) continue;
745
- }
746
- normalizeChunkVector(chunk);
747
- this.vectorStore.push(chunk);
748
- }
749
- const filteredCount = cacheData.length - this.vectorStore.length;
750
- if (filteredCount > 0 && this.config.verbose) {
751
- console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
752
- }
753
-
754
- if (hasHashData) {
755
-
756
- for (const [file, entry] of rawHashes) {
757
- if (!applyExtensionFilter || shouldKeepFile(file)) {
758
- const normalized = normalizeFileHashEntry(entry);
759
- if (normalized) {
760
- this.fileHashes.set(file, normalized);
761
- }
762
- }
763
- }
764
- } else {
765
- console.warn(
766
- '[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared'
767
- );
768
- }
769
-
770
- assignChunkIndices(this.vectorStore);
771
-
772
- if (this.config.verbose) {
773
- console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
774
- }
775
-
776
-
777
- this.annDirty = false;
778
- this.annPersistDirty = false;
779
- this.annIndex = null;
780
- this.annMeta = null;
781
- this.annVectorCache = null;
782
- } else if (cacheData) {
783
- console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
784
- } else if (hasHashData) {
785
- console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
786
- }
787
-
788
-
789
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
790
- try {
791
- const callGraphData = await fs.readFile(callGraphFile, 'utf8');
792
- const parsed = JSON.parse(callGraphData);
793
- this.fileCallData = new Map(Object.entries(parsed));
794
- if (this.config.verbose) {
795
- console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
796
- }
797
- } catch {
798
-
799
- }
800
- } catch (error) {
801
- console.warn('[Cache] Failed to load cache:', error.message);
802
- this.clearInMemoryState();
803
- }
804
- }
805
-
806
-
807
-
808
- save({ throwOnError = false } = {}) {
809
- if (!this.config.enableCache) return Promise.resolve();
810
-
811
- this._saveRequested = true;
812
- if (throwOnError) {
813
- this._saveThrowOnError = true;
814
- }
815
-
816
- if (this._saveTimer) return this._savePromise ?? Promise.resolve();
817
-
818
- const debounceMs = Number.isInteger(this.config.saveDebounceMs)
819
- ? this.config.saveDebounceMs
820
- : 250;
821
-
822
- this._savePromise = new Promise((resolve, reject) => {
823
- this._saveTimer = setTimeout(() => {
824
- this._saveTimer = null;
825
- const rejectOnSaveError = this._saveThrowOnError;
826
- this._saveThrowOnError = false;
827
-
828
- this.saveQueue = this.saveQueue
829
- .catch(() => {
830
-
831
- })
832
- .then(async () => {
833
- while (this._saveRequested) {
834
- this._saveRequested = false;
835
- await this.performSave({ throwOnError: rejectOnSaveError });
836
- }
837
- })
838
- .then(resolve, reject)
839
- .finally(() => {
840
- this._savePromise = null;
841
- });
842
- }, debounceMs);
843
- });
844
-
845
- return this._savePromise;
846
- }
847
-
848
- async performSave({ throwOnError = false } = {}) {
849
-
850
- this._saveInProgress = true;
851
- if (
852
- this.config.allowSystemWorkspaceCache !== true &&
853
- this.config.searchDirectory &&
854
- isNonProjectDirectory(this.config.searchDirectory)
855
- ) {
856
- const source = this.config.workspaceResolution?.source || 'unknown';
857
- console.warn(
858
- `[Cache] Skipping cache save for non-project workspace (${source}): ${this.config.searchDirectory}`
859
- );
860
- this._saveInProgress = false;
861
- return;
862
- }
863
-
864
-
865
- if (this.activeReads > 0) {
866
- const timeoutMs = this.config.saveReaderWaitTimeoutMs ?? DEFAULT_READER_WAIT_TIMEOUT_MS;
867
- const allReadersFinished = await this.waitForReadersWithTimeout(timeoutMs);
868
- if (!allReadersFinished && !this.config.forceSaveWithActiveReaders) {
869
- console.warn('[Cache] Aborting save - active readers still present after timeout');
870
- this._saveInProgress = false;
871
- return;
872
- }
873
- }
874
-
875
- this.isSaving = true;
876
-
877
- try {
878
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
879
-
880
- const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
881
- const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
882
- const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
883
-
884
-
885
-
886
- const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
887
- const supportsBackendVectorResolve =
888
- this.config.vectorStoreFormat === 'binary' || this.config.vectorStoreFormat === 'sqlite';
889
- const hasMissingVectors = snapshotStore.some(
890
- (chunk) => chunk && (chunk.vector === undefined || chunk.vector === null)
891
- );
892
- const useDiskVectors =
893
- supportsBackendVectorResolve &&
894
- (this.config.vectorStoreLoadMode === 'disk' || hasMissingVectors);
895
- if (hasMissingVectors && !useDiskVectors) {
896
- throw new Error(
897
- 'Missing vector data for cache write and backend vector resolution is unavailable'
898
- );
899
- }
900
-
901
- this.cacheMeta = {
902
- version: CACHE_META_VERSION,
903
- embeddingModel: this.config.embeddingModel,
904
- embeddingDimension: this.config.embeddingDimension ?? null,
905
- lastSaveTime: new Date().toISOString(),
906
- filesIndexed: this.fileHashes.size,
907
- chunksStored: snapshotStore.length,
908
- workspace: this.config.searchDirectory || null,
909
- };
910
- if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
911
- this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
912
- }
913
- if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
914
- Object.assign(this.cacheMeta, this.lastIndexStats);
915
- }
916
-
917
- const total = snapshotStore.length;
918
- if (this.config.vectorStoreFormat === 'binary') {
919
- this.binaryStore = await BinaryVectorStore.write(
920
- this.config.cacheDirectory,
921
- snapshotStore,
922
- {
923
- contentCacheEntries: this.config.contentCacheEntries,
924
- vectorCacheEntries: this.config.vectorCacheEntries,
925
- vectorLoadMode: useDiskVectors ? 'disk' : this.config.vectorStoreLoadMode,
926
- getContent: (chunk, index) => this.getChunkContent(chunk, index),
927
- getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
928
- preRename: async () => {
929
- if (this.activeReads > 0) {
930
- await this.waitForReadersWithTimeout(
931
- Number.isInteger(this.config.saveReaderWaitTimeoutMs)
932
- ? this.config.saveReaderWaitTimeoutMs
933
- : 5000
934
- );
935
- }
936
- if (this.binaryStore) {
937
- await this.binaryStore.close();
938
- this.binaryStore = null;
939
- }
940
- },
941
- }
942
- );
943
- if (this.binaryStore) {
944
- this.cacheMeta.chunksStored = this.binaryStore.length;
945
- }
946
- } else if (this.config.vectorStoreFormat === 'sqlite') {
947
-
948
- if (this.sqliteStore) {
949
- try {
950
- this.sqliteStore.close();
951
- } catch {
952
-
953
- }
954
- this.sqliteStore = null;
955
- }
956
- this.sqliteStore = await SqliteVectorStore.write(
957
- this.config.cacheDirectory,
958
- snapshotStore,
959
- {
960
- getContent: (chunk, index) => this.getChunkContent(chunk, index),
961
- getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
962
- preRename: async () => {
963
- if (this.activeReads > 0) {
964
- await this.waitForReadersWithTimeout(
965
- Number.isInteger(this.config.saveReaderWaitTimeoutMs)
966
- ? this.config.saveReaderWaitTimeoutMs
967
- : 5000
968
- );
969
- }
970
- },
971
- }
972
- );
973
- if (this.sqliteStore) {
974
- this.cacheMeta.chunksStored = this.sqliteStore.length();
975
- }
976
- } else {
977
- const vectorWriter = new StreamingJsonWriter(cacheFile, {
978
- highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
979
- floatDigits: this.config.cacheVectorFloatDigits ?? 6,
980
- flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
981
- indent: '',
982
- assumeFinite: this.config.cacheVectorAssumeFinite,
983
- checkFinite: this.config.cacheVectorCheckFinite,
984
- noMutation: this.config.cacheVectorNoMutation ?? false,
985
- joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
986
- joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
987
- });
988
-
989
- await vectorWriter.writeStart();
990
-
991
-
992
- const yieldEvery = total >= 50_000 ? 5000 : 0;
993
-
994
- try {
995
- for (let i = 0; i < total; i++) {
996
- const pending = vectorWriter.writeItem(snapshotStore[i]);
997
- if (pending) await pending;
998
- if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
999
- }
1000
- await vectorWriter.writeEnd();
1001
- } catch (e) {
1002
- vectorWriter.abort(e);
1003
- throw e;
1004
- }
1005
- }
1006
-
1007
- const hashEntries = {};
1008
- for (const [file, entry] of this.fileHashes) {
1009
- const serialized = serializeFileHashEntry(entry);
1010
- if (serialized) {
1011
- hashEntries[file] = serialized;
1012
- }
1013
- }
1014
-
1015
- await Promise.all([
1016
- fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
1017
- fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
1018
- ]);
1019
-
1020
-
1021
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
1022
- if (this.fileCallData.size > 0) {
1023
- await fs.writeFile(
1024
- callGraphFile,
1025
- JSON.stringify(Object.fromEntries(this.fileCallData), null, 2)
1026
- );
1027
- } else {
1028
- await fs.rm(callGraphFile, { force: true });
1029
- }
1030
-
1031
-
1032
-
1033
- if (
1034
- this.config.annIndexCache !== false &&
1035
- this.annPersistDirty &&
1036
- !this.annDirty &&
1037
- !this._annWriting &&
1038
- this.annIndex &&
1039
- this.annMeta
1040
- ) {
1041
- this._annWriting = true;
1042
- try {
1043
- const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
1044
- this.annIndex.writeIndexSync(indexFile);
1045
- await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
1046
- this.annPersistDirty = false;
1047
- if (this.config.verbose) {
1048
- console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
1049
- }
1050
- } catch (error) {
1051
- console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
1052
- } finally {
1053
- this._annWriting = false;
1054
- }
1055
- }
1056
- this.lastSaveError = null;
1057
- } catch (error) {
1058
- this.lastSaveError = error instanceof Error ? error : new Error(String(error));
1059
- console.warn('[Cache] Failed to save cache:', this.lastSaveError.message);
1060
-
1061
- if (
1062
- this.config.vectorStoreFormat === 'binary' &&
1063
- this.binaryStore &&
1064
- !this.binaryStore.vectorsBuffer
1065
- ) {
1066
- try {
1067
- console.info('[Cache] Attempting to recover binary store after failed save...');
1068
- this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
1069
- contentCacheEntries: this.config.contentCacheEntries,
1070
- });
1071
- console.info('[Cache] Binary store recovered.');
1072
- } catch (recoverErr) {
1073
- console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
1074
- this.binaryStore = null;
1075
- }
1076
- }
1077
-
1078
- if (
1079
- this.config.vectorStoreFormat === 'sqlite' &&
1080
- !this.sqliteStore
1081
- ) {
1082
- try {
1083
- console.info('[Cache] Attempting to recover SQLite store after failed save...');
1084
- this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
1085
- if (this.sqliteStore) {
1086
- console.info('[Cache] SQLite store recovered.');
1087
- }
1088
- } catch (recoverErr) {
1089
- console.warn(`[Cache] Failed to recover SQLite store: ${recoverErr.message}`);
1090
- this.sqliteStore = null;
1091
- }
1092
- }
1093
- if (throwOnError) {
1094
- const wrapped = new Error(`Cache save failed: ${this.lastSaveError.message}`);
1095
- wrapped.cause = this.lastSaveError;
1096
- throw wrapped;
1097
- }
1098
- } finally {
1099
- this.isSaving = false;
1100
- this._saveInProgress = false;
1101
- }
1102
- }
1103
-
1104
-
1105
-
1106
- getVectorStore() {
1107
- return Array.isArray(this.vectorStore) ? this.vectorStore : [];
1108
- }
1109
-
1110
- async setVectorStore(store) {
1111
- const previousBinaryStore = this.binaryStore;
1112
- const previousSqliteStore = this.sqliteStore;
1113
- this.vectorStore = store;
1114
- this.binaryStore = null;
1115
- this.sqliteStore = null;
1116
- if (Array.isArray(this.vectorStore)) {
1117
- for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
1118
- assignChunkIndices(this.vectorStore);
1119
- }
1120
- this.invalidateAnnIndex();
1121
- if (previousBinaryStore) {
1122
- try {
1123
- await previousBinaryStore.close();
1124
- } catch {
1125
-
1126
- }
1127
- }
1128
- if (previousSqliteStore) {
1129
- try {
1130
- previousSqliteStore.close();
1131
- } catch {
1132
-
1133
- }
1134
- }
1135
- }
1136
-
1137
- setLastIndexDuration(durationMs) {
1138
- if (Number.isFinite(durationMs) && durationMs >= 0) {
1139
- this.lastIndexDurationMs = durationMs;
1140
- }
1141
- }
1142
-
1143
- setLastIndexStats(stats) {
1144
- if (stats && typeof stats === 'object') {
1145
- this.lastIndexStats = { ...stats };
1146
- }
1147
- }
1148
-
1149
- getFileHash(file) {
1150
- const entry = this.fileHashes.get(file);
1151
- if (typeof entry === 'string') return entry;
1152
- return entry?.hash;
1153
- }
1154
-
1155
- getFileHashKeys() {
1156
- return Array.from(this.fileHashes.keys());
1157
- }
1158
-
1159
- getFileHashCount() {
1160
- return this.fileHashes.size;
1161
- }
1162
-
1163
- clearFileHashes() {
1164
- this.fileHashes.clear();
1165
- }
1166
-
1167
- setFileHashes(entries) {
1168
- this.fileHashes.clear();
1169
- if (!entries || typeof entries !== 'object') return;
1170
- const iterator =
1171
- entries instanceof Map
1172
- ? entries.entries()
1173
- : Object.entries(entries);
1174
- if (!iterator) return;
1175
- for (const [file, entry] of iterator) {
1176
- const normalized = normalizeFileHashEntry(entry);
1177
- if (normalized) {
1178
- this.fileHashes.set(file, normalized);
1179
- }
1180
- }
1181
- }
1182
-
1183
- setFileHash(file, hash, meta = null) {
1184
- const entry = { hash };
1185
- if (meta && typeof meta === 'object') {
1186
- if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
1187
- if (Number.isFinite(meta.size)) entry.size = meta.size;
1188
- }
1189
- this.fileHashes.set(file, entry);
1190
- }
1191
-
1192
- getFileMeta(file) {
1193
- const entry = this.fileHashes.get(file);
1194
- if (!entry) return null;
1195
- if (typeof entry === 'string') return { hash: entry };
1196
- return entry;
1197
- }
1198
-
1199
- getChunkVector(chunk, index = null) {
1200
- if (typeof chunk === 'number') {
1201
- const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1202
- const entry = store ? store[chunk] : null;
1203
- if (entry?.vector) return entry.vector;
1204
- if (this.binaryStore) {
1205
- const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
1206
- return this.binaryStore.getVector(resolved);
1207
- }
1208
- if (this.sqliteStore) {
1209
- const resolved = Number.isInteger(entry?._sqliteIndex) ? entry._sqliteIndex : chunk;
1210
- return this.sqliteStore.getVector(resolved);
1211
- }
1212
- return null;
1213
- }
1214
-
1215
- if (chunk?.vector) return chunk.vector;
1216
- const resolved = Number.isInteger(index) ? index : chunk?._index;
1217
- if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1218
- return this.binaryStore.getVector(chunk._binaryIndex);
1219
- }
1220
- if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1221
- return this.binaryStore.getVector(resolved);
1222
- }
1223
- if (this.sqliteStore) {
1224
- const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1225
- ? chunk._sqliteIndex
1226
- : Number.isInteger(chunk?.index)
1227
- ? chunk.index
1228
- : resolved;
1229
- if (Number.isInteger(sqliteIndex)) {
1230
- return this.sqliteStore.getVector(sqliteIndex);
1231
- }
1232
- }
1233
- return null;
1234
- }
1235
-
1236
- async getChunkContent(chunk, index = null) {
1237
- if (typeof chunk === 'number') {
1238
- const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1239
- const entry = store ? store[chunk] : null;
1240
- if (entry) return await this.getChunkContent(entry, chunk);
1241
- if (!store && this.binaryStore) {
1242
- const content = await this.binaryStore.getContent(chunk);
1243
- return content ?? '';
1244
- }
1245
- if (!store && this.sqliteStore) {
1246
- return this.sqliteStore.getContent(chunk) ?? '';
1247
- }
1248
- return '';
1249
- }
1250
- if (chunk?.content !== undefined && chunk?.content !== null) {
1251
- return chunk.content;
1252
- }
1253
- if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1254
- const content = await this.binaryStore.getContent(chunk._binaryIndex);
1255
- return content ?? '';
1256
- }
1257
- const resolved = Number.isInteger(index) ? index : chunk?._index;
1258
- if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1259
- const content = await this.binaryStore.getContent(resolved);
1260
- return content ?? '';
1261
- }
1262
- if (this.sqliteStore) {
1263
- const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1264
- ? chunk._sqliteIndex
1265
- : Number.isInteger(chunk?.index)
1266
- ? chunk.index
1267
- : resolved;
1268
- if (Number.isInteger(sqliteIndex)) {
1269
- return this.sqliteStore.getContent(sqliteIndex) ?? '';
1270
- }
1271
- }
1272
- return '';
1273
- }
1274
-
1275
- deleteFileHash(file) {
1276
- this.fileHashes.delete(file);
1277
- }
1278
-
1279
-
1280
- async removeFileFromStore(file) {
1281
- if (!Array.isArray(this.vectorStore)) return;
1282
-
1283
- let w = 0;
1284
- for (let r = 0; r < this.vectorStore.length; r++) {
1285
- const chunk = this.vectorStore[r];
1286
- if (chunk.file !== file) {
1287
- chunk._index = w;
1288
- this.vectorStore[w++] = chunk;
1289
- }
1290
- }
1291
- this.vectorStore.length = w;
1292
-
1293
-
1294
- this.invalidateAnnIndex();
1295
- this.removeFileCallData(file);
1296
-
1297
- this.fileHashes.delete(file);
1298
- }
1299
-
1300
- addToStore(chunk) {
1301
- normalizeChunkVector(chunk);
1302
-
1303
- if (!Array.isArray(this.vectorStore)) {
1304
- this.vectorStore = [];
1305
- }
1306
-
1307
- const label = this.vectorStore.length;
1308
- chunk._index = label;
1309
- this.vectorStore.push(chunk);
1310
- if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
1311
- this.annVectorCache.push(chunk.vector);
1312
- }
1313
-
1314
-
1315
- if (
1316
- this.annIndex &&
1317
- !this.annDirty &&
1318
- this.annMeta &&
1319
- typeof this.annIndex.addPoint === 'function' &&
1320
- this.annMeta.count === label &&
1321
- this.annMeta.maxElements > this.annMeta.count
1322
- ) {
1323
- try {
1324
- this.annIndex.addPoint(chunk.vector, label);
1325
- this.annMeta.count += 1;
1326
- this.annPersistDirty = true;
1327
- return;
1328
- } catch {
1329
-
1330
- }
1331
- }
1332
-
1333
- this.invalidateAnnIndex();
1334
- }
1335
-
1336
- invalidateAnnIndex() {
1337
- this.annIndex = null;
1338
- this.annMeta = null;
1339
- this.annDirty = true;
1340
- this.annPersistDirty = false;
1341
- this.annVectorCache = null;
1342
- }
1343
-
1344
- getAnnVector(index) {
1345
- if (!Array.isArray(this.vectorStore)) return null;
1346
- const chunk = this.vectorStore[index];
1347
- if (!chunk) return null;
1348
-
1349
- if (
1350
- !Array.isArray(this.annVectorCache) ||
1351
- this.annVectorCache.length !== this.vectorStore.length
1352
- ) {
1353
- this.annVectorCache = new Array(this.vectorStore.length);
1354
- }
1355
-
1356
- const cached = this.annVectorCache[index];
1357
- if (cached) return cached;
1358
-
1359
- let vec = null;
1360
- if (chunk.vector) {
1361
- vec = ensureFloat32(chunk.vector);
1362
- } else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
1363
- vec = this.binaryStore.getVector(chunk._binaryIndex);
1364
- } else if (this.sqliteStore) {
1365
- const sqliteIndex = Number.isInteger(chunk._sqliteIndex)
1366
- ? chunk._sqliteIndex
1367
- : Number.isInteger(chunk.index)
1368
- ? chunk.index
1369
- : index;
1370
- if (Number.isInteger(sqliteIndex)) {
1371
- vec = this.sqliteStore.getVector(sqliteIndex);
1372
- }
1373
- }
1374
-
1375
- if (!vec) return null;
1376
-
1377
- if (this.config.vectorStoreLoadMode !== 'disk') {
1378
- chunk.vector = vec;
1379
- }
1380
- this.annVectorCache[index] = vec;
1381
- return vec;
1382
- }
1383
-
1384
- getAnnIndexPaths() {
1385
- return {
1386
- indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
1387
- metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
1388
- };
1389
- }
1390
-
1391
-
1392
-
1393
-
1394
- async ensureAnnIndex() {
1395
- if (!this.config.annEnabled) return null;
1396
- if (!Array.isArray(this.vectorStore)) return null;
1397
- if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
1398
- if (this.annIndex && !this.annDirty) return this.annIndex;
1399
- if (this.annLoading) return this.annLoading;
1400
-
1401
- this.annLoading = (async () => {
1402
- try {
1403
- const HierarchicalNSW = await loadHnswlib();
1404
- if (!HierarchicalNSW) {
1405
- if (hnswlibLoadError) {
1406
- this.addInitError('loadHnswlib', hnswlibLoadError);
1407
- }
1408
- return null;
1409
- }
1410
-
1411
- const dim =
1412
- this.vectorStore[0]?.vector?.length ||
1413
- this.binaryStore?.dim ||
1414
- this.sqliteStore?.dim;
1415
- if (!dim) return null;
1416
-
1417
-
1418
-
1419
- let dimensionMismatch = false;
1420
- const sampleSize = Math.min(ANN_DIMENSION_SAMPLE_SIZE, this.vectorStore.length);
1421
- const step = Math.max(1, Math.floor(this.vectorStore.length / sampleSize));
1422
- for (let i = step; i < this.vectorStore.length; i += step) {
1423
- const v = this.vectorStore[i]?.vector;
1424
- if (v && v.length !== dim) {
1425
- dimensionMismatch = true;
1426
- console.warn(
1427
- `[ANN] Dimension mismatch at index ${i}: expected ${dim}, got ${v.length}. ` +
1428
- 'This may indicate a config change mid-index. Consider full reindex.'
1429
- );
1430
- break;
1431
- }
1432
- }
1433
-
1434
- if (dimensionMismatch) {
1435
- this.addInitError('ensureAnnIndex', `Vector dimension inconsistency detected. Expected ${dim}. Full reindex required.`);
1436
- return null;
1437
- }
1438
-
1439
- if (!this.annDirty && this.config.annIndexCache !== false) {
1440
- const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
1441
- if (loaded) return this.annIndex;
1442
- }
1443
-
1444
- return await this.buildAnnIndex(HierarchicalNSW, dim);
1445
- } finally {
1446
- this.annLoading = null;
1447
- }
1448
- })();
1449
-
1450
- return this.annLoading;
1451
- }
1452
-
1453
- async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
1454
- const { indexFile, metaFile } = this.getAnnIndexPaths();
1455
- const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
1456
- if (!metaData) return false;
1457
-
1458
- let meta;
1459
- try {
1460
- meta = JSON.parse(metaData);
1461
- } catch {
1462
- console.warn('[ANN] Invalid ANN metadata, rebuilding');
1463
- return false;
1464
- }
1465
-
1466
- if (meta?.version !== ANN_META_VERSION) {
1467
- console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
1468
- return false;
1469
- }
1470
-
1471
- if (meta?.embeddingModel !== this.config.embeddingModel) {
1472
- console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
1473
- return false;
1474
- }
1475
-
1476
- if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
1477
- console.warn('[ANN] ANN index size mismatch, rebuilding');
1478
- return false;
1479
- }
1480
-
1481
- if (
1482
- meta?.metric !== this.config.annMetric ||
1483
- meta?.m !== this.config.annM ||
1484
- meta?.efConstruction !== this.config.annEfConstruction
1485
- ) {
1486
- console.warn('[ANN] ANN index config changed, rebuilding');
1487
- return false;
1488
- }
1489
-
1490
- let maxElements = meta?.maxElements;
1491
- if (!Number.isInteger(maxElements)) {
1492
- maxElements = meta.count;
1493
- } else if (maxElements < meta.count) {
1494
- console.warn('[ANN] ANN capacity invalid, rebuilding');
1495
- return false;
1496
- }
1497
-
1498
- const index = new HierarchicalNSW(meta.metric, dim);
1499
- const loaded = readHnswIndex(index, indexFile, maxElements);
1500
- if (!loaded) {
1501
- console.warn('[ANN] Failed to load ANN index file, rebuilding');
1502
- return false;
1503
- }
1504
-
1505
- if (typeof index.setEf === 'function') {
1506
- index.setEf(this.config.annEfSearch);
1507
- }
1508
-
1509
- this.annIndex = index;
1510
- this.annMeta = { ...meta, maxElements };
1511
- this.annDirty = false;
1512
- this.annPersistDirty = false;
1513
-
1514
- if (this.config.verbose) {
1515
- console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
1516
- }
1517
- return true;
1518
- }
1519
-
1520
- async buildAnnIndex(HierarchicalNSW, dim) {
1521
- if (!Array.isArray(this.vectorStore)) return null;
1522
- const total = this.vectorStore.length;
1523
- if (total === 0) return null;
1524
-
1525
- try {
1526
- const index = new HierarchicalNSW(this.config.annMetric, dim);
1527
-
1528
- const maxElements = computeAnnCapacity(total, this.config);
1529
- initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);
1530
-
1531
- const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
1532
- ? this.config.annBuildYieldEvery
1533
- : 1000;
1534
-
1535
- for (let i = 0; i < total; i++) {
1536
- const vector = this.getAnnVector(i);
1537
- if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
1538
- index.addPoint(vector, i);
1539
-
1540
- if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
1541
- await yieldToLoop();
1542
- }
1543
- }
1544
-
1545
- if (typeof index.setEf === 'function') {
1546
- index.setEf(this.config.annEfSearch);
1547
- }
1548
-
1549
- this.annIndex = index;
1550
- this.annMeta = {
1551
- version: ANN_META_VERSION,
1552
- embeddingModel: this.config.embeddingModel,
1553
- metric: this.config.annMetric,
1554
- dim,
1555
- count: total,
1556
- maxElements,
1557
- m: this.config.annM,
1558
- efConstruction: this.config.annEfConstruction,
1559
- efSearch: this.config.annEfSearch,
1560
- };
1561
- this.annDirty = false;
1562
- this.annPersistDirty = true;
1563
-
1564
- if (this.config.annIndexCache !== false) {
1565
- try {
1566
- await fs.mkdir(this.config.cacheDirectory, { recursive: true });
1567
- const { indexFile, metaFile } = this.getAnnIndexPaths();
1568
- index.writeIndexSync(indexFile);
1569
- await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
1570
- this.annPersistDirty = false;
1571
- if (this.config.verbose) {
1572
- console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
1573
- }
1574
- } catch (error) {
1575
- console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
1576
- }
1577
- }
1578
-
1579
- return index;
1580
- } catch (error) {
1581
- console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
1582
- this.addInitError('buildAnnIndex', error);
1583
- this.annIndex = null;
1584
- this.annMeta = null;
1585
- this.annDirty = true;
1586
- this.annPersistDirty = false;
1587
- return null;
1588
- }
1589
- }
1590
-
1591
-
1592
- async queryAnn(queryVector, k) {
1593
- if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
1594
- const index = await this.ensureAnnIndex();
1595
- if (!index) return [];
1596
-
1597
- const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
1598
-
1599
-
1600
- let results;
1601
- try {
1602
- results = index.searchKnn(qVec, k);
1603
- } catch (err) {
1604
- console.warn(`[ANN] searchKnn failed: ${err.message}. Falling back to linear search.`);
1605
- this.addInitError('queryAnn', err);
1606
-
1607
- this.invalidateAnnIndex();
1608
- return [];
1609
- }
1610
-
1611
- const labels = normalizeLabels(results);
1612
-
1613
- if (labels.length === 0) return [];
1614
-
1615
- const filtered = labels.filter(
1616
- (label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
1617
- );
1618
-
1619
- return filtered;
1620
- }
1621
-
1622
- async clear() {
1623
- if (!this.config.enableCache) return;
1624
-
1625
- try {
1626
- await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
1627
- this.vectorStore = [];
1628
- if (this.binaryStore) {
1629
- try {
1630
- await this.binaryStore.close();
1631
- } catch {
1632
-
1633
- }
1634
- }
1635
- this.binaryStore = null;
1636
- if (this.sqliteStore) {
1637
- try {
1638
- this.sqliteStore.close();
1639
- } catch {
1640
-
1641
- }
1642
- }
1643
- this.sqliteStore = null;
1644
- this.fileHashes = new Map();
1645
- this.invalidateAnnIndex();
1646
- await this.clearCallGraphData();
1647
- if (this.config.verbose) {
1648
- console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
1649
- }
1650
- } catch (error) {
1651
- console.error('[Cache] Failed to clear cache:', error.message);
1652
- throw error;
1653
- }
1654
- }
1655
-
1656
-
1657
- setEfSearch(efSearch) {
1658
- if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
1659
- return {
1660
- success: false,
1661
- error: 'efSearch must be a number between 1 and 1000',
1662
- };
1663
- }
1664
-
1665
- this.config.annEfSearch = efSearch;
1666
-
1667
- if (this.annIndex && typeof this.annIndex.setEf === 'function') {
1668
- this.annIndex.setEf(efSearch);
1669
- if (this.annMeta) this.annMeta.efSearch = efSearch;
1670
- this.annPersistDirty = true;
1671
- if (this.config.verbose) {
1672
- console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
1673
- }
1674
- return { success: true, applied: true, efSearch };
1675
- }
1676
-
1677
- if (this.config.verbose) {
1678
- console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
1679
- }
1680
- return { success: true, applied: false, efSearch };
1681
- }
1682
-
1683
-
1684
- getAnnStats() {
1685
- return {
1686
- enabled: this.config.annEnabled ?? false,
1687
- indexLoaded: this.annIndex !== null,
1688
- dirty: this.annDirty,
1689
- vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
1690
- minChunksForAnn: this.config.annMinChunks ?? 5000,
1691
- config: this.annMeta
1692
- ? {
1693
- metric: this.annMeta.metric,
1694
- dim: this.annMeta.dim,
1695
- count: this.annMeta.count,
1696
- m: this.annMeta.m,
1697
- efConstruction: this.annMeta.efConstruction,
1698
- efSearch: this.config.annEfSearch,
1699
- }
1700
- : null,
1701
- };
1702
- }
1703
-
1704
-
1705
-
1706
- async clearCallGraphData({ removeFile = false } = {}) {
1707
- this.fileCallData.clear();
1708
- this.callGraph = null;
1709
-
1710
- if (removeFile && this.config.enableCache) {
1711
- const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
1712
- try {
1713
- await fs.rm(callGraphFile, { force: true });
1714
- } catch (error) {
1715
- if (this.config.verbose) {
1716
- console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
1717
- }
1718
- }
1719
- }
1720
- }
1721
-
1722
- pruneCallGraphData(validFiles) {
1723
- if (!validFiles || this.fileCallData.size === 0) return 0;
1724
-
1725
- let pruned = 0;
1726
- for (const file of Array.from(this.fileCallData.keys())) {
1727
- if (!validFiles.has(file)) {
1728
- this.fileCallData.delete(file);
1729
- pruned++;
1730
- }
1731
- }
1732
-
1733
- if (pruned > 0) this.callGraph = null;
1734
- return pruned;
1735
- }
1736
-
1737
- getFileCallData(file) {
1738
- return this.fileCallData.get(file);
1739
- }
1740
-
1741
- hasFileCallData(file) {
1742
- return this.fileCallData.has(file);
1743
- }
1744
-
1745
- getFileCallDataKeys() {
1746
- return Array.from(this.fileCallData.keys());
1747
- }
1748
-
1749
- getFileCallDataCount() {
1750
- return this.fileCallData.size;
1751
- }
1752
-
1753
-
1754
- setFileCallData(file, data) {
1755
- this.fileCallData.set(file, data);
1756
- this.callGraph = null;
1757
- }
1758
-
1759
-
1760
- setFileCallDataEntries(entries) {
1761
- if (entries instanceof Map) {
1762
- this.fileCallData = entries;
1763
- } else {
1764
- this.fileCallData.clear();
1765
- if (entries && typeof entries === 'object') {
1766
- for (const [file, data] of Object.entries(entries)) {
1767
- this.fileCallData.set(file, data);
1768
- }
1769
- }
1770
- }
1771
- this.callGraph = null;
1772
- }
1773
-
1774
- clearFileCallData() {
1775
- this.fileCallData.clear();
1776
- this.callGraph = null;
1777
- }
1778
-
1779
- removeFileCallData(file) {
1780
- this.fileCallData.delete(file);
1781
- this.callGraph = null;
1782
- }
1783
-
1784
- async rebuildCallGraph() {
1785
- if (this._callGraphBuild) return this._callGraphBuild;
1786
-
1787
- this._callGraphBuild = (async () => {
1788
- try {
1789
- const { buildCallGraph } = await import('./call-graph.js');
1790
- this.callGraph = buildCallGraph(this.fileCallData);
1791
- if (this.config.verbose && this.callGraph) {
1792
- console.info(
1793
- `[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`
1794
- );
1795
- }
1796
- } catch (err) {
1797
- console.error(`[CallGraph] Failed to build: ${err.message}`);
1798
- this.callGraph = null;
1799
- } finally {
1800
- this._callGraphBuild = null;
1801
- }
1802
- })();
1803
-
1804
- return this._callGraphBuild;
1805
- }
1806
-
1807
- async getRelatedFiles(symbols) {
1808
- if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
1809
- if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
1810
- if (!this.callGraph) return new Map();
1811
-
1812
- const { getRelatedFiles } = await import('./call-graph.js');
1813
- return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
1814
- }
1815
-
1816
- getCallGraphStats() {
1817
- return {
1818
- enabled: this.config.callGraphEnabled ?? false,
1819
- filesWithData: this.fileCallData.size,
1820
- graphBuilt: this.callGraph !== null,
1821
- definitions: this.callGraph?.defines.size ?? 0,
1822
- callTargets: this.callGraph?.calledBy.size ?? 0,
1823
- };
1824
- }
1825
-
1826
-
1827
-
1828
-
1829
- getStoreSize() {
1830
- if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
1831
- if (this.binaryStore) return this.binaryStore.length;
1832
- if (this.sqliteStore) return this.sqliteStore.length();
1833
- return 0;
1834
- }
1835
-
1836
-
1837
- getVector(index) {
1838
- return this.getChunkVector(index);
1839
- }
1840
-
1841
-
1842
- getChunk(index) {
1843
- if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
1844
- return this.vectorStore[index];
1845
- }
1846
- if (this.binaryStore) {
1847
- const record = this.binaryStore.getRecord(index);
1848
- if (record) {
1849
- return {
1850
- file: record.file,
1851
- startLine: record.startLine,
1852
- endLine: record.endLine,
1853
- vector: this.binaryStore.getVector(index),
1854
- _index: index,
1855
- _binaryIndex: index,
1856
- };
1857
- }
1858
- }
1859
- if (this.sqliteStore) {
1860
- const record = this.sqliteStore.getRecord(index);
1861
- if (record) {
1862
- return {
1863
- file: record.file,
1864
- startLine: record.startLine,
1865
- endLine: record.endLine,
1866
- vector: this.sqliteStore.getVector(index),
1867
- _index: index,
1868
- _sqliteIndex: index,
1869
- };
1870
- }
1871
- }
1872
- return null;
1873
- }
1874
- }
655
+ }
656
+ }
657
+
658
+ if (useSqlite) {
659
+ try {
660
+ this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
661
+ if (this.sqliteStore) {
662
+ cacheData = this.sqliteStore.toChunkViews({
663
+ includeContent: this.config.vectorStoreContentMode === 'inline',
664
+ includeVector: effectiveVectorLoadMode !== 'disk',
665
+ });
666
+ } else {
667
+ console.warn(
668
+ '[Cache] vectorStoreFormat=sqlite but vectors.sqlite is missing. Reindex to regenerate the cache.'
669
+ );
670
+ }
671
+ } catch (err) {
672
+ this.sqliteStore = null;
673
+ console.warn(`[Cache] Failed to load SQLite vector store: ${err.message}`);
674
+ }
675
+ }
676
+
677
+ if (!cacheData) {
678
+ cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
679
+ }
680
+
681
+ const hasCacheData = Array.isArray(cacheData);
682
+ const hasHashData = hashData && typeof hashData === 'object';
683
+
684
+ if (hasCacheData) {
685
+ const allowedExtensions = new Set(
686
+ (this.config.fileExtensions || []).map((ext) => `.${ext}`)
687
+ );
688
+ const allowedFileNames = new Set(this.config.fileNames || []);
689
+ const applyExtensionFilter = !this.binaryStore;
690
+ const shouldKeepFile = (filePath) => {
691
+ const ext = path.extname(filePath);
692
+ if (allowedExtensions.has(ext)) return true;
693
+ return allowedFileNames.has(path.basename(filePath));
694
+ };
695
+
696
+ const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
697
+ this.vectorStore = [];
698
+ this.fileHashes.clear();
699
+
700
+ for (const chunk of cacheData) {
701
+ if (applyExtensionFilter) {
702
+ if (!shouldKeepFile(chunk.file)) continue;
703
+ }
704
+ normalizeChunkVector(chunk);
705
+ this.vectorStore.push(chunk);
706
+ }
707
+ const filteredCount = cacheData.length - this.vectorStore.length;
708
+ if (filteredCount > 0 && this.config.verbose) {
709
+ console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
710
+ }
711
+
712
+ if (hasHashData) {
713
+ for (const [file, entry] of rawHashes) {
714
+ if (!applyExtensionFilter || shouldKeepFile(file)) {
715
+ const normalized = normalizeFileHashEntry(entry);
716
+ if (normalized) {
717
+ this.fileHashes.set(file, normalized);
718
+ }
719
+ }
720
+ }
721
+ } else {
722
+ console.warn(
723
+ '[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared'
724
+ );
725
+ }
726
+
727
+ assignChunkIndices(this.vectorStore);
728
+
729
+ if (this.config.verbose) {
730
+ console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
731
+ }
732
+
733
+ this.annDirty = false;
734
+ this.annPersistDirty = false;
735
+ this.annIndex = null;
736
+ this.annMeta = null;
737
+ this.annVectorCache = null;
738
+ } else if (cacheData) {
739
+ console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
740
+ } else if (hasHashData) {
741
+ console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
742
+ }
743
+
744
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
745
+ try {
746
+ const callGraphData = await fs.readFile(callGraphFile, 'utf8');
747
+ const parsed = JSON.parse(callGraphData);
748
+ this.fileCallData = new Map(Object.entries(parsed));
749
+ if (this.config.verbose) {
750
+ console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
751
+ }
752
+ } catch {}
753
+ } catch (error) {
754
+ console.warn('[Cache] Failed to load cache:', error.message);
755
+ this.clearInMemoryState();
756
+ }
757
+ }
758
+
759
+ save({ throwOnError = false } = {}) {
760
+ if (!this.config.enableCache) return Promise.resolve();
761
+
762
+ this._saveRequested = true;
763
+ if (throwOnError) {
764
+ this._saveThrowOnError = true;
765
+ }
766
+
767
+ if (this._saveTimer) return this._savePromise ?? Promise.resolve();
768
+
769
+ const debounceMs = Number.isInteger(this.config.saveDebounceMs)
770
+ ? this.config.saveDebounceMs
771
+ : 250;
772
+
773
+ this._savePromise = new Promise((resolve, reject) => {
774
+ this._saveTimer = setTimeout(() => {
775
+ this._saveTimer = null;
776
+ const rejectOnSaveError = this._saveThrowOnError;
777
+ this._saveThrowOnError = false;
778
+
779
+ this.saveQueue = this.saveQueue
780
+ .catch(() => {})
781
+ .then(async () => {
782
+ while (this._saveRequested) {
783
+ this._saveRequested = false;
784
+ await this.performSave({ throwOnError: rejectOnSaveError });
785
+ }
786
+ })
787
+ .then(resolve, reject)
788
+ .finally(() => {
789
+ this._savePromise = null;
790
+ });
791
+ }, debounceMs);
792
+ });
793
+
794
+ return this._savePromise;
795
+ }
796
+
797
+ async performSave({ throwOnError = false } = {}) {
798
+ this._saveInProgress = true;
799
+ if (
800
+ this.config.allowSystemWorkspaceCache !== true &&
801
+ this.config.searchDirectory &&
802
+ isNonProjectDirectory(this.config.searchDirectory)
803
+ ) {
804
+ const source = this.config.workspaceResolution?.source || 'unknown';
805
+ console.warn(
806
+ `[Cache] Skipping cache save for non-project workspace (${source}): ${this.config.searchDirectory}`
807
+ );
808
+ this._saveInProgress = false;
809
+ return;
810
+ }
811
+
812
+ if (this.activeReads > 0) {
813
+ const timeoutMs = this.config.saveReaderWaitTimeoutMs ?? DEFAULT_READER_WAIT_TIMEOUT_MS;
814
+ const allReadersFinished = await this.waitForReadersWithTimeout(timeoutMs);
815
+ if (!allReadersFinished && !this.config.forceSaveWithActiveReaders) {
816
+ console.warn('[Cache] Aborting save - active readers still present after timeout');
817
+ this._saveInProgress = false;
818
+ return;
819
+ }
820
+ }
821
+
822
+ this.isSaving = true;
823
+
824
+ try {
825
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
826
+
827
+ const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
828
+ const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
829
+ const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
830
+
831
+ const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
832
+ const supportsBackendVectorResolve =
833
+ this.config.vectorStoreFormat === 'binary' || this.config.vectorStoreFormat === 'sqlite';
834
+ const hasMissingVectors = snapshotStore.some(
835
+ (chunk) => chunk && (chunk.vector === undefined || chunk.vector === null)
836
+ );
837
+ const useDiskVectors =
838
+ supportsBackendVectorResolve &&
839
+ (this.config.vectorStoreLoadMode === 'disk' || hasMissingVectors);
840
+ if (hasMissingVectors && !useDiskVectors) {
841
+ throw new Error(
842
+ 'Missing vector data for cache write and backend vector resolution is unavailable'
843
+ );
844
+ }
845
+
846
+ this.cacheMeta = {
847
+ version: CACHE_META_VERSION,
848
+ embeddingModel: this.config.embeddingModel,
849
+ embeddingDimension: this.config.embeddingDimension ?? null,
850
+ lastSaveTime: new Date().toISOString(),
851
+ filesIndexed: this.fileHashes.size,
852
+ chunksStored: snapshotStore.length,
853
+ workspace: this.config.searchDirectory || null,
854
+ };
855
+ if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
856
+ this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
857
+ }
858
+ if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
859
+ Object.assign(this.cacheMeta, this.lastIndexStats);
860
+ }
861
+
862
+ const total = snapshotStore.length;
863
+ if (this.config.vectorStoreFormat === 'binary') {
864
+ this.binaryStore = await BinaryVectorStore.write(
865
+ this.config.cacheDirectory,
866
+ snapshotStore,
867
+ {
868
+ contentCacheEntries: this.config.contentCacheEntries,
869
+ vectorCacheEntries: this.config.vectorCacheEntries,
870
+ vectorLoadMode: useDiskVectors ? 'disk' : this.config.vectorStoreLoadMode,
871
+ getContent: (chunk, index) => this.getChunkContent(chunk, index),
872
+ getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
873
+ preRename: async () => {
874
+ if (this.activeReads > 0) {
875
+ await this.waitForReadersWithTimeout(
876
+ Number.isInteger(this.config.saveReaderWaitTimeoutMs)
877
+ ? this.config.saveReaderWaitTimeoutMs
878
+ : 5000
879
+ );
880
+ }
881
+ if (this.binaryStore) {
882
+ await this.binaryStore.close();
883
+ this.binaryStore = null;
884
+ }
885
+ },
886
+ }
887
+ );
888
+ if (this.binaryStore) {
889
+ this.cacheMeta.chunksStored = this.binaryStore.length;
890
+ }
891
+ } else if (this.config.vectorStoreFormat === 'sqlite') {
892
+ if (this.sqliteStore) {
893
+ try {
894
+ this.sqliteStore.close();
895
+ } catch {}
896
+ this.sqliteStore = null;
897
+ }
898
+ this.sqliteStore = await SqliteVectorStore.write(
899
+ this.config.cacheDirectory,
900
+ snapshotStore,
901
+ {
902
+ getContent: (chunk, index) => this.getChunkContent(chunk, index),
903
+ getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
904
+ preRename: async () => {
905
+ if (this.activeReads > 0) {
906
+ await this.waitForReadersWithTimeout(
907
+ Number.isInteger(this.config.saveReaderWaitTimeoutMs)
908
+ ? this.config.saveReaderWaitTimeoutMs
909
+ : 5000
910
+ );
911
+ }
912
+ },
913
+ }
914
+ );
915
+ if (this.sqliteStore) {
916
+ this.cacheMeta.chunksStored = this.sqliteStore.length();
917
+ }
918
+ } else {
919
+ const vectorWriter = new StreamingJsonWriter(cacheFile, {
920
+ highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
921
+ floatDigits: this.config.cacheVectorFloatDigits ?? 6,
922
+ flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
923
+ indent: '',
924
+ assumeFinite: this.config.cacheVectorAssumeFinite,
925
+ checkFinite: this.config.cacheVectorCheckFinite,
926
+ noMutation: this.config.cacheVectorNoMutation ?? false,
927
+ joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
928
+ joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
929
+ });
930
+
931
+ await vectorWriter.writeStart();
932
+
933
+ const yieldEvery = total >= 50_000 ? 5000 : 0;
934
+
935
+ try {
936
+ for (let i = 0; i < total; i++) {
937
+ const pending = vectorWriter.writeItem(snapshotStore[i]);
938
+ if (pending) await pending;
939
+ if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
940
+ }
941
+ await vectorWriter.writeEnd();
942
+ } catch (e) {
943
+ vectorWriter.abort(e);
944
+ throw e;
945
+ }
946
+ }
947
+
948
+ const hashEntries = {};
949
+ for (const [file, entry] of this.fileHashes) {
950
+ const serialized = serializeFileHashEntry(entry);
951
+ if (serialized) {
952
+ hashEntries[file] = serialized;
953
+ }
954
+ }
955
+
956
+ await Promise.all([
957
+ fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
958
+ fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
959
+ ]);
960
+
961
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
962
+ if (this.fileCallData.size > 0) {
963
+ await fs.writeFile(
964
+ callGraphFile,
965
+ JSON.stringify(Object.fromEntries(this.fileCallData), null, 2)
966
+ );
967
+ } else {
968
+ await fs.rm(callGraphFile, { force: true });
969
+ }
970
+
971
+ if (
972
+ this.config.annIndexCache !== false &&
973
+ this.annPersistDirty &&
974
+ !this.annDirty &&
975
+ !this._annWriting &&
976
+ this.annIndex &&
977
+ this.annMeta
978
+ ) {
979
+ this._annWriting = true;
980
+ try {
981
+ const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
982
+ this.annIndex.writeIndexSync(indexFile);
983
+ await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
984
+ this.annPersistDirty = false;
985
+ if (this.config.verbose) {
986
+ console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
987
+ }
988
+ } catch (error) {
989
+ console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
990
+ } finally {
991
+ this._annWriting = false;
992
+ }
993
+ }
994
+ this.lastSaveError = null;
995
+ } catch (error) {
996
+ this.lastSaveError = error instanceof Error ? error : new Error(String(error));
997
+ console.warn('[Cache] Failed to save cache:', this.lastSaveError.message);
998
+
999
+ if (
1000
+ this.config.vectorStoreFormat === 'binary' &&
1001
+ this.binaryStore &&
1002
+ !this.binaryStore.vectorsBuffer
1003
+ ) {
1004
+ try {
1005
+ console.info('[Cache] Attempting to recover binary store after failed save...');
1006
+ this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
1007
+ contentCacheEntries: this.config.contentCacheEntries,
1008
+ });
1009
+ console.info('[Cache] Binary store recovered.');
1010
+ } catch (recoverErr) {
1011
+ console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
1012
+ this.binaryStore = null;
1013
+ }
1014
+ }
1015
+
1016
+ if (this.config.vectorStoreFormat === 'sqlite' && !this.sqliteStore) {
1017
+ try {
1018
+ console.info('[Cache] Attempting to recover SQLite store after failed save...');
1019
+ this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
1020
+ if (this.sqliteStore) {
1021
+ console.info('[Cache] SQLite store recovered.');
1022
+ }
1023
+ } catch (recoverErr) {
1024
+ console.warn(`[Cache] Failed to recover SQLite store: ${recoverErr.message}`);
1025
+ this.sqliteStore = null;
1026
+ }
1027
+ }
1028
+ if (throwOnError) {
1029
+ const wrapped = new Error(`Cache save failed: ${this.lastSaveError.message}`);
1030
+ wrapped.cause = this.lastSaveError;
1031
+ throw wrapped;
1032
+ }
1033
+ } finally {
1034
+ this.isSaving = false;
1035
+ this._saveInProgress = false;
1036
+ }
1037
+ }
1038
+
1039
+ getVectorStore() {
1040
+ return Array.isArray(this.vectorStore) ? this.vectorStore : [];
1041
+ }
1042
+
1043
+ async setVectorStore(store) {
1044
+ const previousBinaryStore = this.binaryStore;
1045
+ const previousSqliteStore = this.sqliteStore;
1046
+ this.vectorStore = store;
1047
+ this.binaryStore = null;
1048
+ this.sqliteStore = null;
1049
+ if (Array.isArray(this.vectorStore)) {
1050
+ for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
1051
+ assignChunkIndices(this.vectorStore);
1052
+ }
1053
+ this.invalidateAnnIndex();
1054
+ if (previousBinaryStore) {
1055
+ try {
1056
+ await previousBinaryStore.close();
1057
+ } catch {}
1058
+ }
1059
+ if (previousSqliteStore) {
1060
+ try {
1061
+ previousSqliteStore.close();
1062
+ } catch {}
1063
+ }
1064
+ }
1065
+
1066
+ setLastIndexDuration(durationMs) {
1067
+ if (Number.isFinite(durationMs) && durationMs >= 0) {
1068
+ this.lastIndexDurationMs = durationMs;
1069
+ }
1070
+ }
1071
+
1072
+ setLastIndexStats(stats) {
1073
+ if (stats && typeof stats === 'object') {
1074
+ this.lastIndexStats = { ...stats };
1075
+ }
1076
+ }
1077
+
1078
+ getFileHash(file) {
1079
+ const entry = this.fileHashes.get(file);
1080
+ if (typeof entry === 'string') return entry;
1081
+ return entry?.hash;
1082
+ }
1083
+
1084
+ getFileHashKeys() {
1085
+ return Array.from(this.fileHashes.keys());
1086
+ }
1087
+
1088
+ getFileHashCount() {
1089
+ return this.fileHashes.size;
1090
+ }
1091
+
1092
+ clearFileHashes() {
1093
+ this.fileHashes.clear();
1094
+ }
1095
+
1096
+ setFileHashes(entries) {
1097
+ this.fileHashes.clear();
1098
+ if (!entries || typeof entries !== 'object') return;
1099
+ const iterator = entries instanceof Map ? entries.entries() : Object.entries(entries);
1100
+ if (!iterator) return;
1101
+ for (const [file, entry] of iterator) {
1102
+ const normalized = normalizeFileHashEntry(entry);
1103
+ if (normalized) {
1104
+ this.fileHashes.set(file, normalized);
1105
+ }
1106
+ }
1107
+ }
1108
+
1109
+ setFileHash(file, hash, meta = null) {
1110
+ const entry = { hash };
1111
+ if (meta && typeof meta === 'object') {
1112
+ if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
1113
+ if (Number.isFinite(meta.size)) entry.size = meta.size;
1114
+ }
1115
+ this.fileHashes.set(file, entry);
1116
+ }
1117
+
1118
+ getFileMeta(file) {
1119
+ const entry = this.fileHashes.get(file);
1120
+ if (!entry) return null;
1121
+ if (typeof entry === 'string') return { hash: entry };
1122
+ return entry;
1123
+ }
1124
+
1125
+ getChunkVector(chunk, index = null) {
1126
+ if (typeof chunk === 'number') {
1127
+ const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1128
+ const entry = store ? store[chunk] : null;
1129
+ if (entry?.vector) return entry.vector;
1130
+ if (this.binaryStore) {
1131
+ const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
1132
+ return this.binaryStore.getVector(resolved);
1133
+ }
1134
+ if (this.sqliteStore) {
1135
+ const resolved = Number.isInteger(entry?._sqliteIndex) ? entry._sqliteIndex : chunk;
1136
+ return this.sqliteStore.getVector(resolved);
1137
+ }
1138
+ return null;
1139
+ }
1140
+
1141
+ if (chunk?.vector) return chunk.vector;
1142
+ const resolved = Number.isInteger(index) ? index : chunk?._index;
1143
+ if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1144
+ return this.binaryStore.getVector(chunk._binaryIndex);
1145
+ }
1146
+ if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1147
+ return this.binaryStore.getVector(resolved);
1148
+ }
1149
+ if (this.sqliteStore) {
1150
+ const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1151
+ ? chunk._sqliteIndex
1152
+ : Number.isInteger(chunk?.index)
1153
+ ? chunk.index
1154
+ : resolved;
1155
+ if (Number.isInteger(sqliteIndex)) {
1156
+ return this.sqliteStore.getVector(sqliteIndex);
1157
+ }
1158
+ }
1159
+ return null;
1160
+ }
1161
+
1162
+ async getChunkContent(chunk, index = null) {
1163
+ if (typeof chunk === 'number') {
1164
+ const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
1165
+ const entry = store ? store[chunk] : null;
1166
+ if (entry) return await this.getChunkContent(entry, chunk);
1167
+ if (!store && this.binaryStore) {
1168
+ const content = await this.binaryStore.getContent(chunk);
1169
+ return content ?? '';
1170
+ }
1171
+ if (!store && this.sqliteStore) {
1172
+ return this.sqliteStore.getContent(chunk) ?? '';
1173
+ }
1174
+ return '';
1175
+ }
1176
+ if (chunk?.content !== undefined && chunk?.content !== null) {
1177
+ return chunk.content;
1178
+ }
1179
+ if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
1180
+ const content = await this.binaryStore.getContent(chunk._binaryIndex);
1181
+ return content ?? '';
1182
+ }
1183
+ const resolved = Number.isInteger(index) ? index : chunk?._index;
1184
+ if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
1185
+ const content = await this.binaryStore.getContent(resolved);
1186
+ return content ?? '';
1187
+ }
1188
+ if (this.sqliteStore) {
1189
+ const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
1190
+ ? chunk._sqliteIndex
1191
+ : Number.isInteger(chunk?.index)
1192
+ ? chunk.index
1193
+ : resolved;
1194
+ if (Number.isInteger(sqliteIndex)) {
1195
+ return this.sqliteStore.getContent(sqliteIndex) ?? '';
1196
+ }
1197
+ }
1198
+ return '';
1199
+ }
1200
+
1201
+ deleteFileHash(file) {
1202
+ this.fileHashes.delete(file);
1203
+ }
1204
+
1205
+ async removeFileFromStore(file) {
1206
+ if (!Array.isArray(this.vectorStore)) return;
1207
+
1208
+ let w = 0;
1209
+ for (let r = 0; r < this.vectorStore.length; r++) {
1210
+ const chunk = this.vectorStore[r];
1211
+ if (chunk.file !== file) {
1212
+ chunk._index = w;
1213
+ this.vectorStore[w++] = chunk;
1214
+ }
1215
+ }
1216
+ this.vectorStore.length = w;
1217
+
1218
+ this.invalidateAnnIndex();
1219
+ this.removeFileCallData(file);
1220
+
1221
+ this.fileHashes.delete(file);
1222
+ }
1223
+
1224
+ addToStore(chunk) {
1225
+ normalizeChunkVector(chunk);
1226
+
1227
+ if (!Array.isArray(this.vectorStore)) {
1228
+ this.vectorStore = [];
1229
+ }
1230
+
1231
+ const label = this.vectorStore.length;
1232
+ chunk._index = label;
1233
+ this.vectorStore.push(chunk);
1234
+ if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
1235
+ this.annVectorCache.push(chunk.vector);
1236
+ }
1237
+
1238
+ if (
1239
+ this.annIndex &&
1240
+ !this.annDirty &&
1241
+ this.annMeta &&
1242
+ typeof this.annIndex.addPoint === 'function' &&
1243
+ this.annMeta.count === label &&
1244
+ this.annMeta.maxElements > this.annMeta.count
1245
+ ) {
1246
+ try {
1247
+ this.annIndex.addPoint(chunk.vector, label);
1248
+ this.annMeta.count += 1;
1249
+ this.annPersistDirty = true;
1250
+ return;
1251
+ } catch {}
1252
+ }
1253
+
1254
+ this.invalidateAnnIndex();
1255
+ }
1256
+
1257
+ invalidateAnnIndex() {
1258
+ this.annIndex = null;
1259
+ this.annMeta = null;
1260
+ this.annDirty = true;
1261
+ this.annPersistDirty = false;
1262
+ this.annVectorCache = null;
1263
+ }
1264
+
1265
+ getAnnVector(index) {
1266
+ if (!Array.isArray(this.vectorStore)) return null;
1267
+ const chunk = this.vectorStore[index];
1268
+ if (!chunk) return null;
1269
+
1270
+ if (
1271
+ !Array.isArray(this.annVectorCache) ||
1272
+ this.annVectorCache.length !== this.vectorStore.length
1273
+ ) {
1274
+ this.annVectorCache = new Array(this.vectorStore.length);
1275
+ }
1276
+
1277
+ const cached = this.annVectorCache[index];
1278
+ if (cached) return cached;
1279
+
1280
+ let vec = null;
1281
+ if (chunk.vector) {
1282
+ vec = ensureFloat32(chunk.vector);
1283
+ } else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
1284
+ vec = this.binaryStore.getVector(chunk._binaryIndex);
1285
+ } else if (this.sqliteStore) {
1286
+ const sqliteIndex = Number.isInteger(chunk._sqliteIndex)
1287
+ ? chunk._sqliteIndex
1288
+ : Number.isInteger(chunk.index)
1289
+ ? chunk.index
1290
+ : index;
1291
+ if (Number.isInteger(sqliteIndex)) {
1292
+ vec = this.sqliteStore.getVector(sqliteIndex);
1293
+ }
1294
+ }
1295
+
1296
+ if (!vec) return null;
1297
+
1298
+ if (this.config.vectorStoreLoadMode !== 'disk') {
1299
+ chunk.vector = vec;
1300
+ }
1301
+ this.annVectorCache[index] = vec;
1302
+ return vec;
1303
+ }
1304
+
1305
+ getAnnIndexPaths() {
1306
+ return {
1307
+ indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
1308
+ metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
1309
+ };
1310
+ }
1311
+
1312
+ async ensureAnnIndex() {
1313
+ if (!this.config.annEnabled) return null;
1314
+ if (!Array.isArray(this.vectorStore)) return null;
1315
+ if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
1316
+ if (this.annIndex && !this.annDirty) return this.annIndex;
1317
+ if (this.annLoading) return this.annLoading;
1318
+
1319
+ this.annLoading = (async () => {
1320
+ try {
1321
+ const HierarchicalNSW = await loadHnswlib();
1322
+ if (!HierarchicalNSW) {
1323
+ if (hnswlibLoadError) {
1324
+ this.addInitError('loadHnswlib', hnswlibLoadError);
1325
+ }
1326
+ return null;
1327
+ }
1328
+
1329
+ const dim =
1330
+ this.vectorStore[0]?.vector?.length || this.binaryStore?.dim || this.sqliteStore?.dim;
1331
+ if (!dim) return null;
1332
+
1333
+ let dimensionMismatch = false;
1334
+ const sampleSize = Math.min(ANN_DIMENSION_SAMPLE_SIZE, this.vectorStore.length);
1335
+ const step = Math.max(1, Math.floor(this.vectorStore.length / sampleSize));
1336
+ for (let i = step; i < this.vectorStore.length; i += step) {
1337
+ const v = this.vectorStore[i]?.vector;
1338
+ if (v && v.length !== dim) {
1339
+ dimensionMismatch = true;
1340
+ console.warn(
1341
+ `[ANN] Dimension mismatch at index ${i}: expected ${dim}, got ${v.length}. ` +
1342
+ 'This may indicate a config change mid-index. Consider full reindex.'
1343
+ );
1344
+ break;
1345
+ }
1346
+ }
1347
+
1348
+ if (dimensionMismatch) {
1349
+ this.addInitError(
1350
+ 'ensureAnnIndex',
1351
+ `Vector dimension inconsistency detected. Expected ${dim}. Full reindex required.`
1352
+ );
1353
+ return null;
1354
+ }
1355
+
1356
+ if (!this.annDirty && this.config.annIndexCache !== false) {
1357
+ const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
1358
+ if (loaded) return this.annIndex;
1359
+ }
1360
+
1361
+ return await this.buildAnnIndex(HierarchicalNSW, dim);
1362
+ } finally {
1363
+ this.annLoading = null;
1364
+ }
1365
+ })();
1366
+
1367
+ return this.annLoading;
1368
+ }
1369
+
1370
+ async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
1371
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
1372
+ const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
1373
+ if (!metaData) return false;
1374
+
1375
+ let meta;
1376
+ try {
1377
+ meta = JSON.parse(metaData);
1378
+ } catch {
1379
+ console.warn('[ANN] Invalid ANN metadata, rebuilding');
1380
+ return false;
1381
+ }
1382
+
1383
+ if (meta?.version !== ANN_META_VERSION) {
1384
+ console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
1385
+ return false;
1386
+ }
1387
+
1388
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
1389
+ console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
1390
+ return false;
1391
+ }
1392
+
1393
+ if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
1394
+ console.warn('[ANN] ANN index size mismatch, rebuilding');
1395
+ return false;
1396
+ }
1397
+
1398
+ if (
1399
+ meta?.metric !== this.config.annMetric ||
1400
+ meta?.m !== this.config.annM ||
1401
+ meta?.efConstruction !== this.config.annEfConstruction
1402
+ ) {
1403
+ console.warn('[ANN] ANN index config changed, rebuilding');
1404
+ return false;
1405
+ }
1406
+
1407
+ let maxElements = meta?.maxElements;
1408
+ if (!Number.isInteger(maxElements)) {
1409
+ maxElements = meta.count;
1410
+ } else if (maxElements < meta.count) {
1411
+ console.warn('[ANN] ANN capacity invalid, rebuilding');
1412
+ return false;
1413
+ }
1414
+
1415
+ const index = new HierarchicalNSW(meta.metric, dim);
1416
+ const loaded = readHnswIndex(index, indexFile, maxElements);
1417
+ if (!loaded) {
1418
+ console.warn('[ANN] Failed to load ANN index file, rebuilding');
1419
+ return false;
1420
+ }
1421
+
1422
+ if (typeof index.setEf === 'function') {
1423
+ index.setEf(this.config.annEfSearch);
1424
+ }
1425
+
1426
+ this.annIndex = index;
1427
+ this.annMeta = { ...meta, maxElements };
1428
+ this.annDirty = false;
1429
+ this.annPersistDirty = false;
1430
+
1431
+ if (this.config.verbose) {
1432
+ console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
1433
+ }
1434
+ return true;
1435
+ }
1436
+
1437
+ async buildAnnIndex(HierarchicalNSW, dim) {
1438
+ if (!Array.isArray(this.vectorStore)) return null;
1439
+ const total = this.vectorStore.length;
1440
+ if (total === 0) return null;
1441
+
1442
+ try {
1443
+ const index = new HierarchicalNSW(this.config.annMetric, dim);
1444
+
1445
+ const maxElements = computeAnnCapacity(total, this.config);
1446
+ initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);
1447
+
1448
+ const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
1449
+ ? this.config.annBuildYieldEvery
1450
+ : 1000;
1451
+
1452
+ for (let i = 0; i < total; i++) {
1453
+ const vector = this.getAnnVector(i);
1454
+ if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
1455
+ index.addPoint(vector, i);
1456
+
1457
+ if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
1458
+ await yieldToLoop();
1459
+ }
1460
+ }
1461
+
1462
+ if (typeof index.setEf === 'function') {
1463
+ index.setEf(this.config.annEfSearch);
1464
+ }
1465
+
1466
+ this.annIndex = index;
1467
+ this.annMeta = {
1468
+ version: ANN_META_VERSION,
1469
+ embeddingModel: this.config.embeddingModel,
1470
+ metric: this.config.annMetric,
1471
+ dim,
1472
+ count: total,
1473
+ maxElements,
1474
+ m: this.config.annM,
1475
+ efConstruction: this.config.annEfConstruction,
1476
+ efSearch: this.config.annEfSearch,
1477
+ };
1478
+ this.annDirty = false;
1479
+ this.annPersistDirty = true;
1480
+
1481
+ if (this.config.annIndexCache !== false) {
1482
+ try {
1483
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
1484
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
1485
+ index.writeIndexSync(indexFile);
1486
+ await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
1487
+ this.annPersistDirty = false;
1488
+ if (this.config.verbose) {
1489
+ console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
1490
+ }
1491
+ } catch (error) {
1492
+ console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
1493
+ }
1494
+ }
1495
+
1496
+ return index;
1497
+ } catch (error) {
1498
+ console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
1499
+ this.addInitError('buildAnnIndex', error);
1500
+ this.annIndex = null;
1501
+ this.annMeta = null;
1502
+ this.annDirty = true;
1503
+ this.annPersistDirty = false;
1504
+ return null;
1505
+ }
1506
+ }
1507
+
1508
+ async queryAnn(queryVector, k) {
1509
+ if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
1510
+ const index = await this.ensureAnnIndex();
1511
+ if (!index) return [];
1512
+
1513
+ const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
1514
+
1515
+ let results;
1516
+ try {
1517
+ results = index.searchKnn(qVec, k);
1518
+ } catch (err) {
1519
+ console.warn(`[ANN] searchKnn failed: ${err.message}. Falling back to linear search.`);
1520
+ this.addInitError('queryAnn', err);
1521
+
1522
+ this.invalidateAnnIndex();
1523
+ return [];
1524
+ }
1525
+
1526
+ const labels = normalizeLabels(results);
1527
+
1528
+ if (labels.length === 0) return [];
1529
+
1530
+ const filtered = labels.filter(
1531
+ (label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
1532
+ );
1533
+
1534
+ return filtered;
1535
+ }
1536
+
1537
+ async clear() {
1538
+ if (!this.config.enableCache) return;
1539
+
1540
+ try {
1541
+ await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
1542
+ this.vectorStore = [];
1543
+ if (this.binaryStore) {
1544
+ try {
1545
+ await this.binaryStore.close();
1546
+ } catch {}
1547
+ }
1548
+ this.binaryStore = null;
1549
+ if (this.sqliteStore) {
1550
+ try {
1551
+ this.sqliteStore.close();
1552
+ } catch {}
1553
+ }
1554
+ this.sqliteStore = null;
1555
+ this.fileHashes = new Map();
1556
+ this.invalidateAnnIndex();
1557
+ await this.clearCallGraphData();
1558
+ if (this.config.verbose) {
1559
+ console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
1560
+ }
1561
+ } catch (error) {
1562
+ console.error('[Cache] Failed to clear cache:', error.message);
1563
+ throw error;
1564
+ }
1565
+ }
1566
+
1567
+ setEfSearch(efSearch) {
1568
+ if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
1569
+ return {
1570
+ success: false,
1571
+ error: 'efSearch must be a number between 1 and 1000',
1572
+ };
1573
+ }
1574
+
1575
+ this.config.annEfSearch = efSearch;
1576
+
1577
+ if (this.annIndex && typeof this.annIndex.setEf === 'function') {
1578
+ this.annIndex.setEf(efSearch);
1579
+ if (this.annMeta) this.annMeta.efSearch = efSearch;
1580
+ this.annPersistDirty = true;
1581
+ if (this.config.verbose) {
1582
+ console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
1583
+ }
1584
+ return { success: true, applied: true, efSearch };
1585
+ }
1586
+
1587
+ if (this.config.verbose) {
1588
+ console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
1589
+ }
1590
+ return { success: true, applied: false, efSearch };
1591
+ }
1592
+
1593
+ getAnnStats() {
1594
+ return {
1595
+ enabled: this.config.annEnabled ?? false,
1596
+ indexLoaded: this.annIndex !== null,
1597
+ dirty: this.annDirty,
1598
+ vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
1599
+ minChunksForAnn: this.config.annMinChunks ?? 5000,
1600
+ config: this.annMeta
1601
+ ? {
1602
+ metric: this.annMeta.metric,
1603
+ dim: this.annMeta.dim,
1604
+ count: this.annMeta.count,
1605
+ m: this.annMeta.m,
1606
+ efConstruction: this.annMeta.efConstruction,
1607
+ efSearch: this.config.annEfSearch,
1608
+ }
1609
+ : null,
1610
+ };
1611
+ }
1612
+
1613
+ async clearCallGraphData({ removeFile = false } = {}) {
1614
+ this.fileCallData.clear();
1615
+ this.callGraph = null;
1616
+
1617
+ if (removeFile && this.config.enableCache) {
1618
+ const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
1619
+ try {
1620
+ await fs.rm(callGraphFile, { force: true });
1621
+ } catch (error) {
1622
+ if (this.config.verbose) {
1623
+ console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
1624
+ }
1625
+ }
1626
+ }
1627
+ }
1628
+
1629
+ pruneCallGraphData(validFiles) {
1630
+ if (!validFiles || this.fileCallData.size === 0) return 0;
1631
+
1632
+ let pruned = 0;
1633
+ for (const file of Array.from(this.fileCallData.keys())) {
1634
+ if (!validFiles.has(file)) {
1635
+ this.fileCallData.delete(file);
1636
+ pruned++;
1637
+ }
1638
+ }
1639
+
1640
+ if (pruned > 0) this.callGraph = null;
1641
+ return pruned;
1642
+ }
1643
+
1644
+ getFileCallData(file) {
1645
+ return this.fileCallData.get(file);
1646
+ }
1647
+
1648
+ hasFileCallData(file) {
1649
+ return this.fileCallData.has(file);
1650
+ }
1651
+
1652
+ getFileCallDataKeys() {
1653
+ return Array.from(this.fileCallData.keys());
1654
+ }
1655
+
1656
+ getFileCallDataCount() {
1657
+ return this.fileCallData.size;
1658
+ }
1659
+
1660
+ setFileCallData(file, data) {
1661
+ this.fileCallData.set(file, data);
1662
+ this.callGraph = null;
1663
+ }
1664
+
1665
+ setFileCallDataEntries(entries) {
1666
+ if (entries instanceof Map) {
1667
+ this.fileCallData = entries;
1668
+ } else {
1669
+ this.fileCallData.clear();
1670
+ if (entries && typeof entries === 'object') {
1671
+ for (const [file, data] of Object.entries(entries)) {
1672
+ this.fileCallData.set(file, data);
1673
+ }
1674
+ }
1675
+ }
1676
+ this.callGraph = null;
1677
+ }
1678
+
1679
+ clearFileCallData() {
1680
+ this.fileCallData.clear();
1681
+ this.callGraph = null;
1682
+ }
1683
+
1684
+ removeFileCallData(file) {
1685
+ this.fileCallData.delete(file);
1686
+ this.callGraph = null;
1687
+ }
1688
+
1689
+ async rebuildCallGraph() {
1690
+ if (this._callGraphBuild) return this._callGraphBuild;
1691
+
1692
+ this._callGraphBuild = (async () => {
1693
+ try {
1694
+ const { buildCallGraph } = await import('./call-graph.js');
1695
+ this.callGraph = buildCallGraph(this.fileCallData);
1696
+ if (this.config.verbose && this.callGraph) {
1697
+ console.info(
1698
+ `[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`
1699
+ );
1700
+ }
1701
+ } catch (err) {
1702
+ console.error(`[CallGraph] Failed to build: ${err.message}`);
1703
+ this.callGraph = null;
1704
+ } finally {
1705
+ this._callGraphBuild = null;
1706
+ }
1707
+ })();
1708
+
1709
+ return this._callGraphBuild;
1710
+ }
1711
+
1712
+ async getRelatedFiles(symbols) {
1713
+ if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
1714
+ if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
1715
+ if (!this.callGraph) return new Map();
1716
+
1717
+ const { getRelatedFiles } = await import('./call-graph.js');
1718
+ return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
1719
+ }
1720
+
1721
+ getCallGraphStats() {
1722
+ return {
1723
+ enabled: this.config.callGraphEnabled ?? false,
1724
+ filesWithData: this.fileCallData.size,
1725
+ graphBuilt: this.callGraph !== null,
1726
+ definitions: this.callGraph?.defines.size ?? 0,
1727
+ callTargets: this.callGraph?.calledBy.size ?? 0,
1728
+ };
1729
+ }
1730
+
1731
+ getStoreSize() {
1732
+ if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
1733
+ if (this.binaryStore) return this.binaryStore.length;
1734
+ if (this.sqliteStore) return this.sqliteStore.length();
1735
+ return 0;
1736
+ }
1737
+
1738
+ getVector(index) {
1739
+ return this.getChunkVector(index);
1740
+ }
1741
+
1742
+ getChunk(index) {
1743
+ if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
1744
+ return this.vectorStore[index];
1745
+ }
1746
+ if (this.binaryStore) {
1747
+ const record = this.binaryStore.getRecord(index);
1748
+ if (record) {
1749
+ return {
1750
+ file: record.file,
1751
+ startLine: record.startLine,
1752
+ endLine: record.endLine,
1753
+ vector: this.binaryStore.getVector(index),
1754
+ _index: index,
1755
+ _binaryIndex: index,
1756
+ };
1757
+ }
1758
+ }
1759
+ if (this.sqliteStore) {
1760
+ const record = this.sqliteStore.getRecord(index);
1761
+ if (record) {
1762
+ return {
1763
+ file: record.file,
1764
+ startLine: record.startLine,
1765
+ endLine: record.endLine,
1766
+ vector: this.sqliteStore.getVector(index),
1767
+ _index: index,
1768
+ _sqliteIndex: index,
1769
+ };
1770
+ }
1771
+ }
1772
+ return null;
1773
+ }
1774
+ }