@softerist/heuristic-mcp 3.0.15 → 3.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -104
- package/config.jsonc +173 -173
- package/features/ann-config.js +131 -0
- package/features/clear-cache.js +84 -0
- package/features/find-similar-code.js +291 -0
- package/features/hybrid-search.js +544 -0
- package/features/index-codebase.js +3268 -0
- package/features/lifecycle.js +1189 -0
- package/features/package-version.js +302 -0
- package/features/register.js +408 -0
- package/features/resources.js +156 -0
- package/features/set-workspace.js +265 -0
- package/index.js +96 -96
- package/lib/cache-ops.js +22 -22
- package/lib/cache-utils.js +565 -565
- package/lib/cache.js +1870 -1870
- package/lib/call-graph.js +396 -396
- package/lib/cli.js +1 -1
- package/lib/config.js +517 -517
- package/lib/constants.js +39 -39
- package/lib/embed-query-process.js +7 -7
- package/lib/embedding-process.js +7 -7
- package/lib/embedding-worker.js +299 -299
- package/lib/ignore-patterns.js +316 -316
- package/lib/json-worker.js +14 -14
- package/lib/json-writer.js +337 -337
- package/lib/logging.js +164 -164
- package/lib/memory-logger.js +13 -13
- package/lib/onnx-backend.js +193 -193
- package/lib/project-detector.js +84 -84
- package/lib/server-lifecycle.js +165 -165
- package/lib/settings-editor.js +754 -754
- package/lib/tokenizer.js +256 -256
- package/lib/utils.js +428 -428
- package/lib/vector-store-binary.js +627 -627
- package/lib/vector-store-sqlite.js +95 -95
- package/lib/workspace-env.js +28 -28
- package/mcp_config.json +9 -9
- package/package.json +86 -75
- package/scripts/clear-cache.js +20 -0
- package/scripts/download-model.js +43 -0
- package/scripts/mcp-launcher.js +49 -0
- package/scripts/postinstall.js +12 -0
- package/search-configs.js +36 -36
- package/.prettierrc +0 -7
- package/debug-pids.js +0 -30
- package/eslint.config.js +0 -36
- package/specs/plan.md +0 -23
- package/vitest.config.js +0 -39
package/lib/cache.js
CHANGED
|
@@ -1,1870 +1,1870 @@
|
|
|
1
|
-
import fs from 'fs/promises';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
import { Worker } from 'worker_threads';
|
|
4
|
-
import { StreamingJsonWriter } from './json-writer.js';
|
|
5
|
-
import { BinaryVectorStore } from './vector-store-binary.js';
|
|
6
|
-
import { SqliteVectorStore } from './vector-store-sqlite.js';
|
|
7
|
-
import {
|
|
8
|
-
JSON_WORKER_THRESHOLD_BYTES,
|
|
9
|
-
ANN_DIMENSION_SAMPLE_SIZE,
|
|
10
|
-
HNSWLIB_ERROR_RESET_MS,
|
|
11
|
-
DEFAULT_READER_WAIT_TIMEOUT_MS,
|
|
12
|
-
} from './constants.js';
|
|
13
|
-
|
|
14
|
-
const CACHE_META_VERSION = 1;
|
|
15
|
-
const CACHE_META_FILE = 'meta.json';
|
|
16
|
-
|
|
17
|
-
// ANN meta version stays at 1 for compatibility; maxElements is optional.
|
|
18
|
-
const ANN_META_VERSION = 1;
|
|
19
|
-
const ANN_INDEX_FILE = 'ann-index.bin';
|
|
20
|
-
const ANN_META_FILE = 'ann-meta.json';
|
|
21
|
-
|
|
22
|
-
const CALL_GRAPH_FILE = 'call-graph.json';
|
|
23
|
-
|
|
24
|
-
const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
|
|
25
|
-
|
|
26
|
-
// Yield to event loop to keep IDE/extension host responsive during heavy CPU loops
|
|
27
|
-
const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
|
|
28
|
-
|
|
29
|
-
let hnswlibPromise = null;
|
|
30
|
-
let hnswlibLoadError = null;
|
|
31
|
-
|
|
32
|
-
async function parseJsonInWorker(filePath) {
  return new Promise((resolve, reject) => {
    let done = false;
    const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
      workerData: { filePath },
    });

    // settle() is the single exit point: it detaches every handler (so no
    // listeners leak), tears the worker down, and uses the `done` flag to
    // guard against double-settlement when several events race in.
    const settle = (deliver, payload) => {
      if (done) return;
      done = true;
      worker.removeAllListeners();
      const termination = worker.terminate?.();
      if (termination && typeof termination.catch === 'function') {
        termination.catch(() => null);
      }
      deliver(payload);
    };

    const fail = (err) => settle(reject, err);

    worker.once('message', (msg) => {
      if (!msg?.ok) {
        const err = new Error(msg?.error || 'JSON worker failed');
        console.warn(`[Cache] ${err.message}`);
        fail(err);
        return;
      }
      settle(resolve, msg.data);
    });

    worker.once('error', (err) => {
      console.error(`[Cache] JSON worker error: ${err.message}`);
      fail(err);
    });

    worker.once('exit', (code) => {
      if (code !== 0) {
        const err = new Error(`JSON worker exited with code ${code}`);
        console.error(`[Cache] ${err.message}`);
        fail(err);
      } else if (!done) {
        // Clean exit but no message ever arrived — treat as a failure.
        const err = new Error('JSON worker exited without sending a response');
        console.error(`[Cache] ${err.message}`);
        fail(err);
      }
    });
  });
}
|
|
81
|
-
|
|
82
|
-
async function readJsonFile(
  filePath,
  { workerThresholdBytes = JSON_WORKER_THRESHOLD_BYTES } = {}
) {
  // Stat first: a missing file is an expected situation, not an error.
  let fileStats;
  try {
    fileStats = await fs.stat(filePath);
  } catch {
    return null;
  }

  try {
    // Offload large files to a worker thread; parse small ones inline.
    const workerAvailable = typeof Worker === 'function';
    const sizeKnown = fileStats && typeof fileStats.size === 'number';
    const shouldOffload =
      workerAvailable && sizeKnown ? fileStats.size >= workerThresholdBytes : false;

    if (shouldOffload) {
      return await parseJsonInWorker(filePath);
    }

    const raw = await fs.readFile(filePath, 'utf-8');
    return JSON.parse(raw);
  } catch (error) {
    // Parse failures are logged and reported as "no data" rather than thrown.
    console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
    return null;
  }
}
|
|
110
|
-
|
|
111
|
-
async function loadHnswlib() {
  // A previous load failure blocks retries until HNSWLIB_ERROR_RESET_MS has
  // elapsed; after that, both the error and the memoized promise are cleared.
  if (hnswlibLoadError) {
    const failedAt = hnswlibLoadError._timestamp;
    const retryAllowed = failedAt && Date.now() - failedAt > HNSWLIB_ERROR_RESET_MS;
    if (!retryAllowed) return null;
    hnswlibLoadError = null;
    hnswlibPromise = null;
  }

  // Memoize the dynamic import so every caller shares one load attempt.
  if (!hnswlibPromise) {
    hnswlibPromise = import('hnswlib-node')
      .then((mod) => {
        const ctor = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
        if (!ctor) throw new Error('HierarchicalNSW export not found');
        return ctor;
      })
      .catch((err) => {
        // Stamp the failure time so a later call can retry after the window.
        err._timestamp = Date.now();
        hnswlibLoadError = err;
        console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
        return null;
      });
  }

  return hnswlibPromise;
}
|
|
140
|
-
|
|
141
|
-
function initHnswIndex(index, maxElements, m, efConstruction) {
  // Try the richest initIndex signature first, then fall back for older
  // hnswlib-node builds; each failed attempt is logged with its own label.
  const attempts = [
    { label: 'Standard', args: [maxElements, m, efConstruction, 100] },
    { label: 'Legacy', args: [maxElements, m, efConstruction] },
  ];
  for (const { label, args } of attempts) {
    try {
      index.initIndex(...args);
      return;
    } catch (err) {
      console.warn(`[ANN] ${label} init failed: ${err.message}`);
    }
  }
  // Last resort: minimal signature; any error here propagates to the caller.
  index.initIndex(maxElements);
}
|
|
156
|
-
|
|
157
|
-
function readHnswIndex(index, filePath, maxElements) {
  // Prefer the capacity-aware readIndexSync signature; fall back to the
  // single-argument form for older hnswlib builds. Only the final failure
  // is logged.
  const argLists = [[filePath, maxElements], [filePath]];
  for (let i = 0; i < argLists.length; i += 1) {
    try {
      index.readIndexSync(...argLists[i]);
      return true;
    } catch (err) {
      if (i === argLists.length - 1) {
        console.warn(`[ANN] Read index failed: ${err.message}`);
      }
    }
  }
  return false;
}
|
|
172
|
-
|
|
173
|
-
function normalizeLabels(result) {
  // Normalize the various shapes hnswlib results come in (raw array, or an
  // object carrying labels/neighbors/indices) to a plain array of labels.
  if (Array.isArray(result)) return result;
  if (!result) return [];
  const candidates = result.labels || result.neighbors || result.indices;
  if (!candidates) return [];
  return Array.from(candidates);
}
|
|
179
|
-
|
|
180
|
-
function ensureFloat32(vector) {
  // Coerce any numeric array-like into a Float32Array by converting VALUES
  // (never reinterpreting the underlying bytes). Null-ish input yields null;
  // an existing Float32Array is returned as-is.
  if (!vector) return null;
  if (vector instanceof Float32Array) return vector;

  const converted = ArrayBuffer.isView(vector)
    ? Float32Array.from(vector)
    : new Float32Array(vector);

  // Under test, fail fast on NaN/Infinity so corrupt vectors surface early.
  if (IS_TEST_ENV) {
    for (let i = 0; i < converted.length; i++) {
      if (!Number.isFinite(converted[i])) {
        throw new Error(
          `Invalid vector value at index ${i}: ${converted[i]}. ` +
            'Vector contains NaN or Infinity, which will corrupt search results.'
        );
      }
    }
  }

  return converted;
}
|
|
206
|
-
|
|
207
|
-
function normalizeChunkVector(chunk) {
  // Rewrite a chunk's vector in place so downstream code always sees Float32Array.
  const vector = chunk?.vector;
  if (!vector) return;
  chunk.vector = ensureFloat32(vector);
}
|
|
210
|
-
|
|
211
|
-
function assignChunkIndices(store) {
  // Stamp each chunk with its position so later lookups can map results back
  // into the store. Non-array input and null/undefined slots are ignored.
  if (!Array.isArray(store)) return;
  store.forEach((chunk, position) => {
    if (chunk) chunk._index = position;
  });
}
|
|
220
|
-
|
|
221
|
-
function normalizeFileHashEntry(entry) {
  // Accept both the legacy bare-hash string form and the structured object
  // form; anything else (or an object without a string hash) is rejected.
  if (!entry) return null;
  if (typeof entry === 'string') return { hash: entry };
  if (typeof entry !== 'object' || typeof entry.hash !== 'string') return null;

  const result = { hash: entry.hash };
  // Only carry over numeric metadata that is actually finite.
  if (Number.isFinite(entry.mtimeMs)) result.mtimeMs = entry.mtimeMs;
  if (Number.isFinite(entry.size)) result.size = entry.size;
  return result;
}
|
|
231
|
-
|
|
232
|
-
function serializeFileHashEntry(entry) {
  // Produce the on-disk representation of a file-hash entry. Legacy string
  // entries are upgraded to the object form; invalid shapes serialize to null.
  // NOTE(review): currently mirrors normalizeFileHashEntry field-for-field.
  if (!entry) return null;
  if (typeof entry === 'string') return { hash: entry };
  if (typeof entry !== 'object') return null;
  if (typeof entry.hash !== 'string') return null;

  const out = { hash: entry.hash };
  if (Number.isFinite(entry.mtimeMs)) out.mtimeMs = entry.mtimeMs;
  if (Number.isFinite(entry.size)) out.size = entry.size;
  return out;
}
|
|
242
|
-
|
|
243
|
-
/**
 * Computes how many elements the ANN index should be sized for.
 *
 * Capacity is padded beyond the current total so incremental additions do
 * not immediately force an index rebuild: the result is the largest of the
 * raw total, the total scaled by `annCapacityFactor` (default 1.2), and the
 * total plus `annCapacityExtra` (default 1024).
 *
 * @param {number} total - Current number of vectors to index.
 * @param {object} config - Cache configuration; may carry `annCapacityFactor`
 *   (finite number) and `annCapacityExtra` (integer) overrides.
 * @returns {number} Padded capacity, never less than `total`.
 */
function computeAnnCapacity(total, config) {
  // Number.isFinite (not `typeof === 'number'`): a NaN or Infinity override
  // would otherwise flow through Math.ceil/Math.max and return NaN/Infinity,
  // corrupting the index capacity. Invalid overrides fall back to defaults.
  const factor = Number.isFinite(config.annCapacityFactor) ? config.annCapacityFactor : 1.2;
  const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
  const byFactor = Math.ceil(total * factor);
  const byExtra = total + extra;
  return Math.max(total, byFactor, byExtra);
}
|
|
250
|
-
|
|
251
|
-
export class EmbeddingsCache {
|
|
252
|
-
constructor(config) {
|
|
253
|
-
this.config = config;
|
|
254
|
-
|
|
255
|
-
this.vectorStore = [];
|
|
256
|
-
this.fileHashes = new Map();
|
|
257
|
-
this.isSaving = false;
|
|
258
|
-
this.lastIndexDurationMs = null;
|
|
259
|
-
this.lastIndexStats = null;
|
|
260
|
-
|
|
261
|
-
this.cacheMeta = {
|
|
262
|
-
version: CACHE_META_VERSION,
|
|
263
|
-
embeddingModel: config.embeddingModel,
|
|
264
|
-
embeddingDimension: config.embeddingDimension ?? null,
|
|
265
|
-
};
|
|
266
|
-
|
|
267
|
-
// Save coalescing / debounce (serialized via saveQueue)
|
|
268
|
-
this.saveQueue = Promise.resolve();
|
|
269
|
-
this._saveTimer = null;
|
|
270
|
-
this._saveRequested = false;
|
|
271
|
-
this._savePromise = null;
|
|
272
|
-
|
|
273
|
-
// ANN state
|
|
274
|
-
this.annIndex = null;
|
|
275
|
-
this.annMeta = null;
|
|
276
|
-
this.annDirty = false; // needs rebuild
|
|
277
|
-
this.annPersistDirty = false; // in-memory differs from disk
|
|
278
|
-
this.annLoading = null;
|
|
279
|
-
this.annVectorCache = null;
|
|
280
|
-
|
|
281
|
-
// Call graph
|
|
282
|
-
this.fileCallData = new Map();
|
|
283
|
-
this.callGraph = null;
|
|
284
|
-
this._callGraphBuild = null;
|
|
285
|
-
|
|
286
|
-
// Binary vector store (optional)
|
|
287
|
-
this.binaryStore = null;
|
|
288
|
-
|
|
289
|
-
// SQLite vector store (optional)
|
|
290
|
-
this.sqliteStore = null;
|
|
291
|
-
|
|
292
|
-
// Error tracking
|
|
293
|
-
this.initErrors = [];
|
|
294
|
-
|
|
295
|
-
// Concurrency hooks (read tracking)
|
|
296
|
-
this.activeReads = 0;
|
|
297
|
-
this._readWaiters = [];
|
|
298
|
-
this._saveInProgress = false; // Prevents new reads during save
|
|
299
|
-
|
|
300
|
-
// Lazy reload support after dropping in-memory vectors
|
|
301
|
-
this._clearedAfterIndex = false;
|
|
302
|
-
this._loadPromise = null;
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
/**
|
|
306
|
-
* Add an initialization error with consistent structure.
|
|
307
|
-
* @param {string} stage - The stage where the error occurred (e.g., 'loadHnswlib', 'ensureAnnIndex')
|
|
308
|
-
* @param {Error|string} error - The error object or message
|
|
309
|
-
*/
|
|
310
|
-
addInitError(stage, error) {
|
|
311
|
-
this.initErrors.push({
|
|
312
|
-
stage,
|
|
313
|
-
message: error instanceof Error ? error.message : String(error),
|
|
314
|
-
stack: error instanceof Error ? error.stack : null,
|
|
315
|
-
timestamp: Date.now(),
|
|
316
|
-
});
|
|
317
|
-
}
|
|
318
|
-
|
|
319
|
-
clearInMemoryState() {
|
|
320
|
-
this.vectorStore = [];
|
|
321
|
-
this.fileHashes.clear();
|
|
322
|
-
this.invalidateAnnIndex();
|
|
323
|
-
this.fileCallData.clear();
|
|
324
|
-
this.callGraph = null;
|
|
325
|
-
this.initErrors = [];
|
|
326
|
-
if (this.binaryStore) {
|
|
327
|
-
try {
|
|
328
|
-
this.binaryStore.close?.();
|
|
329
|
-
} catch {
|
|
330
|
-
// ignore close errors
|
|
331
|
-
}
|
|
332
|
-
this.binaryStore = null;
|
|
333
|
-
}
|
|
334
|
-
if (this.sqliteStore) {
|
|
335
|
-
try {
|
|
336
|
-
this.sqliteStore.close?.();
|
|
337
|
-
} catch {
|
|
338
|
-
// ignore close errors
|
|
339
|
-
}
|
|
340
|
-
this.sqliteStore = null;
|
|
341
|
-
}
|
|
342
|
-
}
|
|
343
|
-
|
|
344
|
-
async close() {
|
|
345
|
-
if (this.binaryStore) {
|
|
346
|
-
await this.binaryStore.close();
|
|
347
|
-
this.binaryStore = null;
|
|
348
|
-
}
|
|
349
|
-
if (this.sqliteStore) {
|
|
350
|
-
try {
|
|
351
|
-
this.sqliteStore.close();
|
|
352
|
-
} catch {
|
|
353
|
-
// SQLite may already be closed or in error state
|
|
354
|
-
}
|
|
355
|
-
this.sqliteStore = null;
|
|
356
|
-
}
|
|
357
|
-
}
|
|
358
|
-
|
|
359
|
-
async ensureLoaded({ preferDisk = false } = {}) {
|
|
360
|
-
if (!this.config.enableCache) return;
|
|
361
|
-
if (!this._clearedAfterIndex) return;
|
|
362
|
-
if (this._loadPromise) return this._loadPromise;
|
|
363
|
-
|
|
364
|
-
this._loadPromise = (async () => {
|
|
365
|
-
if (preferDisk && this.config.verbose) {
|
|
366
|
-
console.info('[Cache] ensureLoaded: forcing disk vector mode for incremental low-RAM reload');
|
|
367
|
-
}
|
|
368
|
-
await this.load({
|
|
369
|
-
forceVectorLoadMode: preferDisk ? 'disk' : undefined,
|
|
370
|
-
});
|
|
371
|
-
this._clearedAfterIndex = false;
|
|
372
|
-
})().finally(() => {
|
|
373
|
-
this._loadPromise = null;
|
|
374
|
-
});
|
|
375
|
-
|
|
376
|
-
return this._loadPromise;
|
|
377
|
-
}
|
|
378
|
-
|
|
379
|
-
async dropInMemoryVectors() {
|
|
380
|
-
if (!this.config.enableCache) return;
|
|
381
|
-
|
|
382
|
-
if (this.activeReads > 0) {
|
|
383
|
-
await this.waitForReaders();
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
this.vectorStore = [];
|
|
387
|
-
this.annVectorCache = null;
|
|
388
|
-
this.annIndex = null;
|
|
389
|
-
this.annMeta = null;
|
|
390
|
-
this.annDirty = true;
|
|
391
|
-
this.annPersistDirty = false;
|
|
392
|
-
|
|
393
|
-
if (this.binaryStore) {
|
|
394
|
-
try {
|
|
395
|
-
await this.binaryStore.close();
|
|
396
|
-
} catch {
|
|
397
|
-
// ignore close errors
|
|
398
|
-
}
|
|
399
|
-
this.binaryStore = null;
|
|
400
|
-
}
|
|
401
|
-
|
|
402
|
-
if (this.sqliteStore) {
|
|
403
|
-
try {
|
|
404
|
-
this.sqliteStore.close();
|
|
405
|
-
} catch {
|
|
406
|
-
// ignore close errors
|
|
407
|
-
}
|
|
408
|
-
this.sqliteStore = null;
|
|
409
|
-
}
|
|
410
|
-
|
|
411
|
-
this._clearedAfterIndex = true;
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
// -------------------- Concurrency Hooks --------------------
|
|
415
|
-
|
|
416
|
-
startRead() {
|
|
417
|
-
// Prevent new reads while save is in progress to avoid race conditions
|
|
418
|
-
if (this._saveInProgress) {
|
|
419
|
-
throw new Error('Cache save in progress, try again shortly');
|
|
420
|
-
}
|
|
421
|
-
this.activeReads++;
|
|
422
|
-
}
|
|
423
|
-
|
|
424
|
-
endRead() {
|
|
425
|
-
if (this.activeReads > 0) {
|
|
426
|
-
this.activeReads--;
|
|
427
|
-
if (this.activeReads === 0 && this._readWaiters.length > 0) {
|
|
428
|
-
const waiters = this._readWaiters;
|
|
429
|
-
this._readWaiters = [];
|
|
430
|
-
for (const resolve of waiters) {
|
|
431
|
-
resolve();
|
|
432
|
-
}
|
|
433
|
-
}
|
|
434
|
-
}
|
|
435
|
-
}
|
|
436
|
-
|
|
437
|
-
async waitForReaders() {
|
|
438
|
-
if (this.activeReads === 0) return;
|
|
439
|
-
await new Promise((resolve) => {
|
|
440
|
-
this._readWaiters.push(resolve);
|
|
441
|
-
});
|
|
442
|
-
}
|
|
443
|
-
|
|
444
|
-
async waitForReadersWithTimeout(timeoutMs = 5000) {
|
|
445
|
-
if (this.activeReads === 0) return true;
|
|
446
|
-
let timedOut = false;
|
|
447
|
-
let resolved = false;
|
|
448
|
-
let waiterResolve;
|
|
449
|
-
const waiterPromise = new Promise((resolve) => {
|
|
450
|
-
waiterResolve = () => {
|
|
451
|
-
if (!resolved) {
|
|
452
|
-
resolved = true;
|
|
453
|
-
resolve();
|
|
454
|
-
}
|
|
455
|
-
};
|
|
456
|
-
this._readWaiters.push(waiterResolve);
|
|
457
|
-
});
|
|
458
|
-
await Promise.race([
|
|
459
|
-
waiterPromise,
|
|
460
|
-
new Promise((resolve) => {
|
|
461
|
-
setTimeout(() => {
|
|
462
|
-
if (!resolved) {
|
|
463
|
-
resolved = true;
|
|
464
|
-
timedOut = true;
|
|
465
|
-
// Remove waiter from array to prevent late invocation after timeout
|
|
466
|
-
const idx = this._readWaiters.indexOf(waiterResolve);
|
|
467
|
-
if (idx >= 0) this._readWaiters.splice(idx, 1);
|
|
468
|
-
resolve();
|
|
469
|
-
}
|
|
470
|
-
}, timeoutMs);
|
|
471
|
-
}),
|
|
472
|
-
]);
|
|
473
|
-
if (timedOut) {
|
|
474
|
-
// Always warn (not just verbose) since proceeding with active readers is risky
|
|
475
|
-
console.warn(
|
|
476
|
-
`[Cache] Timed out waiting for ${this.activeReads} active reader(s); proceeding with save anyway. ` +
|
|
477
|
-
'This may cause data inconsistency if readers access the store during write.'
|
|
478
|
-
);
|
|
479
|
-
}
|
|
480
|
-
return !timedOut;
|
|
481
|
-
}
|
|
482
|
-
|
|
483
|
-
// -------------------- Reset --------------------
|
|
484
|
-
|
|
485
|
-
/**
|
|
486
|
-
* Resets the cache state (clears vectors, hashes, and call graph).
|
|
487
|
-
* Used for forced reindexing.
|
|
488
|
-
*/
|
|
489
|
-
async reset() {
|
|
490
|
-
this.vectorStore = [];
|
|
491
|
-
if (this.binaryStore) {
|
|
492
|
-
try {
|
|
493
|
-
await this.binaryStore.close();
|
|
494
|
-
} catch {
|
|
495
|
-
// ignore close errors
|
|
496
|
-
}
|
|
497
|
-
this.binaryStore = null;
|
|
498
|
-
}
|
|
499
|
-
if (this.sqliteStore) {
|
|
500
|
-
try {
|
|
501
|
-
this.sqliteStore.close();
|
|
502
|
-
} catch {
|
|
503
|
-
// ignore close errors
|
|
504
|
-
}
|
|
505
|
-
this.sqliteStore = null;
|
|
506
|
-
}
|
|
507
|
-
this.fileHashes.clear();
|
|
508
|
-
this.invalidateAnnIndex();
|
|
509
|
-
await this.clearCallGraphData({ removeFile: true });
|
|
510
|
-
this.initErrors = [];
|
|
511
|
-
}
|
|
512
|
-
|
|
513
|
-
// -------------------- Load --------------------
|
|
514
|
-
|
|
515
|
-
async load({ forceVectorLoadMode } = {}) {
|
|
516
|
-
if (!this.config.enableCache) return;
|
|
517
|
-
|
|
518
|
-
try {
|
|
519
|
-
await fs.mkdir(this.config.cacheDirectory, { recursive: true });
|
|
520
|
-
|
|
521
|
-
const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
|
|
522
|
-
const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
|
|
523
|
-
const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
|
|
524
|
-
|
|
525
|
-
const workerThresholdBytes =
|
|
526
|
-
Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
|
|
527
|
-
this.config.jsonWorkerThresholdBytes > 0
|
|
528
|
-
? this.config.jsonWorkerThresholdBytes
|
|
529
|
-
: JSON_WORKER_THRESHOLD_BYTES;
|
|
530
|
-
|
|
531
|
-
const useBinary = this.config.vectorStoreFormat === 'binary';
|
|
532
|
-
const useSqlite = this.config.vectorStoreFormat === 'sqlite';
|
|
533
|
-
|
|
534
|
-
const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
|
|
535
|
-
this.config.cacheDirectory
|
|
536
|
-
);
|
|
537
|
-
const pathExists = async (targetPath) => {
|
|
538
|
-
try {
|
|
539
|
-
await fs.access(targetPath);
|
|
540
|
-
return true;
|
|
541
|
-
} catch {
|
|
542
|
-
return false;
|
|
543
|
-
}
|
|
544
|
-
};
|
|
545
|
-
|
|
546
|
-
// In tests, read cache files eagerly to exercise worker paths.
|
|
547
|
-
let cacheData = null;
|
|
548
|
-
let hashData = null;
|
|
549
|
-
let prefetched = false;
|
|
550
|
-
if (IS_TEST_ENV) {
|
|
551
|
-
prefetched = true;
|
|
552
|
-
const cachePromise = useBinary || useSqlite
|
|
553
|
-
? Promise.resolve(null)
|
|
554
|
-
: readJsonFile(cacheFile, { workerThresholdBytes });
|
|
555
|
-
[cacheData, hashData] = await Promise.all([
|
|
556
|
-
cachePromise,
|
|
557
|
-
readJsonFile(hashFile, { workerThresholdBytes }),
|
|
558
|
-
]);
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
// Read meta first to avoid parsing huge cache files when invalid
|
|
562
|
-
const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
|
|
563
|
-
if (!metaData) {
|
|
564
|
-
console.warn('[Cache] Missing cache metadata, ignoring cache');
|
|
565
|
-
this.clearInMemoryState();
|
|
566
|
-
return;
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
let meta;
|
|
570
|
-
try {
|
|
571
|
-
meta = JSON.parse(metaData);
|
|
572
|
-
} catch {
|
|
573
|
-
console.warn('[Cache] Invalid cache metadata, ignoring cache');
|
|
574
|
-
this.clearInMemoryState();
|
|
575
|
-
return;
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
if (meta?.version !== CACHE_META_VERSION) {
|
|
579
|
-
console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
|
|
580
|
-
this.clearInMemoryState();
|
|
581
|
-
return;
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
if (meta?.embeddingModel !== this.config.embeddingModel) {
|
|
585
|
-
console.warn(
|
|
586
|
-
`[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`
|
|
587
|
-
);
|
|
588
|
-
this.clearInMemoryState();
|
|
589
|
-
return;
|
|
590
|
-
}
|
|
591
|
-
const expectedDimension = this.config.embeddingDimension ?? null;
|
|
592
|
-
const metaDimension = meta?.embeddingDimension ?? null;
|
|
593
|
-
if (metaDimension !== expectedDimension) {
|
|
594
|
-
console.warn(
|
|
595
|
-
`[Cache] Embedding dimension changed, ignoring cache (${metaDimension} -> ${expectedDimension})`
|
|
596
|
-
);
|
|
597
|
-
this.clearInMemoryState();
|
|
598
|
-
return;
|
|
599
|
-
}
|
|
600
|
-
|
|
601
|
-
if (!prefetched) {
|
|
602
|
-
[cacheData, hashData] = await Promise.all([
|
|
603
|
-
useBinary || useSqlite ? Promise.resolve(null) : readJsonFile(cacheFile, { workerThresholdBytes }),
|
|
604
|
-
readJsonFile(hashFile, { workerThresholdBytes }),
|
|
605
|
-
]);
|
|
606
|
-
}
|
|
607
|
-
|
|
608
|
-
this.cacheMeta = meta;
|
|
609
|
-
|
|
610
|
-
const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
|
|
611
|
-
(async () => {
|
|
612
|
-
const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
|
|
613
|
-
pathExists(vectorsPath),
|
|
614
|
-
pathExists(recordsPath),
|
|
615
|
-
pathExists(contentPath),
|
|
616
|
-
pathExists(filesPath),
|
|
617
|
-
]);
|
|
618
|
-
return vectorsOk && recordsOk && contentOk && filesOk;
|
|
619
|
-
})(),
|
|
620
|
-
pathExists(cacheFile),
|
|
621
|
-
]);
|
|
622
|
-
|
|
623
|
-
if (useBinary && !binaryFilesPresent) {
|
|
624
|
-
if (jsonCachePresent) {
|
|
625
|
-
console.warn(
|
|
626
|
-
'[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.'
|
|
627
|
-
);
|
|
628
|
-
} else {
|
|
629
|
-
console.warn(
|
|
630
|
-
'[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.'
|
|
631
|
-
);
|
|
632
|
-
}
|
|
633
|
-
} else if (!useBinary && !useSqlite && !jsonCachePresent) {
|
|
634
|
-
if (binaryFilesPresent) {
|
|
635
|
-
console.warn(
|
|
636
|
-
'[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.'
|
|
637
|
-
);
|
|
638
|
-
} else {
|
|
639
|
-
console.warn(
|
|
640
|
-
'[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.'
|
|
641
|
-
);
|
|
642
|
-
}
|
|
643
|
-
}
|
|
644
|
-
|
|
645
|
-
const configuredVectorLoadMode =
|
|
646
|
-
typeof this.config.vectorStoreLoadMode === 'string'
|
|
647
|
-
? this.config.vectorStoreLoadMode.toLowerCase()
|
|
648
|
-
: 'memory';
|
|
649
|
-
const effectiveVectorLoadMode =
|
|
650
|
-
forceVectorLoadMode === 'disk' || forceVectorLoadMode === 'memory'
|
|
651
|
-
? forceVectorLoadMode
|
|
652
|
-
: configuredVectorLoadMode;
|
|
653
|
-
|
|
654
|
-
if (useBinary) {
|
|
655
|
-
try {
|
|
656
|
-
this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
|
|
657
|
-
contentCacheEntries: this.config.contentCacheEntries,
|
|
658
|
-
vectorCacheEntries: this.config.vectorCacheEntries,
|
|
659
|
-
vectorLoadMode: effectiveVectorLoadMode,
|
|
660
|
-
});
|
|
661
|
-
cacheData = await this.binaryStore.toChunkViews({
|
|
662
|
-
includeContent: this.config.vectorStoreContentMode === 'inline',
|
|
663
|
-
includeVector: effectiveVectorLoadMode !== 'disk',
|
|
664
|
-
});
|
|
665
|
-
} catch (err) {
|
|
666
|
-
this.binaryStore = null;
|
|
667
|
-
console.warn(`[Cache] Failed to load binary vector store: ${err.message}`);
|
|
668
|
-
}
|
|
669
|
-
}
|
|
670
|
-
|
|
671
|
-
// SQLite store loading
|
|
672
|
-
if (useSqlite) {
|
|
673
|
-
try {
|
|
674
|
-
this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
|
|
675
|
-
if (this.sqliteStore) {
|
|
676
|
-
cacheData = this.sqliteStore.toChunkViews({
|
|
677
|
-
includeContent: this.config.vectorStoreContentMode === 'inline',
|
|
678
|
-
includeVector: effectiveVectorLoadMode !== 'disk',
|
|
679
|
-
});
|
|
680
|
-
} else {
|
|
681
|
-
// SQLite file missing, need reindex
|
|
682
|
-
console.warn('[Cache] vectorStoreFormat=sqlite but vectors.sqlite is missing. Reindex to regenerate the cache.');
|
|
683
|
-
}
|
|
684
|
-
} catch (err) {
|
|
685
|
-
this.sqliteStore = null;
|
|
686
|
-
console.warn(`[Cache] Failed to load SQLite vector store: ${err.message}`);
|
|
687
|
-
}
|
|
688
|
-
}
|
|
689
|
-
|
|
690
|
-
if (!cacheData) {
|
|
691
|
-
cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
|
|
692
|
-
}
|
|
693
|
-
|
|
694
|
-
const hasCacheData = Array.isArray(cacheData);
|
|
695
|
-
const hasHashData = hashData && typeof hashData === 'object';
|
|
696
|
-
|
|
697
|
-
if (hasCacheData) {
|
|
698
|
-
const allowedExtensions = new Set(
|
|
699
|
-
(this.config.fileExtensions || []).map((ext) => `.${ext}`)
|
|
700
|
-
);
|
|
701
|
-
const allowedFileNames = new Set(this.config.fileNames || []);
|
|
702
|
-
const applyExtensionFilter = !this.binaryStore;
|
|
703
|
-
const shouldKeepFile = (filePath) => {
|
|
704
|
-
const ext = path.extname(filePath);
|
|
705
|
-
if (allowedExtensions.has(ext)) return true;
|
|
706
|
-
return allowedFileNames.has(path.basename(filePath));
|
|
707
|
-
};
|
|
708
|
-
|
|
709
|
-
const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
|
|
710
|
-
this.vectorStore = [];
|
|
711
|
-
this.fileHashes.clear();
|
|
712
|
-
|
|
713
|
-
// Single-pass filter + normalization
|
|
714
|
-
for (const chunk of cacheData) {
|
|
715
|
-
if (applyExtensionFilter) {
|
|
716
|
-
if (!shouldKeepFile(chunk.file)) continue;
|
|
717
|
-
}
|
|
718
|
-
normalizeChunkVector(chunk);
|
|
719
|
-
this.vectorStore.push(chunk);
|
|
720
|
-
}
|
|
721
|
-
const filteredCount = cacheData.length - this.vectorStore.length;
|
|
722
|
-
if (filteredCount > 0 && this.config.verbose) {
|
|
723
|
-
console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
|
|
724
|
-
}
|
|
725
|
-
|
|
726
|
-
if (hasHashData) {
|
|
727
|
-
// Only keep hashes for allowed extensions
|
|
728
|
-
for (const [file, entry] of rawHashes) {
|
|
729
|
-
if (!applyExtensionFilter || shouldKeepFile(file)) {
|
|
730
|
-
const normalized = normalizeFileHashEntry(entry);
|
|
731
|
-
if (normalized) {
|
|
732
|
-
this.fileHashes.set(file, normalized);
|
|
733
|
-
}
|
|
734
|
-
}
|
|
735
|
-
}
|
|
736
|
-
} else {
|
|
737
|
-
console.warn(
|
|
738
|
-
'[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared'
|
|
739
|
-
);
|
|
740
|
-
}
|
|
741
|
-
|
|
742
|
-
assignChunkIndices(this.vectorStore);
|
|
743
|
-
|
|
744
|
-
if (this.config.verbose) {
|
|
745
|
-
console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
|
|
746
|
-
}
|
|
747
|
-
|
|
748
|
-
// ANN index is lazily loaded/built on first query
|
|
749
|
-
this.annDirty = false;
|
|
750
|
-
this.annPersistDirty = false;
|
|
751
|
-
this.annIndex = null;
|
|
752
|
-
this.annMeta = null;
|
|
753
|
-
this.annVectorCache = null;
|
|
754
|
-
} else if (cacheData) {
|
|
755
|
-
console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
|
|
756
|
-
} else if (hasHashData) {
|
|
757
|
-
console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
|
|
758
|
-
}
|
|
759
|
-
|
|
760
|
-
// Load call-graph data if it exists
|
|
761
|
-
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
762
|
-
try {
|
|
763
|
-
const callGraphData = await fs.readFile(callGraphFile, 'utf8');
|
|
764
|
-
const parsed = JSON.parse(callGraphData);
|
|
765
|
-
this.fileCallData = new Map(Object.entries(parsed));
|
|
766
|
-
if (this.config.verbose) {
|
|
767
|
-
console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
|
|
768
|
-
}
|
|
769
|
-
} catch {
|
|
770
|
-
// no cache yet, OK
|
|
771
|
-
}
|
|
772
|
-
} catch (error) {
|
|
773
|
-
console.warn('[Cache] Failed to load cache:', error.message);
|
|
774
|
-
this.clearInMemoryState();
|
|
775
|
-
}
|
|
776
|
-
}
|
|
777
|
-
|
|
778
|
-
// -------------------- Save (debounced + serialized) --------------------
|
|
779
|
-
|
|
780
|
-
/**
 * Request a cache save, debounced and serialized.
 * Multiple calls within the debounce window coalesce into one performSave();
 * saves never overlap because they are chained on this.saveQueue.
 * @returns {Promise<void>} Resolves when the pending save cycle completes
 *   (an already-resolved promise when caching is disabled).
 */
save() {
  if (!this.config.enableCache) return Promise.resolve();

  // Record that at least one save is wanted; the drain loop below re-checks it.
  this._saveRequested = true;

  // A debounce timer is already pending: piggyback on the in-flight promise.
  if (this._saveTimer) return this._savePromise ?? Promise.resolve();

  const debounceMs = Number.isInteger(this.config.saveDebounceMs)
    ? this.config.saveDebounceMs
    : 250;

  this._savePromise = new Promise((resolve, reject) => {
    this._saveTimer = setTimeout(() => {
      this._saveTimer = null;

      // Chain onto saveQueue so two timers can never run performSave() concurrently.
      this.saveQueue = this.saveQueue
        .then(async () => {
          // Drain any requests that arrived while a save was already running.
          while (this._saveRequested) {
            this._saveRequested = false;
            await this.performSave();
          }
        })
        .then(resolve, reject)
        .finally(() => {
          this._savePromise = null;
        });
    }, debounceMs);
  });

  return this._savePromise;
}
|
|
811
|
-
|
|
812
|
-
/**
 * Perform the actual cache write: vector store (JSON / binary / SQLite),
 * file hashes, cache metadata, call-graph data, and — when dirty — the ANN
 * index. Waits for active readers first to avoid corrupting in-flight reads.
 * Errors are logged (not rethrown); failed binary/SQLite saves attempt to
 * re-open the backing store so subsequent reads keep working.
 * @returns {Promise<void>}
 */
async performSave() {
  // Block new reads from starting during save operation
  this._saveInProgress = true;

  // Wait for active readers before modifying state to prevent data corruption
  if (this.activeReads > 0) {
    const timeoutMs = this.config.saveReaderWaitTimeoutMs ?? DEFAULT_READER_WAIT_TIMEOUT_MS;
    const allReadersFinished = await this.waitForReadersWithTimeout(timeoutMs);
    if (!allReadersFinished && !this.config.forceSaveWithActiveReaders) {
      console.warn('[Cache] Aborting save - active readers still present after timeout');
      this._saveInProgress = false; // Reset flag on early return
      return; // Abort instead of risking data corruption
    }
  }

  this.isSaving = true;

  try {
    await fs.mkdir(this.config.cacheDirectory, { recursive: true });

    const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
    const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
    const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);

    // Snapshot to avoid race conditions during async write.
    // Keep this shallow for binary/sqlite to prevent multi-GB vector materialization.
    const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
    const supportsBackendVectorResolve =
      this.config.vectorStoreFormat === 'binary' || this.config.vectorStoreFormat === 'sqlite';
    const hasMissingVectors = snapshotStore.some(
      (chunk) => chunk && (chunk.vector === undefined || chunk.vector === null)
    );
    // Vectors may live only on disk; resolve them lazily through the backend
    // when the load mode is 'disk' or some chunks carry no in-memory vector.
    const useDiskVectors =
      supportsBackendVectorResolve &&
      (this.config.vectorStoreLoadMode === 'disk' || hasMissingVectors);
    if (hasMissingVectors && !useDiskVectors) {
      throw new Error(
        'Missing vector data for cache write and backend vector resolution is unavailable'
      );
    }

    this.cacheMeta = {
      version: CACHE_META_VERSION,
      embeddingModel: this.config.embeddingModel,
      embeddingDimension: this.config.embeddingDimension ?? null,
      lastSaveTime: new Date().toISOString(),
      filesIndexed: this.fileHashes.size,
      chunksStored: snapshotStore.length,
      workspace: this.config.searchDirectory || null,
    };
    if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
      this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
    }
    if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
      Object.assign(this.cacheMeta, this.lastIndexStats);
    }

    const total = snapshotStore.length;
    if (this.config.vectorStoreFormat === 'binary') {
      this.binaryStore = await BinaryVectorStore.write(
        this.config.cacheDirectory,
        snapshotStore,
        {
          contentCacheEntries: this.config.contentCacheEntries,
          vectorCacheEntries: this.config.vectorCacheEntries,
          vectorLoadMode: useDiskVectors ? 'disk' : this.config.vectorStoreLoadMode,
          getContent: (chunk, index) => this.getChunkContent(chunk, index),
          getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
          // Called just before the new file is renamed into place: drain
          // readers and close the old store so the rename can succeed.
          preRename: async () => {
            if (this.activeReads > 0) {
              await this.waitForReadersWithTimeout(
                Number.isInteger(this.config.saveReaderWaitTimeoutMs)
                  ? this.config.saveReaderWaitTimeoutMs
                  : 5000
              );
            }
            if (this.binaryStore) {
              await this.binaryStore.close();
              this.binaryStore = null;
            }
          },
        }
      );
      if (this.binaryStore) {
        this.cacheMeta.chunksStored = this.binaryStore.length;
      }
    } else if (this.config.vectorStoreFormat === 'sqlite') {
      // SQLite store save
      if (this.sqliteStore) {
        try {
          this.sqliteStore.close();
        } catch {
          // ignore close errors
        }
        this.sqliteStore = null;
      }
      this.sqliteStore = await SqliteVectorStore.write(
        this.config.cacheDirectory,
        snapshotStore,
        {
          getContent: (chunk, index) => this.getChunkContent(chunk, index),
          getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
          preRename: async () => {
            if (this.activeReads > 0) {
              await this.waitForReadersWithTimeout(
                Number.isInteger(this.config.saveReaderWaitTimeoutMs)
                  ? this.config.saveReaderWaitTimeoutMs
                  : 5000
              );
            }
          },
        }
      );
      if (this.sqliteStore) {
        this.cacheMeta.chunksStored = this.sqliteStore.length();
      }
    } else {
      // Default JSON format: stream chunks out to avoid building one giant string.
      const vectorWriter = new StreamingJsonWriter(cacheFile, {
        highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
        floatDigits: this.config.cacheVectorFloatDigits ?? 6,
        flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
        indent: '', // set to " " if you prefer pretty formatting
        assumeFinite: this.config.cacheVectorAssumeFinite,
        checkFinite: this.config.cacheVectorCheckFinite,
        noMutation: this.config.cacheVectorNoMutation ?? false,
        joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
        joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
      });

      await vectorWriter.writeStart();

      // Optional responsiveness yield (only for huge saves)
      const yieldEvery = total >= 50_000 ? 5000 : 0;

      try {
        for (let i = 0; i < total; i++) {
          const pending = vectorWriter.writeItem(snapshotStore[i]);
          if (pending) await pending;
          if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
        }
        await vectorWriter.writeEnd();
      } catch (e) {
        vectorWriter.abort(e);
        throw e;
      }
    }

    const hashEntries = {};
    for (const [file, entry] of this.fileHashes) {
      const serialized = serializeFileHashEntry(entry);
      if (serialized) {
        hashEntries[file] = serialized;
      }
    }

    await Promise.all([
      fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
      fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
    ]);

    // Save call-graph data (or remove stale cache)
    const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
    if (this.fileCallData.size > 0) {
      await fs.writeFile(
        callGraphFile,
        JSON.stringify(Object.fromEntries(this.fileCallData), null, 2)
      );
    } else {
      await fs.rm(callGraphFile, { force: true });
    }

    // Persist ANN index if it exists and changed in memory
    // Use mutex to prevent concurrent writes (index could be modified during save)
    if (
      this.config.annIndexCache !== false &&
      this.annPersistDirty &&
      !this.annDirty &&
      !this._annWriting &&
      this.annIndex &&
      this.annMeta
    ) {
      this._annWriting = true;
      try {
        const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
        this.annIndex.writeIndexSync(indexFile);
        await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
        this.annPersistDirty = false;
        if (this.config.verbose) {
          console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
        }
      } catch (error) {
        console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
      } finally {
        this._annWriting = false;
      }
    }
  } catch (error) {
    console.warn('[Cache] Failed to save cache:', error.message);
    // Attempt to recover binary store if it was closed during failed save
    if (
      this.config.vectorStoreFormat === 'binary' &&
      this.binaryStore &&
      !this.binaryStore.vectorsBuffer
    ) {
      try {
        console.info('[Cache] Attempting to recover binary store after failed save...');
        this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
          contentCacheEntries: this.config.contentCacheEntries,
        });
        console.info('[Cache] Binary store recovered.');
      } catch (recoverErr) {
        console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
        this.binaryStore = null; // Ensure it's null if unusable
      }
    }
    // Attempt to recover SQLite store if closed during failed save
    if (
      this.config.vectorStoreFormat === 'sqlite' &&
      !this.sqliteStore
    ) {
      try {
        console.info('[Cache] Attempting to recover SQLite store after failed save...');
        this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
        if (this.sqliteStore) {
          console.info('[Cache] SQLite store recovered.');
        }
      } catch (recoverErr) {
        console.warn(`[Cache] Failed to recover SQLite store: ${recoverErr.message}`);
        this.sqliteStore = null;
      }
    }
  } finally {
    this.isSaving = false;
    this._saveInProgress = false; // Allow reads to resume
  }
}
|
|
1048
|
-
|
|
1049
|
-
// -------------------- Vector Store API --------------------
|
|
1050
|
-
|
|
1051
|
-
getVectorStore() {
|
|
1052
|
-
return Array.isArray(this.vectorStore) ? this.vectorStore : [];
|
|
1053
|
-
}
|
|
1054
|
-
|
|
1055
|
-
/**
 * Replace the in-memory vector store wholesale.
 * Normalizes chunk vectors, reassigns chunk indices, and invalidates the ANN
 * index. The new store is installed *before* the previous binary/SQLite
 * backing stores are closed, so readers never observe a half-swapped state.
 * @param {Array<object>|*} store - New chunk array (non-array values are
 *   tolerated; normalization is then skipped).
 * @returns {Promise<void>}
 */
async setVectorStore(store) {
  const previousBinaryStore = this.binaryStore;
  const previousSqliteStore = this.sqliteStore;
  this.vectorStore = store;
  this.binaryStore = null;
  this.sqliteStore = null;
  if (Array.isArray(this.vectorStore)) {
    for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
    assignChunkIndices(this.vectorStore);
  }
  // Labels no longer match the old index; force a rebuild on next query.
  this.invalidateAnnIndex();
  if (previousBinaryStore) {
    try {
      await previousBinaryStore.close();
    } catch {
      // ignore close errors
    }
  }
  if (previousSqliteStore) {
    try {
      previousSqliteStore.close();
    } catch {
      // ignore close errors
    }
  }
}
|
|
1081
|
-
|
|
1082
|
-
setLastIndexDuration(durationMs) {
|
|
1083
|
-
if (Number.isFinite(durationMs) && durationMs >= 0) {
|
|
1084
|
-
this.lastIndexDurationMs = durationMs;
|
|
1085
|
-
}
|
|
1086
|
-
}
|
|
1087
|
-
|
|
1088
|
-
setLastIndexStats(stats) {
|
|
1089
|
-
if (stats && typeof stats === 'object') {
|
|
1090
|
-
this.lastIndexStats = { ...stats };
|
|
1091
|
-
}
|
|
1092
|
-
}
|
|
1093
|
-
|
|
1094
|
-
getFileHash(file) {
|
|
1095
|
-
const entry = this.fileHashes.get(file);
|
|
1096
|
-
if (typeof entry === 'string') return entry;
|
|
1097
|
-
return entry?.hash;
|
|
1098
|
-
}
|
|
1099
|
-
|
|
1100
|
-
getFileHashKeys() {
|
|
1101
|
-
return Array.from(this.fileHashes.keys());
|
|
1102
|
-
}
|
|
1103
|
-
|
|
1104
|
-
getFileHashCount() {
|
|
1105
|
-
return this.fileHashes.size;
|
|
1106
|
-
}
|
|
1107
|
-
|
|
1108
|
-
clearFileHashes() {
|
|
1109
|
-
this.fileHashes.clear();
|
|
1110
|
-
}
|
|
1111
|
-
|
|
1112
|
-
setFileHashes(entries) {
|
|
1113
|
-
this.fileHashes.clear();
|
|
1114
|
-
if (!entries || typeof entries !== 'object') return;
|
|
1115
|
-
const iterator =
|
|
1116
|
-
entries instanceof Map
|
|
1117
|
-
? entries.entries()
|
|
1118
|
-
: Object.entries(entries);
|
|
1119
|
-
if (!iterator) return;
|
|
1120
|
-
for (const [file, entry] of iterator) {
|
|
1121
|
-
const normalized = normalizeFileHashEntry(entry);
|
|
1122
|
-
if (normalized) {
|
|
1123
|
-
this.fileHashes.set(file, normalized);
|
|
1124
|
-
}
|
|
1125
|
-
}
|
|
1126
|
-
}
|
|
1127
|
-
|
|
1128
|
-
setFileHash(file, hash, meta = null) {
|
|
1129
|
-
const entry = { hash };
|
|
1130
|
-
if (meta && typeof meta === 'object') {
|
|
1131
|
-
if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
|
|
1132
|
-
if (Number.isFinite(meta.size)) entry.size = meta.size;
|
|
1133
|
-
}
|
|
1134
|
-
this.fileHashes.set(file, entry);
|
|
1135
|
-
}
|
|
1136
|
-
|
|
1137
|
-
getFileMeta(file) {
|
|
1138
|
-
const entry = this.fileHashes.get(file);
|
|
1139
|
-
if (!entry) return null;
|
|
1140
|
-
if (typeof entry === 'string') return { hash: entry };
|
|
1141
|
-
return entry;
|
|
1142
|
-
}
|
|
1143
|
-
|
|
1144
|
-
/**
 * Resolve a chunk's embedding vector, trying in order: the in-memory vector,
 * the binary backing store, then the SQLite backing store.
 * @param {object|number} chunk - Chunk object, or a numeric store index.
 * @param {number|null} [index] - Optional explicit store index hint used when
 *   the chunk object carries no backend index of its own.
 * @returns {Float32Array|number[]|null} The vector, or null when unresolvable.
 */
getChunkVector(chunk, index = null) {
  // Numeric form: treat the argument as an index into the store.
  if (typeof chunk === 'number') {
    const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
    const entry = store ? store[chunk] : null;
    if (entry?.vector) return entry.vector;
    if (this.binaryStore) {
      // Prefer the chunk's recorded binary slot; fall back to the raw index.
      const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
      return this.binaryStore.getVector(resolved);
    }
    if (this.sqliteStore) {
      const resolved = Number.isInteger(entry?._sqliteIndex) ? entry._sqliteIndex : chunk;
      return this.sqliteStore.getVector(resolved);
    }
    return null;
  }

  // Object form: in-memory vector wins outright.
  if (chunk?.vector) return chunk.vector;
  const resolved = Number.isInteger(index) ? index : chunk?._index;
  if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
    return this.binaryStore.getVector(chunk._binaryIndex);
  }
  // Only trust the positional index against the binary store when no
  // in-memory array exists (otherwise positions may have drifted).
  if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
    return this.binaryStore.getVector(resolved);
  }
  if (this.sqliteStore) {
    const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
      ? chunk._sqliteIndex
      : Number.isInteger(chunk?.index)
        ? chunk.index
        : resolved;
    if (Number.isInteger(sqliteIndex)) {
      return this.sqliteStore.getVector(sqliteIndex);
    }
  }
  return null;
}
|
|
1180
|
-
|
|
1181
|
-
/**
 * Resolve a chunk's source text, trying in order: the in-memory `content`
 * field, the binary backing store, then the SQLite backing store.
 * Always returns a string — missing content resolves to ''.
 * @param {object|number} chunk - Chunk object, or a numeric store index.
 * @param {number|null} [index] - Optional explicit store index hint.
 * @returns {Promise<string>}
 */
async getChunkContent(chunk, index = null) {
  // Numeric form: look up the chunk by index, then recurse with the object.
  if (typeof chunk === 'number') {
    const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
    const entry = store ? store[chunk] : null;
    if (entry) return await this.getChunkContent(entry, chunk);
    if (!store && this.binaryStore) {
      const content = await this.binaryStore.getContent(chunk);
      return content ?? ''; // Ensure consistent empty string return
    }
    if (!store && this.sqliteStore) {
      return this.sqliteStore.getContent(chunk) ?? '';
    }
    return '';
  }
  // Object form: in-memory content wins ('' is a valid cached value).
  if (chunk?.content !== undefined && chunk?.content !== null) {
    return chunk.content;
  }
  if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
    const content = await this.binaryStore.getContent(chunk._binaryIndex);
    return content ?? ''; // Ensure consistent empty string return
  }
  const resolved = Number.isInteger(index) ? index : chunk?._index;
  // Positional lookup against the binary store only when no in-memory array
  // exists (positions may have drifted otherwise).
  if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
    const content = await this.binaryStore.getContent(resolved);
    return content ?? ''; // Ensure consistent empty string return
  }
  if (this.sqliteStore) {
    const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
      ? chunk._sqliteIndex
      : Number.isInteger(chunk?.index)
        ? chunk.index
        : resolved;
    if (Number.isInteger(sqliteIndex)) {
      return this.sqliteStore.getContent(sqliteIndex) ?? '';
    }
  }
  return '';
}
|
|
1219
|
-
|
|
1220
|
-
deleteFileHash(file) {
|
|
1221
|
-
this.fileHashes.delete(file);
|
|
1222
|
-
}
|
|
1223
|
-
|
|
1224
|
-
/**
|
|
1225
|
-
* Remove all chunks for a given file from the vector store.
|
|
1226
|
-
* Note: This is async to support future backend-specific cleanup.
|
|
1227
|
-
* For binary/SQLite stores, actual removal happens on next full save.
|
|
1228
|
-
* @param {string} file - Absolute path of file to remove
|
|
1229
|
-
*/
|
|
1230
|
-
async removeFileFromStore(file) {
|
|
1231
|
-
if (!Array.isArray(this.vectorStore)) return;
|
|
1232
|
-
// In-place compaction to avoid allocating a new large array
|
|
1233
|
-
let w = 0;
|
|
1234
|
-
for (let r = 0; r < this.vectorStore.length; r++) {
|
|
1235
|
-
const chunk = this.vectorStore[r];
|
|
1236
|
-
if (chunk.file !== file) {
|
|
1237
|
-
chunk._index = w;
|
|
1238
|
-
this.vectorStore[w++] = chunk;
|
|
1239
|
-
}
|
|
1240
|
-
}
|
|
1241
|
-
this.vectorStore.length = w;
|
|
1242
|
-
|
|
1243
|
-
// Removing shifts labels => rebuild ANN
|
|
1244
|
-
this.invalidateAnnIndex();
|
|
1245
|
-
this.removeFileCallData(file);
|
|
1246
|
-
// Also remove file hash to prevent orphaned entries
|
|
1247
|
-
this.fileHashes.delete(file);
|
|
1248
|
-
}
|
|
1249
|
-
|
|
1250
|
-
/**
 * Append a chunk to the vector store, assigning it the next label.
 * Attempts an incremental ANN addPoint() when the index is clean and has
 * spare capacity; otherwise invalidates the ANN index for a full rebuild.
 * @param {object} chunk - Chunk with (at least) a `vector` field; mutated
 *   here to record its `_index` label.
 */
addToStore(chunk) {
  normalizeChunkVector(chunk);

  if (!Array.isArray(this.vectorStore)) {
    this.vectorStore = [];
  }

  const label = this.vectorStore.length;
  chunk._index = label;
  this.vectorStore.push(chunk);
  // Keep the ANN vector cache aligned only when it exactly tracked the
  // store length before this push.
  if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
    this.annVectorCache.push(chunk.vector);
  }

  // Best-effort incremental ANN append (fast path)
  // Conditions: live clean index, count in sync with the new label, and
  // spare capacity so addPoint cannot overflow.
  if (
    this.annIndex &&
    !this.annDirty &&
    this.annMeta &&
    typeof this.annIndex.addPoint === 'function' &&
    this.annMeta.count === label &&
    this.annMeta.maxElements > this.annMeta.count
  ) {
    try {
      this.annIndex.addPoint(chunk.vector, label);
      this.annMeta.count += 1;
      this.annPersistDirty = true;
      return;
    } catch {
      // fall through
    }
  }

  // Slow path: the index can no longer be patched incrementally.
  this.invalidateAnnIndex();
}
|
|
1285
|
-
|
|
1286
|
-
invalidateAnnIndex() {
|
|
1287
|
-
this.annIndex = null;
|
|
1288
|
-
this.annMeta = null;
|
|
1289
|
-
this.annDirty = true;
|
|
1290
|
-
this.annPersistDirty = false;
|
|
1291
|
-
this.annVectorCache = null;
|
|
1292
|
-
}
|
|
1293
|
-
|
|
1294
|
-
/**
 * Fetch (and memoize) the vector for a store position, for ANN index builds.
 * Resolution order: per-build cache, in-memory chunk vector, binary store,
 * SQLite store. The cache array is recreated whenever its length no longer
 * matches the store.
 * @param {number} index - Position in this.vectorStore.
 * @returns {Float32Array|number[]|null} The vector, or null when missing.
 */
getAnnVector(index) {
  if (!Array.isArray(this.vectorStore)) return null;
  const chunk = this.vectorStore[index];
  if (!chunk) return null;

  // (Re)allocate the cache whenever it is absent or stale in size.
  if (
    !Array.isArray(this.annVectorCache) ||
    this.annVectorCache.length !== this.vectorStore.length
  ) {
    this.annVectorCache = new Array(this.vectorStore.length);
  }

  const cached = this.annVectorCache[index];
  if (cached) return cached;

  let vec = null;
  if (chunk.vector) {
    vec = ensureFloat32(chunk.vector);
  } else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
    vec = this.binaryStore.getVector(chunk._binaryIndex);
  } else if (this.sqliteStore) {
    const sqliteIndex = Number.isInteger(chunk._sqliteIndex)
      ? chunk._sqliteIndex
      : Number.isInteger(chunk.index)
        ? chunk.index
        : index;
    if (Number.isInteger(sqliteIndex)) {
      vec = this.sqliteStore.getVector(sqliteIndex);
    }
  }

  if (!vec) return null;

  // In non-disk modes, pin the vector on the chunk to avoid re-resolving.
  if (this.config.vectorStoreLoadMode !== 'disk') {
    chunk.vector = vec;
  }
  this.annVectorCache[index] = vec;
  return vec;
}
|
|
1333
|
-
|
|
1334
|
-
getAnnIndexPaths() {
|
|
1335
|
-
return {
|
|
1336
|
-
indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
|
|
1337
|
-
metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
|
|
1338
|
-
};
|
|
1339
|
-
}
|
|
1340
|
-
|
|
1341
|
-
// -------------------- ANN --------------------
|
|
1342
|
-
|
|
1343
|
-
/**
 * Ensure ANN (Approximate Nearest Neighbor) index is built and ready.
 * Loads from disk cache if available and valid, otherwise builds a new index.
 *
 * @returns {Promise<HierarchicalNSW|null>} The HNSW index, or null if:
 *   - ANN is disabled in config
 *   - vectorStore is not an array
 *   - vectorStore size is below annMinChunks threshold
 *   - hnswlib-node is not available
 *   - Vector dimension mismatch detected
 * @note This method is safe to call multiple times; concurrent calls share the same promise.
 */
async ensureAnnIndex() {
  if (!this.config.annEnabled) return null;
  if (!Array.isArray(this.vectorStore)) return null;
  if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
  if (this.annIndex && !this.annDirty) return this.annIndex;
  // Another caller is already loading/building: share its promise.
  if (this.annLoading) return this.annLoading;

  this.annLoading = (async () => {
    try {
      const HierarchicalNSW = await loadHnswlib();
      if (!HierarchicalNSW) {
        if (hnswlibLoadError) {
          this.addInitError('loadHnswlib', hnswlibLoadError);
        }
        return null;
      }

      // Dimension from the first in-memory vector, else the backing store.
      const dim =
        this.vectorStore[0]?.vector?.length ||
        this.binaryStore?.dim ||
        this.sqliteStore?.dim;
      if (!dim) return null;

      // Validate dimension consistency before building index
      // Use stratified sampling for better coverage across entire store
      let dimensionMismatch = false;
      const sampleSize = Math.min(ANN_DIMENSION_SAMPLE_SIZE, this.vectorStore.length);
      const step = Math.max(1, Math.floor(this.vectorStore.length / sampleSize));
      for (let i = step; i < this.vectorStore.length; i += step) {
        const v = this.vectorStore[i]?.vector;
        if (v && v.length !== dim) {
          dimensionMismatch = true;
          console.warn(
            `[ANN] Dimension mismatch at index ${i}: expected ${dim}, got ${v.length}. ` +
              'This may indicate a config change mid-index. Consider full reindex.'
          );
          break;
        }
      }

      if (dimensionMismatch) {
        this.addInitError('ensureAnnIndex', `Vector dimension inconsistency detected. Expected ${dim}. Full reindex required.`);
        return null; // Skip ANN build - fallback to linear search
      }

      // Try the persisted index first, unless the store changed (annDirty)
      // or disk caching is disabled.
      if (!this.annDirty && this.config.annIndexCache !== false) {
        const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
        if (loaded) return this.annIndex;
      }

      return await this.buildAnnIndex(HierarchicalNSW, dim);
    } finally {
      // Always release the gate so a failed attempt can be retried.
      this.annLoading = null;
    }
  })();

  return this.annLoading;
}
|
|
1413
|
-
|
|
1414
|
-
/**
 * Try to restore a previously persisted ANN index from the cache directory.
 * Validates the metadata (version, embedding model, dimension, element
 * count, HNSW build parameters, capacity) against the current config and
 * store before accepting it; any mismatch reports false so callers rebuild.
 * @param {Function} HierarchicalNSW - hnswlib-node index constructor.
 * @param {number} dim - Expected vector dimension.
 * @returns {Promise<boolean>} true when this.annIndex was populated.
 */
async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
  const { indexFile, metaFile } = this.getAnnIndexPaths();
  const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
  if (!metaData) return false;

  let meta;
  try {
    meta = JSON.parse(metaData);
  } catch {
    console.warn('[ANN] Invalid ANN metadata, rebuilding');
    return false;
  }

  if (meta?.version !== ANN_META_VERSION) {
    console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
    return false;
  }

  if (meta?.embeddingModel !== this.config.embeddingModel) {
    console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
    return false;
  }

  if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
    console.warn('[ANN] ANN index size mismatch, rebuilding');
    return false;
  }

  if (
    meta?.metric !== this.config.annMetric ||
    meta?.m !== this.config.annM ||
    meta?.efConstruction !== this.config.annEfConstruction
  ) {
    console.warn('[ANN] ANN index config changed, rebuilding');
    return false;
  }

  // Older metadata may lack maxElements; fall back to count. A capacity
  // smaller than the stored count is impossible and signals corruption.
  let maxElements = meta?.maxElements;
  if (!Number.isInteger(maxElements)) {
    maxElements = meta.count;
  } else if (maxElements < meta.count) {
    console.warn('[ANN] ANN capacity invalid, rebuilding');
    return false;
  }

  const index = new HierarchicalNSW(meta.metric, dim);
  const loaded = readHnswIndex(index, indexFile, maxElements);
  if (!loaded) {
    console.warn('[ANN] Failed to load ANN index file, rebuilding');
    return false;
  }

  if (typeof index.setEf === 'function') {
    index.setEf(this.config.annEfSearch);
  }

  this.annIndex = index;
  this.annMeta = { ...meta, maxElements };
  this.annDirty = false;
  this.annPersistDirty = false;

  if (this.config.verbose) {
    console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
  }
  return true;
}
|
|
1480
|
-
|
|
1481
|
-
/**
 * Build a fresh HNSW index over the whole vector store and (optionally)
 * persist it to disk. Yields to the event loop periodically during the
 * insert loop to keep the process responsive on large stores.
 * On failure, the ANN state is invalidated and null is returned so callers
 * fall back to linear search.
 * @param {Function} HierarchicalNSW - hnswlib-node index constructor.
 * @param {number} dim - Vector dimension for the index.
 * @returns {Promise<object|null>} The built index, or null on failure/empty store.
 */
async buildAnnIndex(HierarchicalNSW, dim) {
  if (!Array.isArray(this.vectorStore)) return null;
  const total = this.vectorStore.length;
  if (total === 0) return null;

  try {
    const index = new HierarchicalNSW(this.config.annMetric, dim);

    // Over-allocate capacity so incremental addPoint() appends have headroom.
    const maxElements = computeAnnCapacity(total, this.config);
    initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);

    const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
      ? this.config.annBuildYieldEvery
      : 1000;

    for (let i = 0; i < total; i++) {
      const vector = this.getAnnVector(i);
      if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
      index.addPoint(vector, i);

      if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
        await yieldToLoop();
      }
    }

    if (typeof index.setEf === 'function') {
      index.setEf(this.config.annEfSearch);
    }

    this.annIndex = index;
    this.annMeta = {
      version: ANN_META_VERSION,
      embeddingModel: this.config.embeddingModel,
      metric: this.config.annMetric,
      dim,
      count: total,
      maxElements,
      m: this.config.annM,
      efConstruction: this.config.annEfConstruction,
      efSearch: this.config.annEfSearch,
    };
    this.annDirty = false;
    this.annPersistDirty = true;

    // Eagerly persist when disk caching is enabled; persist failures are
    // non-fatal (annPersistDirty stays true for the next save()).
    if (this.config.annIndexCache !== false) {
      try {
        await fs.mkdir(this.config.cacheDirectory, { recursive: true });
        const { indexFile, metaFile } = this.getAnnIndexPaths();
        index.writeIndexSync(indexFile);
        await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
        this.annPersistDirty = false;
        if (this.config.verbose) {
          console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
        }
      } catch (error) {
        console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
      }
    }

    return index;
  } catch (error) {
    console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
    this.addInitError('buildAnnIndex', error);
    this.annIndex = null;
    this.annMeta = null;
    this.annDirty = true;
    this.annPersistDirty = false;
    return null;
  }
}
|
|
1551
|
-
|
|
1552
|
-
/**
 * Query the ANN index for k nearest neighbors.
 * Falls back gracefully to empty results if ANN is unavailable.
 *
 * @param {Float32Array|number[]} queryVector - Normalized query embedding
 * @param {number} k - Number of neighbors to return
 * @returns {Promise<number[]>} Array of chunk indices sorted by similarity (may be empty)
 * @throws Never throws - returns empty array on all error conditions
 * @note Automatically invalidates corrupted index and falls back to linear search on next query
 */
async queryAnn(queryVector, k) {
  if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
  const index = await this.ensureAnnIndex();
  if (!index) return [];

  // hnswlib expects Float32Array; convert plain arrays once up front.
  const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);

  // Wrap searchKnn in try-catch to handle corrupted index or dimension mismatches
  let results;
  try {
    results = index.searchKnn(qVec, k);
  } catch (err) {
    console.warn(`[ANN] searchKnn failed: ${err.message}. Falling back to linear search.`);
    this.addInitError('queryAnn', err);
    // Invalidate to trigger rebuild on next query
    this.invalidateAnnIndex();
    return [];
  }

  const labels = normalizeLabels(results);

  if (labels.length === 0) return [];

  // Defensive: drop labels outside the store's current bounds.
  const filtered = labels.filter(
    (label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
  );

  return filtered;
}
|
|
1591
|
-
|
|
1592
|
-
async clear() {
|
|
1593
|
-
if (!this.config.enableCache) return;
|
|
1594
|
-
|
|
1595
|
-
try {
|
|
1596
|
-
await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
|
|
1597
|
-
this.vectorStore = [];
|
|
1598
|
-
if (this.binaryStore) {
|
|
1599
|
-
try {
|
|
1600
|
-
await this.binaryStore.close();
|
|
1601
|
-
} catch {
|
|
1602
|
-
// ignore close errors
|
|
1603
|
-
}
|
|
1604
|
-
}
|
|
1605
|
-
this.binaryStore = null;
|
|
1606
|
-
if (this.sqliteStore) {
|
|
1607
|
-
try {
|
|
1608
|
-
this.sqliteStore.close();
|
|
1609
|
-
} catch {
|
|
1610
|
-
// ignore close errors
|
|
1611
|
-
}
|
|
1612
|
-
}
|
|
1613
|
-
this.sqliteStore = null;
|
|
1614
|
-
this.fileHashes = new Map();
|
|
1615
|
-
this.invalidateAnnIndex();
|
|
1616
|
-
await this.clearCallGraphData();
|
|
1617
|
-
if (this.config.verbose) {
|
|
1618
|
-
console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
|
|
1619
|
-
}
|
|
1620
|
-
} catch (error) {
|
|
1621
|
-
console.error('[Cache] Failed to clear cache:', error.message);
|
|
1622
|
-
throw error;
|
|
1623
|
-
}
|
|
1624
|
-
}
|
|
1625
|
-
|
|
1626
|
-
/**
|
|
1627
|
-
* Adjust efSearch at runtime for speed/accuracy tradeoff.
|
|
1628
|
-
* Higher values = more accurate but slower.
|
|
1629
|
-
* @param {number} efSearch - New efSearch value (typically 16-512)
|
|
1630
|
-
* @returns {object} Result with success status and current config
|
|
1631
|
-
*/
|
|
1632
|
-
setEfSearch(efSearch) {
|
|
1633
|
-
if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
|
|
1634
|
-
return {
|
|
1635
|
-
success: false,
|
|
1636
|
-
error: 'efSearch must be a number between 1 and 1000',
|
|
1637
|
-
};
|
|
1638
|
-
}
|
|
1639
|
-
|
|
1640
|
-
this.config.annEfSearch = efSearch;
|
|
1641
|
-
|
|
1642
|
-
if (this.annIndex && typeof this.annIndex.setEf === 'function') {
|
|
1643
|
-
this.annIndex.setEf(efSearch);
|
|
1644
|
-
if (this.annMeta) this.annMeta.efSearch = efSearch;
|
|
1645
|
-
this.annPersistDirty = true;
|
|
1646
|
-
if (this.config.verbose) {
|
|
1647
|
-
console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
|
|
1648
|
-
}
|
|
1649
|
-
return { success: true, applied: true, efSearch };
|
|
1650
|
-
}
|
|
1651
|
-
|
|
1652
|
-
if (this.config.verbose) {
|
|
1653
|
-
console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
|
|
1654
|
-
}
|
|
1655
|
-
return { success: true, applied: false, efSearch };
|
|
1656
|
-
}
|
|
1657
|
-
|
|
1658
|
-
/**
|
|
1659
|
-
* Get current ANN index statistics for diagnostics.
|
|
1660
|
-
* @returns {object} ANN stats including index state, config, and vector count
|
|
1661
|
-
*/
|
|
1662
|
-
getAnnStats() {
|
|
1663
|
-
return {
|
|
1664
|
-
enabled: this.config.annEnabled ?? false,
|
|
1665
|
-
indexLoaded: this.annIndex !== null,
|
|
1666
|
-
dirty: this.annDirty,
|
|
1667
|
-
vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
|
|
1668
|
-
minChunksForAnn: this.config.annMinChunks ?? 5000,
|
|
1669
|
-
config: this.annMeta
|
|
1670
|
-
? {
|
|
1671
|
-
metric: this.annMeta.metric,
|
|
1672
|
-
dim: this.annMeta.dim,
|
|
1673
|
-
count: this.annMeta.count,
|
|
1674
|
-
m: this.annMeta.m,
|
|
1675
|
-
efConstruction: this.annMeta.efConstruction,
|
|
1676
|
-
efSearch: this.config.annEfSearch,
|
|
1677
|
-
}
|
|
1678
|
-
: null,
|
|
1679
|
-
};
|
|
1680
|
-
}
|
|
1681
|
-
|
|
1682
|
-
// -------------------- Call Graph --------------------
|
|
1683
|
-
|
|
1684
|
-
async clearCallGraphData({ removeFile = false } = {}) {
|
|
1685
|
-
this.fileCallData.clear();
|
|
1686
|
-
this.callGraph = null;
|
|
1687
|
-
|
|
1688
|
-
if (removeFile && this.config.enableCache) {
|
|
1689
|
-
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
1690
|
-
try {
|
|
1691
|
-
await fs.rm(callGraphFile, { force: true });
|
|
1692
|
-
} catch (error) {
|
|
1693
|
-
if (this.config.verbose) {
|
|
1694
|
-
console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
|
|
1695
|
-
}
|
|
1696
|
-
}
|
|
1697
|
-
}
|
|
1698
|
-
}
|
|
1699
|
-
|
|
1700
|
-
pruneCallGraphData(validFiles) {
|
|
1701
|
-
if (!validFiles || this.fileCallData.size === 0) return 0;
|
|
1702
|
-
|
|
1703
|
-
let pruned = 0;
|
|
1704
|
-
for (const file of Array.from(this.fileCallData.keys())) {
|
|
1705
|
-
if (!validFiles.has(file)) {
|
|
1706
|
-
this.fileCallData.delete(file);
|
|
1707
|
-
pruned++;
|
|
1708
|
-
}
|
|
1709
|
-
}
|
|
1710
|
-
|
|
1711
|
-
if (pruned > 0) this.callGraph = null;
|
|
1712
|
-
return pruned;
|
|
1713
|
-
}
|
|
1714
|
-
|
|
1715
|
-
getFileCallData(file) {
|
|
1716
|
-
return this.fileCallData.get(file);
|
|
1717
|
-
}
|
|
1718
|
-
|
|
1719
|
-
hasFileCallData(file) {
|
|
1720
|
-
return this.fileCallData.has(file);
|
|
1721
|
-
}
|
|
1722
|
-
|
|
1723
|
-
getFileCallDataKeys() {
|
|
1724
|
-
return Array.from(this.fileCallData.keys());
|
|
1725
|
-
}
|
|
1726
|
-
|
|
1727
|
-
getFileCallDataCount() {
|
|
1728
|
-
return this.fileCallData.size;
|
|
1729
|
-
}
|
|
1730
|
-
|
|
1731
|
-
/**
|
|
1732
|
-
* Sets call data for a specific file.
|
|
1733
|
-
* @param {string} file
|
|
1734
|
-
* @param {object} data
|
|
1735
|
-
*/
|
|
1736
|
-
setFileCallData(file, data) {
|
|
1737
|
-
this.fileCallData.set(file, data);
|
|
1738
|
-
this.callGraph = null;
|
|
1739
|
-
}
|
|
1740
|
-
|
|
1741
|
-
/**
|
|
1742
|
-
* Sets the entire file call data map.
|
|
1743
|
-
* @param {Map<string, object>|object} entries
|
|
1744
|
-
*/
|
|
1745
|
-
setFileCallDataEntries(entries) {
|
|
1746
|
-
if (entries instanceof Map) {
|
|
1747
|
-
this.fileCallData = entries;
|
|
1748
|
-
} else {
|
|
1749
|
-
this.fileCallData.clear();
|
|
1750
|
-
if (entries && typeof entries === 'object') {
|
|
1751
|
-
for (const [file, data] of Object.entries(entries)) {
|
|
1752
|
-
this.fileCallData.set(file, data);
|
|
1753
|
-
}
|
|
1754
|
-
}
|
|
1755
|
-
}
|
|
1756
|
-
this.callGraph = null;
|
|
1757
|
-
}
|
|
1758
|
-
|
|
1759
|
-
clearFileCallData() {
|
|
1760
|
-
this.fileCallData.clear();
|
|
1761
|
-
this.callGraph = null;
|
|
1762
|
-
}
|
|
1763
|
-
|
|
1764
|
-
removeFileCallData(file) {
|
|
1765
|
-
this.fileCallData.delete(file);
|
|
1766
|
-
this.callGraph = null;
|
|
1767
|
-
}
|
|
1768
|
-
|
|
1769
|
-
async rebuildCallGraph() {
|
|
1770
|
-
if (this._callGraphBuild) return this._callGraphBuild;
|
|
1771
|
-
|
|
1772
|
-
this._callGraphBuild = (async () => {
|
|
1773
|
-
try {
|
|
1774
|
-
const { buildCallGraph } = await import('./call-graph.js');
|
|
1775
|
-
this.callGraph = buildCallGraph(this.fileCallData);
|
|
1776
|
-
if (this.config.verbose && this.callGraph) {
|
|
1777
|
-
console.info(
|
|
1778
|
-
`[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`
|
|
1779
|
-
);
|
|
1780
|
-
}
|
|
1781
|
-
} catch (err) {
|
|
1782
|
-
console.error(`[CallGraph] Failed to build: ${err.message}`);
|
|
1783
|
-
this.callGraph = null;
|
|
1784
|
-
} finally {
|
|
1785
|
-
this._callGraphBuild = null;
|
|
1786
|
-
}
|
|
1787
|
-
})();
|
|
1788
|
-
|
|
1789
|
-
return this._callGraphBuild;
|
|
1790
|
-
}
|
|
1791
|
-
|
|
1792
|
-
async getRelatedFiles(symbols) {
|
|
1793
|
-
if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
|
|
1794
|
-
if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
|
|
1795
|
-
if (!this.callGraph) return new Map();
|
|
1796
|
-
|
|
1797
|
-
const { getRelatedFiles } = await import('./call-graph.js');
|
|
1798
|
-
return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
|
|
1799
|
-
}
|
|
1800
|
-
|
|
1801
|
-
getCallGraphStats() {
|
|
1802
|
-
return {
|
|
1803
|
-
enabled: this.config.callGraphEnabled ?? false,
|
|
1804
|
-
filesWithData: this.fileCallData.size,
|
|
1805
|
-
graphBuilt: this.callGraph !== null,
|
|
1806
|
-
definitions: this.callGraph?.defines.size ?? 0,
|
|
1807
|
-
callTargets: this.callGraph?.calledBy.size ?? 0,
|
|
1808
|
-
};
|
|
1809
|
-
}
|
|
1810
|
-
|
|
1811
|
-
// -------------------- Abstraction Layer --------------------
|
|
1812
|
-
|
|
1813
|
-
/**
|
|
1814
|
-
* Returns the total number of chunks in the store.
|
|
1815
|
-
* @returns {number}
|
|
1816
|
-
*/
|
|
1817
|
-
getStoreSize() {
|
|
1818
|
-
if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
|
|
1819
|
-
if (this.binaryStore) return this.binaryStore.length;
|
|
1820
|
-
if (this.sqliteStore) return this.sqliteStore.length();
|
|
1821
|
-
return 0;
|
|
1822
|
-
}
|
|
1823
|
-
|
|
1824
|
-
/**
|
|
1825
|
-
* Retrieves a vector by its store index.
|
|
1826
|
-
* @param {number} index
|
|
1827
|
-
* @returns {Float32Array|null}
|
|
1828
|
-
*/
|
|
1829
|
-
getVector(index) {
|
|
1830
|
-
return this.getChunkVector(index);
|
|
1831
|
-
}
|
|
1832
|
-
|
|
1833
|
-
/**
|
|
1834
|
-
* Retrieves a chunk object by its store index.
|
|
1835
|
-
* @param {number} index
|
|
1836
|
-
* @returns {object|null}
|
|
1837
|
-
*/
|
|
1838
|
-
getChunk(index) {
|
|
1839
|
-
if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
|
|
1840
|
-
return this.vectorStore[index];
|
|
1841
|
-
}
|
|
1842
|
-
if (this.binaryStore) {
|
|
1843
|
-
const record = this.binaryStore.getRecord(index);
|
|
1844
|
-
if (record) {
|
|
1845
|
-
return {
|
|
1846
|
-
file: record.file,
|
|
1847
|
-
startLine: record.startLine,
|
|
1848
|
-
endLine: record.endLine,
|
|
1849
|
-
vector: this.binaryStore.getVector(index),
|
|
1850
|
-
_index: index,
|
|
1851
|
-
_binaryIndex: index,
|
|
1852
|
-
};
|
|
1853
|
-
}
|
|
1854
|
-
}
|
|
1855
|
-
if (this.sqliteStore) {
|
|
1856
|
-
const record = this.sqliteStore.getRecord(index);
|
|
1857
|
-
if (record) {
|
|
1858
|
-
return {
|
|
1859
|
-
file: record.file,
|
|
1860
|
-
startLine: record.startLine,
|
|
1861
|
-
endLine: record.endLine,
|
|
1862
|
-
vector: this.sqliteStore.getVector(index),
|
|
1863
|
-
_index: index,
|
|
1864
|
-
_sqliteIndex: index,
|
|
1865
|
-
};
|
|
1866
|
-
}
|
|
1867
|
-
}
|
|
1868
|
-
return null;
|
|
1869
|
-
}
|
|
1870
|
-
}
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { Worker } from 'worker_threads';
|
|
4
|
+
import { StreamingJsonWriter } from './json-writer.js';
|
|
5
|
+
import { BinaryVectorStore } from './vector-store-binary.js';
|
|
6
|
+
import { SqliteVectorStore } from './vector-store-sqlite.js';
|
|
7
|
+
import {
|
|
8
|
+
JSON_WORKER_THRESHOLD_BYTES,
|
|
9
|
+
ANN_DIMENSION_SAMPLE_SIZE,
|
|
10
|
+
HNSWLIB_ERROR_RESET_MS,
|
|
11
|
+
DEFAULT_READER_WAIT_TIMEOUT_MS,
|
|
12
|
+
} from './constants.js';
|
|
13
|
+
|
|
14
|
+
// Version of the on-disk cache metadata; load() ignores caches with a different version.
const CACHE_META_VERSION = 1;
const CACHE_META_FILE = 'meta.json';

// ANN meta version stays at 1 for compatibility; maxElements is optional.
const ANN_META_VERSION = 1;
const ANN_INDEX_FILE = 'ann-index.bin';
const ANN_META_FILE = 'ann-meta.json';

const CALL_GRAPH_FILE = 'call-graph.json';

// Detected test environment (Vitest or NODE_ENV=test); used to enable eager
// cache prefetching and extra vector validation.
const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';

// Yield to event loop to keep IDE/extension host responsive during heavy CPU loops
const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));

// Memoized dynamic import of hnswlib-node and its last load failure;
// managed exclusively by loadHnswlib() below.
let hnswlibPromise = null;
let hnswlibLoadError = null;
|
|
31
|
+
|
|
32
|
+
/**
 * Parse a JSON file on a worker thread (json-worker.js) to keep the main
 * thread responsive for large files.
 *
 * @param {string} filePath - Path handed to the worker via workerData
 * @returns {Promise<*>} Parsed JSON data from the worker's `{ok, data}` message
 * @throws {Error} When the worker reports failure, errors, or exits abnormally
 */
async function parseJsonInWorker(filePath) {
  return new Promise((resolve, reject) => {
    let settled = false;
    const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
      workerData: { filePath },
    });

    // finish() provides safe cleanup: removeAllListeners ensures no memory leak from
    // orphaned handlers, terminate() cleans up the worker process. The 'settled' flag
    // prevents double-resolution if multiple events fire before cleanup completes.
    const finish = (handler, value) => {
      if (settled) return;
      settled = true;
      worker.removeAllListeners();
      // terminate() may be missing on mocked workers; swallow its rejection.
      const termination = worker.terminate?.();
      if (termination && typeof termination.catch === 'function') termination.catch(() => null);
      handler(value);
    };

    worker.once('message', (msg) => {
      if (msg?.ok) {
        finish(resolve, msg.data);
      } else {
        const err = new Error(msg?.error || 'JSON worker failed');
        console.warn(`[Cache] ${err.message}`);
        finish(reject, err);
      }
    });

    worker.once('error', (err) => {
      console.error(`[Cache] JSON worker error: ${err.message}`);
      finish(reject, err);
    });

    worker.once('exit', (code) => {
      if (code !== 0) {
        const err = new Error(`JSON worker exited with code ${code}`);
        console.error(`[Cache] ${err.message}`);
        finish(reject, err);
        return;
      }
      // Clean exit (code 0) before any message also counts as a failure.
      if (!settled) {
        const err = new Error('JSON worker exited without sending a response');
        console.error(`[Cache] ${err.message}`);
        finish(reject, err);
      }
    });
  });
}
|
|
81
|
+
|
|
82
|
+
/**
 * Read and parse a JSON file, offloading files at or above the worker
 * threshold to a worker thread.
 *
 * @param {string} filePath - File to read
 * @param {{workerThresholdBytes?: number}} [options] - Size threshold for worker parsing
 * @returns {Promise<*>} Parsed JSON, or null when the file is missing or unparseable
 */
async function readJsonFile(
  filePath,
  { workerThresholdBytes = JSON_WORKER_THRESHOLD_BYTES } = {}
) {
  let fileStats;
  try {
    fileStats = await fs.stat(filePath);
  } catch {
    // File doesn't exist - this is expected and not an error condition
    return null;
  }

  try {
    const workerAvailable = typeof Worker === 'function';
    const hasKnownSize = fileStats && typeof fileStats.size === 'number';
    if (workerAvailable && hasKnownSize && fileStats.size >= workerThresholdBytes) {
      return await parseJsonInWorker(filePath);
    }

    const raw = await fs.readFile(filePath, 'utf-8');
    return JSON.parse(raw);
  } catch (error) {
    console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
    return null;
  }
}
|
|
110
|
+
|
|
111
|
+
/**
 * Lazily load the HierarchicalNSW constructor from hnswlib-node.
 * The import is memoized module-wide; a load failure short-circuits future
 * calls until HNSWLIB_ERROR_RESET_MS has elapsed, after which a retry is allowed.
 * @returns {Promise<Function|null>} The constructor, or null when unavailable
 */
async function loadHnswlib() {
  if (hnswlibLoadError) {
    const retryWindowPassed =
      hnswlibLoadError._timestamp &&
      Date.now() - hnswlibLoadError._timestamp > HNSWLIB_ERROR_RESET_MS;
    if (!retryWindowPassed) return null;
    // Enough time has passed: forget the failure and re-attempt the import.
    hnswlibLoadError = null;
    hnswlibPromise = null;
  }

  if (!hnswlibPromise) {
    hnswlibPromise = import('hnswlib-node')
      .then((mod) => {
        const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
        if (!HierarchicalNSW) throw new Error('HierarchicalNSW export not found');
        return HierarchicalNSW;
      })
      .catch((err) => {
        // Stamp the failure so the retry window above can be evaluated later.
        err._timestamp = Date.now();
        hnswlibLoadError = err;
        console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
        return null;
      });
  }

  return hnswlibPromise;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Initialize an hnswlib index, trying API signatures from newest to oldest.
 * Falls back to the capacity-only form; a failure there propagates.
 * @param {object} index - hnswlib index instance
 * @param {number} maxElements - Index capacity
 * @param {number} m - HNSW M parameter
 * @param {number} efConstruction - Build-time ef parameter
 */
function initHnswIndex(index, maxElements, m, efConstruction) {
  const attempts = [
    { label: 'Standard', args: [maxElements, m, efConstruction, 100] },
    { label: 'Legacy', args: [maxElements, m, efConstruction] },
  ];
  for (const { label, args } of attempts) {
    try {
      index.initIndex(...args);
      return;
    } catch (err) {
      console.warn(`[ANN] ${label} init failed: ${err.message}`);
    }
  }
  // Last resort: capacity-only signature; let any error here propagate.
  index.initIndex(maxElements);
}
|
|
156
|
+
|
|
157
|
+
/**
 * Load a persisted hnswlib index from disk, trying the two-argument
 * (path, maxElements) signature first and falling back to path-only.
 * @param {object} index - hnswlib index instance
 * @param {string} filePath - Path to the persisted index
 * @param {number} maxElements - Capacity hint for the newer API
 * @returns {boolean} true on success, false when both signatures fail
 */
function readHnswIndex(index, filePath, maxElements) {
  try {
    index.readIndexSync(filePath, maxElements);
    return true;
  } catch {
    // Fall through to the legacy single-argument signature.
  }
  try {
    index.readIndexSync(filePath);
    return true;
  } catch (err) {
    console.warn(`[ANN] Read index failed: ${err.message}`);
    return false;
  }
}
|
|
172
|
+
|
|
173
|
+
/**
 * Normalize an hnswlib searchKnn result into a plain array of labels.
 * Different hnswlib versions expose neighbors as `labels`, `neighbors`,
 * or `indices` (possibly typed arrays), or return a bare array.
 * @param {*} result - Raw searchKnn result
 * @returns {number[]} Label array (empty when nothing usable is found)
 */
function normalizeLabels(result) {
  if (!result) return [];
  if (Array.isArray(result)) return result;
  const found = result.labels || result.neighbors || result.indices;
  return found ? Array.from(found) : [];
}
|
|
179
|
+
|
|
180
|
+
/**
 * Coerce a vector into a Float32Array by converting element VALUES
 * (never by reinterpreting the underlying bytes).
 * In test environments, additionally rejects NaN/Infinity so corrupted
 * vectors surface early instead of silently poisoning similarity scores.
 * @param {Float32Array|TypedArray|number[]|null} vector
 * @returns {Float32Array|null} null for falsy input; the same instance when
 *   already a Float32Array
 */
function ensureFloat32(vector) {
  if (!vector) return null;
  if (vector instanceof Float32Array) return vector;

  const converted = ArrayBuffer.isView(vector)
    ? Float32Array.from(vector)
    : new Float32Array(vector);

  if (IS_TEST_ENV && converted.length > 0) {
    for (let i = 0; i < converted.length; i++) {
      if (!Number.isFinite(converted[i])) {
        throw new Error(
          `Invalid vector value at index ${i}: ${converted[i]}. ` +
            'Vector contains NaN or Infinity, which will corrupt search results.'
        );
      }
    }
  }

  return converted;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Ensure a chunk's vector (if any) is stored as a Float32Array, in place.
 * @param {object|null|undefined} chunk
 */
function normalizeChunkVector(chunk) {
  if (chunk?.vector) {
    chunk.vector = ensureFloat32(chunk.vector);
  }
}
|
|
210
|
+
|
|
211
|
+
/**
 * Stamp each chunk with its position in the store (`_index`), skipping holes.
 * No-op for non-array input.
 * @param {Array<object|null>} store
 */
function assignChunkIndices(store) {
  if (!Array.isArray(store)) return;
  store.forEach((chunk, position) => {
    if (chunk) chunk._index = position;
  });
}
|
|
220
|
+
|
|
221
|
+
/**
 * Normalize a persisted file-hash entry into `{hash, mtimeMs?, size?}`.
 * Accepts a bare hash string (legacy format) or an object with a string
 * `hash`; numeric metadata is kept only when finite.
 * @param {string|object|null|undefined} entry
 * @returns {{hash: string, mtimeMs?: number, size?: number}|null}
 */
function normalizeFileHashEntry(entry) {
  if (!entry) return null;
  if (typeof entry === 'string') return { hash: entry };
  if (typeof entry !== 'object' || typeof entry.hash !== 'string') return null;

  const normalized = { hash: entry.hash };
  if (Number.isFinite(entry.mtimeMs)) normalized.mtimeMs = entry.mtimeMs;
  if (Number.isFinite(entry.size)) normalized.size = entry.size;
  return normalized;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Serialize a file-hash entry for persistence as `{hash, mtimeMs?, size?}`.
 * Mirrors normalizeFileHashEntry: a bare string becomes `{hash}`, objects
 * must carry a string `hash`, and numeric metadata is kept only when finite.
 * @param {string|object|null|undefined} entry
 * @returns {{hash: string, mtimeMs?: number, size?: number}|null}
 */
function serializeFileHashEntry(entry) {
  if (!entry) return null;
  if (typeof entry === 'string') return { hash: entry };
  if (typeof entry !== 'object' || typeof entry.hash !== 'string') return null;

  const serialized = { hash: entry.hash };
  if (Number.isFinite(entry.mtimeMs)) serialized.mtimeMs = entry.mtimeMs;
  if (Number.isFinite(entry.size)) serialized.size = entry.size;
  return serialized;
}
|
|
242
|
+
|
|
243
|
+
/**
 * Compute the ANN index capacity: total vectors padded by a growth factor
 * (default 1.2) and a flat headroom (default 1024), never below total.
 * @param {number} total - Current vector count
 * @param {object} config - May carry annCapacityFactor / annCapacityExtra overrides
 * @returns {number}
 */
function computeAnnCapacity(total, config) {
  const factor = typeof config.annCapacityFactor === 'number' ? config.annCapacityFactor : 1.2;
  const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
  return Math.max(total, Math.ceil(total * factor), total + extra);
}
|
|
250
|
+
|
|
251
|
+
export class EmbeddingsCache {
|
|
252
|
+
/**
 * Create an embeddings cache bound to a configuration object.
 * Only initializes in-memory state; nothing is read from disk here
 * (see load()/ensureLoaded()).
 * @param {object} config - Cache configuration (cacheDirectory, embeddingModel,
 *   embeddingDimension, enableCache, verbose, ANN and store-format options, …)
 */
constructor(config) {
  this.config = config;

  // Primary in-memory chunk store and per-file content hashes.
  this.vectorStore = [];
  this.fileHashes = new Map();
  this.isSaving = false;
  this.lastIndexDurationMs = null;
  this.lastIndexStats = null;

  // Metadata persisted alongside the cache; compared on load to detect
  // version/model/dimension mismatches.
  this.cacheMeta = {
    version: CACHE_META_VERSION,
    embeddingModel: config.embeddingModel,
    embeddingDimension: config.embeddingDimension ?? null,
  };

  // Save coalescing / debounce (serialized via saveQueue)
  this.saveQueue = Promise.resolve();
  this._saveTimer = null;
  this._saveRequested = false;
  this._savePromise = null;

  // ANN state
  this.annIndex = null;
  this.annMeta = null;
  this.annDirty = false; // needs rebuild
  this.annPersistDirty = false; // in-memory differs from disk
  this.annLoading = null;
  this.annVectorCache = null;

  // Call graph
  this.fileCallData = new Map();
  this.callGraph = null;
  this._callGraphBuild = null;

  // Binary vector store (optional)
  this.binaryStore = null;

  // SQLite vector store (optional)
  this.sqliteStore = null;

  // Error tracking
  this.initErrors = [];

  // Concurrency hooks (read tracking)
  this.activeReads = 0;
  this._readWaiters = [];
  this._saveInProgress = false; // Prevents new reads during save

  // Lazy reload support after dropping in-memory vectors
  this._clearedAfterIndex = false;
  this._loadPromise = null;
}
|
|
304
|
+
|
|
305
|
+
/**
|
|
306
|
+
* Add an initialization error with consistent structure.
|
|
307
|
+
* @param {string} stage - The stage where the error occurred (e.g., 'loadHnswlib', 'ensureAnnIndex')
|
|
308
|
+
* @param {Error|string} error - The error object or message
|
|
309
|
+
*/
|
|
310
|
+
addInitError(stage, error) {
|
|
311
|
+
this.initErrors.push({
|
|
312
|
+
stage,
|
|
313
|
+
message: error instanceof Error ? error.message : String(error),
|
|
314
|
+
stack: error instanceof Error ? error.stack : null,
|
|
315
|
+
timestamp: Date.now(),
|
|
316
|
+
});
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
clearInMemoryState() {
|
|
320
|
+
this.vectorStore = [];
|
|
321
|
+
this.fileHashes.clear();
|
|
322
|
+
this.invalidateAnnIndex();
|
|
323
|
+
this.fileCallData.clear();
|
|
324
|
+
this.callGraph = null;
|
|
325
|
+
this.initErrors = [];
|
|
326
|
+
if (this.binaryStore) {
|
|
327
|
+
try {
|
|
328
|
+
this.binaryStore.close?.();
|
|
329
|
+
} catch {
|
|
330
|
+
// ignore close errors
|
|
331
|
+
}
|
|
332
|
+
this.binaryStore = null;
|
|
333
|
+
}
|
|
334
|
+
if (this.sqliteStore) {
|
|
335
|
+
try {
|
|
336
|
+
this.sqliteStore.close?.();
|
|
337
|
+
} catch {
|
|
338
|
+
// ignore close errors
|
|
339
|
+
}
|
|
340
|
+
this.sqliteStore = null;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
/**
 * Close any open backing stores.
 * Binary-store close errors propagate (and leave this.binaryStore set);
 * SQLite close errors are swallowed since the store may already be closed.
 */
async close() {
  if (this.binaryStore) {
    await this.binaryStore.close();
    this.binaryStore = null;
  }
  if (this.sqliteStore) {
    try {
      this.sqliteStore.close();
    } catch {
      // SQLite may already be closed or in error state
    }
    this.sqliteStore = null;
  }
}
|
|
358
|
+
|
|
359
|
+
/**
 * Reload the cache from disk if the in-memory vectors were previously
 * dropped (see dropInMemoryVectors). Concurrent callers share one in-flight
 * load via this._loadPromise; the promise is cleared in finally so a failed
 * load can be retried.
 * @param {{preferDisk?: boolean}} [options] - preferDisk forces disk-backed
 *   vector mode during the reload (low-RAM incremental path)
 * @returns {Promise<void>|undefined}
 */
async ensureLoaded({ preferDisk = false } = {}) {
  if (!this.config.enableCache) return;
  // Nothing to do unless vectors were explicitly dropped after indexing.
  if (!this._clearedAfterIndex) return;
  if (this._loadPromise) return this._loadPromise;

  this._loadPromise = (async () => {
    if (preferDisk && this.config.verbose) {
      console.info('[Cache] ensureLoaded: forcing disk vector mode for incremental low-RAM reload');
    }
    await this.load({
      forceVectorLoadMode: preferDisk ? 'disk' : undefined,
    });
    // Only mark as restored after a successful load.
    this._clearedAfterIndex = false;
  })().finally(() => {
    this._loadPromise = null;
  });

  return this._loadPromise;
}
|
|
378
|
+
|
|
379
|
+
async dropInMemoryVectors() {
|
|
380
|
+
if (!this.config.enableCache) return;
|
|
381
|
+
|
|
382
|
+
if (this.activeReads > 0) {
|
|
383
|
+
await this.waitForReaders();
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
this.vectorStore = [];
|
|
387
|
+
this.annVectorCache = null;
|
|
388
|
+
this.annIndex = null;
|
|
389
|
+
this.annMeta = null;
|
|
390
|
+
this.annDirty = true;
|
|
391
|
+
this.annPersistDirty = false;
|
|
392
|
+
|
|
393
|
+
if (this.binaryStore) {
|
|
394
|
+
try {
|
|
395
|
+
await this.binaryStore.close();
|
|
396
|
+
} catch {
|
|
397
|
+
// ignore close errors
|
|
398
|
+
}
|
|
399
|
+
this.binaryStore = null;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
if (this.sqliteStore) {
|
|
403
|
+
try {
|
|
404
|
+
this.sqliteStore.close();
|
|
405
|
+
} catch {
|
|
406
|
+
// ignore close errors
|
|
407
|
+
}
|
|
408
|
+
this.sqliteStore = null;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
this._clearedAfterIndex = true;
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
// -------------------- Concurrency Hooks --------------------
|
|
415
|
+
|
|
416
|
+
startRead() {
|
|
417
|
+
// Prevent new reads while save is in progress to avoid race conditions
|
|
418
|
+
if (this._saveInProgress) {
|
|
419
|
+
throw new Error('Cache save in progress, try again shortly');
|
|
420
|
+
}
|
|
421
|
+
this.activeReads++;
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
endRead() {
|
|
425
|
+
if (this.activeReads > 0) {
|
|
426
|
+
this.activeReads--;
|
|
427
|
+
if (this.activeReads === 0 && this._readWaiters.length > 0) {
|
|
428
|
+
const waiters = this._readWaiters;
|
|
429
|
+
this._readWaiters = [];
|
|
430
|
+
for (const resolve of waiters) {
|
|
431
|
+
resolve();
|
|
432
|
+
}
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
async waitForReaders() {
|
|
438
|
+
if (this.activeReads === 0) return;
|
|
439
|
+
await new Promise((resolve) => {
|
|
440
|
+
this._readWaiters.push(resolve);
|
|
441
|
+
});
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
async waitForReadersWithTimeout(timeoutMs = 5000) {
|
|
445
|
+
if (this.activeReads === 0) return true;
|
|
446
|
+
let timedOut = false;
|
|
447
|
+
let resolved = false;
|
|
448
|
+
let waiterResolve;
|
|
449
|
+
const waiterPromise = new Promise((resolve) => {
|
|
450
|
+
waiterResolve = () => {
|
|
451
|
+
if (!resolved) {
|
|
452
|
+
resolved = true;
|
|
453
|
+
resolve();
|
|
454
|
+
}
|
|
455
|
+
};
|
|
456
|
+
this._readWaiters.push(waiterResolve);
|
|
457
|
+
});
|
|
458
|
+
await Promise.race([
|
|
459
|
+
waiterPromise,
|
|
460
|
+
new Promise((resolve) => {
|
|
461
|
+
setTimeout(() => {
|
|
462
|
+
if (!resolved) {
|
|
463
|
+
resolved = true;
|
|
464
|
+
timedOut = true;
|
|
465
|
+
// Remove waiter from array to prevent late invocation after timeout
|
|
466
|
+
const idx = this._readWaiters.indexOf(waiterResolve);
|
|
467
|
+
if (idx >= 0) this._readWaiters.splice(idx, 1);
|
|
468
|
+
resolve();
|
|
469
|
+
}
|
|
470
|
+
}, timeoutMs);
|
|
471
|
+
}),
|
|
472
|
+
]);
|
|
473
|
+
if (timedOut) {
|
|
474
|
+
// Always warn (not just verbose) since proceeding with active readers is risky
|
|
475
|
+
console.warn(
|
|
476
|
+
`[Cache] Timed out waiting for ${this.activeReads} active reader(s); proceeding with save anyway. ` +
|
|
477
|
+
'This may cause data inconsistency if readers access the store during write.'
|
|
478
|
+
);
|
|
479
|
+
}
|
|
480
|
+
return !timedOut;
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
// -------------------- Reset --------------------
|
|
484
|
+
|
|
485
|
+
/**
|
|
486
|
+
* Resets the cache state (clears vectors, hashes, and call graph).
|
|
487
|
+
* Used for forced reindexing.
|
|
488
|
+
*/
|
|
489
|
+
async reset() {
|
|
490
|
+
this.vectorStore = [];
|
|
491
|
+
if (this.binaryStore) {
|
|
492
|
+
try {
|
|
493
|
+
await this.binaryStore.close();
|
|
494
|
+
} catch {
|
|
495
|
+
// ignore close errors
|
|
496
|
+
}
|
|
497
|
+
this.binaryStore = null;
|
|
498
|
+
}
|
|
499
|
+
if (this.sqliteStore) {
|
|
500
|
+
try {
|
|
501
|
+
this.sqliteStore.close();
|
|
502
|
+
} catch {
|
|
503
|
+
// ignore close errors
|
|
504
|
+
}
|
|
505
|
+
this.sqliteStore = null;
|
|
506
|
+
}
|
|
507
|
+
this.fileHashes.clear();
|
|
508
|
+
this.invalidateAnnIndex();
|
|
509
|
+
await this.clearCallGraphData({ removeFile: true });
|
|
510
|
+
this.initErrors = [];
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
// -------------------- Load --------------------
|
|
514
|
+
|
|
515
|
+
/**
 * Load cached embeddings, file hashes, metadata, and call-graph data from disk.
 *
 * Validation order matters: the small meta file is read and validated first
 * (version, embedding model, embedding dimension) so that a huge embeddings
 * file is never parsed when the cache is known to be stale.
 *
 * @param {object} [options]
 * @param {'disk'|'memory'} [options.forceVectorLoadMode] - Overrides the
 *   configured vectorStoreLoadMode for this load only; any other value is ignored.
 * @returns {Promise<void>} Resolves silently; failures are logged and the
 *   in-memory state is cleared rather than thrown to the caller.
 */
async load({ forceVectorLoadMode } = {}) {
  if (!this.config.enableCache) return;

  try {
    await fs.mkdir(this.config.cacheDirectory, { recursive: true });

    const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
    const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
    const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);

    // Above this byte size, JSON parsing is offloaded to a worker (see readJsonFile).
    const workerThresholdBytes =
      Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
      this.config.jsonWorkerThresholdBytes > 0
        ? this.config.jsonWorkerThresholdBytes
        : JSON_WORKER_THRESHOLD_BYTES;

    const useBinary = this.config.vectorStoreFormat === 'binary';
    const useSqlite = this.config.vectorStoreFormat === 'sqlite';

    const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
      this.config.cacheDirectory
    );
    // fs.access-based existence probe; swallows the rejection into a boolean.
    const pathExists = async (targetPath) => {
      try {
        await fs.access(targetPath);
        return true;
      } catch {
        return false;
      }
    };

    // In tests, read cache files eagerly to exercise worker paths.
    let cacheData = null;
    let hashData = null;
    let prefetched = false;
    if (IS_TEST_ENV) {
      prefetched = true;
      const cachePromise = useBinary || useSqlite
        ? Promise.resolve(null)
        : readJsonFile(cacheFile, { workerThresholdBytes });
      [cacheData, hashData] = await Promise.all([
        cachePromise,
        readJsonFile(hashFile, { workerThresholdBytes }),
      ]);
    }

    // Read meta first to avoid parsing huge cache files when invalid
    const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
    if (!metaData) {
      console.warn('[Cache] Missing cache metadata, ignoring cache');
      this.clearInMemoryState();
      return;
    }

    let meta;
    try {
      meta = JSON.parse(metaData);
    } catch {
      console.warn('[Cache] Invalid cache metadata, ignoring cache');
      this.clearInMemoryState();
      return;
    }

    // A version bump invalidates every older cache layout.
    if (meta?.version !== CACHE_META_VERSION) {
      console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
      this.clearInMemoryState();
      return;
    }

    // Embeddings from a different model are incompatible; discard them.
    if (meta?.embeddingModel !== this.config.embeddingModel) {
      console.warn(
        `[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`
      );
      this.clearInMemoryState();
      return;
    }
    // Both sides normalize "unset" to null so an absent dimension matches.
    const expectedDimension = this.config.embeddingDimension ?? null;
    const metaDimension = meta?.embeddingDimension ?? null;
    if (metaDimension !== expectedDimension) {
      console.warn(
        `[Cache] Embedding dimension changed, ignoring cache (${metaDimension} -> ${expectedDimension})`
      );
      this.clearInMemoryState();
      return;
    }

    // Non-test path: only now (after meta validation) pay for the big reads.
    if (!prefetched) {
      [cacheData, hashData] = await Promise.all([
        useBinary || useSqlite ? Promise.resolve(null) : readJsonFile(cacheFile, { workerThresholdBytes }),
        readJsonFile(hashFile, { workerThresholdBytes }),
      ]);
    }

    this.cacheMeta = meta;

    // Probe which cache artifacts exist so format-mismatch warnings are precise.
    const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
      (async () => {
        const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
          pathExists(vectorsPath),
          pathExists(recordsPath),
          pathExists(contentPath),
          pathExists(filesPath),
        ]);
        return vectorsOk && recordsOk && contentOk && filesOk;
      })(),
      pathExists(cacheFile),
    ]);

    if (useBinary && !binaryFilesPresent) {
      if (jsonCachePresent) {
        console.warn(
          '[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.'
        );
      } else {
        console.warn(
          '[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.'
        );
      }
    } else if (!useBinary && !useSqlite && !jsonCachePresent) {
      if (binaryFilesPresent) {
        console.warn(
          '[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.'
        );
      } else {
        console.warn(
          '[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.'
        );
      }
    }

    // 'disk' keeps vectors out of memory; caller override wins when valid.
    const configuredVectorLoadMode =
      typeof this.config.vectorStoreLoadMode === 'string'
        ? this.config.vectorStoreLoadMode.toLowerCase()
        : 'memory';
    const effectiveVectorLoadMode =
      forceVectorLoadMode === 'disk' || forceVectorLoadMode === 'memory'
        ? forceVectorLoadMode
        : configuredVectorLoadMode;

    if (useBinary) {
      try {
        this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
          contentCacheEntries: this.config.contentCacheEntries,
          vectorCacheEntries: this.config.vectorCacheEntries,
          vectorLoadMode: effectiveVectorLoadMode,
        });
        // Materialize lightweight chunk views; content/vectors stay lazy unless requested.
        cacheData = await this.binaryStore.toChunkViews({
          includeContent: this.config.vectorStoreContentMode === 'inline',
          includeVector: effectiveVectorLoadMode !== 'disk',
        });
      } catch (err) {
        this.binaryStore = null;
        console.warn(`[Cache] Failed to load binary vector store: ${err.message}`);
      }
    }

    // SQLite store loading
    if (useSqlite) {
      try {
        this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
        if (this.sqliteStore) {
          cacheData = this.sqliteStore.toChunkViews({
            includeContent: this.config.vectorStoreContentMode === 'inline',
            includeVector: effectiveVectorLoadMode !== 'disk',
          });
        } else {
          // SQLite file missing, need reindex
          console.warn('[Cache] vectorStoreFormat=sqlite but vectors.sqlite is missing. Reindex to regenerate the cache.');
        }
      } catch (err) {
        this.sqliteStore = null;
        console.warn(`[Cache] Failed to load SQLite vector store: ${err.message}`);
      }
    }

    // Last-resort fallback: if no backend produced chunk views, try embeddings.json.
    if (!cacheData) {
      cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
    }

    const hasCacheData = Array.isArray(cacheData);
    const hasHashData = hashData && typeof hashData === 'object';

    if (hasCacheData) {
      const allowedExtensions = new Set(
        (this.config.fileExtensions || []).map((ext) => `.${ext}`)
      );
      const allowedFileNames = new Set(this.config.fileNames || []);
      // Binary-store views are trusted as-is; only JSON/SQLite data gets re-filtered.
      const applyExtensionFilter = !this.binaryStore;
      const shouldKeepFile = (filePath) => {
        const ext = path.extname(filePath);
        if (allowedExtensions.has(ext)) return true;
        return allowedFileNames.has(path.basename(filePath));
      };

      const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
      this.vectorStore = [];
      this.fileHashes.clear();

      // Single-pass filter + normalization
      for (const chunk of cacheData) {
        if (applyExtensionFilter) {
          if (!shouldKeepFile(chunk.file)) continue;
        }
        normalizeChunkVector(chunk);
        this.vectorStore.push(chunk);
      }
      const filteredCount = cacheData.length - this.vectorStore.length;
      if (filteredCount > 0 && this.config.verbose) {
        console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
      }

      if (hasHashData) {
        // Only keep hashes for allowed extensions
        for (const [file, entry] of rawHashes) {
          if (!applyExtensionFilter || shouldKeepFile(file)) {
            const normalized = normalizeFileHashEntry(entry);
            if (normalized) {
              this.fileHashes.set(file, normalized);
            }
          }
        }
      } else {
        console.warn(
          '[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared'
        );
      }

      assignChunkIndices(this.vectorStore);

      if (this.config.verbose) {
        console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
      }

      // ANN index is lazily loaded/built on first query
      this.annDirty = false;
      this.annPersistDirty = false;
      this.annIndex = null;
      this.annMeta = null;
      this.annVectorCache = null;
    } else if (cacheData) {
      console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
    } else if (hasHashData) {
      console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
    }

    // Load call-graph data if it exists
    const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
    try {
      const callGraphData = await fs.readFile(callGraphFile, 'utf8');
      const parsed = JSON.parse(callGraphData);
      this.fileCallData = new Map(Object.entries(parsed));
      if (this.config.verbose) {
        console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
      }
    } catch {
      // no cache yet, OK
    }
  } catch (error) {
    console.warn('[Cache] Failed to load cache:', error.message);
    this.clearInMemoryState();
  }
}
|
|
777
|
+
|
|
778
|
+
// -------------------- Save (debounced + serialized) --------------------
|
|
779
|
+
|
|
780
|
+
/**
 * Request a cache save. Saves are debounced (one timer window collects many
 * requests) and serialized through `this.saveQueue` so that two saves never
 * run concurrently.
 *
 * @returns {Promise<void>} A promise that settles when the save triggered by
 *   this request (or the one it was coalesced into) completes. Resolves
 *   immediately when caching is disabled.
 */
save() {
  if (!this.config.enableCache) return Promise.resolve();

  // Record that at least one save is wanted; the drain loop below consumes this.
  this._saveRequested = true;

  // A debounce timer is already pending: piggyback on its in-flight promise.
  if (this._saveTimer) return this._savePromise ?? Promise.resolve();

  const debounceMs = Number.isInteger(this.config.saveDebounceMs)
    ? this.config.saveDebounceMs
    : 250;

  this._savePromise = new Promise((resolve, reject) => {
    this._saveTimer = setTimeout(() => {
      this._saveTimer = null;

      // Chain onto saveQueue so this save starts only after the previous one ends.
      this.saveQueue = this.saveQueue
        .then(async () => {
          // Drain loop: requests arriving mid-save trigger one more pass.
          while (this._saveRequested) {
            this._saveRequested = false;
            await this.performSave();
          }
        })
        .then(resolve, reject)
        .finally(() => {
          this._savePromise = null;
        });
    }, debounceMs);
  });

  return this._savePromise;
}
|
|
811
|
+
|
|
812
|
+
/**
 * Write the full cache to disk: vector store (binary / sqlite / streamed
 * JSON depending on `vectorStoreFormat`), file hashes, cache metadata,
 * call-graph data, and — when dirty — the persisted ANN index.
 *
 * Readers are gated (`_saveInProgress`) and awaited before state is touched;
 * the save aborts if readers outlive the timeout unless
 * `forceSaveWithActiveReaders` is set. Errors are logged, never thrown, and
 * a best-effort recovery reopens a backend store that a failed save closed.
 *
 * @returns {Promise<void>}
 */
async performSave() {
  // Block new reads from starting during save operation
  this._saveInProgress = true;

  // Wait for active readers before modifying state to prevent data corruption
  if (this.activeReads > 0) {
    const timeoutMs = this.config.saveReaderWaitTimeoutMs ?? DEFAULT_READER_WAIT_TIMEOUT_MS;
    const allReadersFinished = await this.waitForReadersWithTimeout(timeoutMs);
    if (!allReadersFinished && !this.config.forceSaveWithActiveReaders) {
      console.warn('[Cache] Aborting save - active readers still present after timeout');
      this._saveInProgress = false; // Reset flag on early return
      return; // Abort instead of risking data corruption
    }
  }

  this.isSaving = true;

  try {
    await fs.mkdir(this.config.cacheDirectory, { recursive: true });

    const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
    const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
    const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);

    // Snapshot to avoid race conditions during async write.
    // Keep this shallow for binary/sqlite to prevent multi-GB vector materialization.
    const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
    const supportsBackendVectorResolve =
      this.config.vectorStoreFormat === 'binary' || this.config.vectorStoreFormat === 'sqlite';
    const hasMissingVectors = snapshotStore.some(
      (chunk) => chunk && (chunk.vector === undefined || chunk.vector === null)
    );
    // Vectors absent from memory must be resolvable from a backend store,
    // otherwise the write would silently drop data — fail loudly instead.
    const useDiskVectors =
      supportsBackendVectorResolve &&
      (this.config.vectorStoreLoadMode === 'disk' || hasMissingVectors);
    if (hasMissingVectors && !useDiskVectors) {
      throw new Error(
        'Missing vector data for cache write and backend vector resolution is unavailable'
      );
    }

    // Rebuild metadata from scratch; load() validates these fields on startup.
    this.cacheMeta = {
      version: CACHE_META_VERSION,
      embeddingModel: this.config.embeddingModel,
      embeddingDimension: this.config.embeddingDimension ?? null,
      lastSaveTime: new Date().toISOString(),
      filesIndexed: this.fileHashes.size,
      chunksStored: snapshotStore.length,
      workspace: this.config.searchDirectory || null,
    };
    if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
      this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
    }
    if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
      Object.assign(this.cacheMeta, this.lastIndexStats);
    }

    const total = snapshotStore.length;
    if (this.config.vectorStoreFormat === 'binary') {
      this.binaryStore = await BinaryVectorStore.write(
        this.config.cacheDirectory,
        snapshotStore,
        {
          contentCacheEntries: this.config.contentCacheEntries,
          vectorCacheEntries: this.config.vectorCacheEntries,
          vectorLoadMode: useDiskVectors ? 'disk' : this.config.vectorStoreLoadMode,
          getContent: (chunk, index) => this.getChunkContent(chunk, index),
          getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
          // Before the atomic rename, drain readers and release the old store's
          // file handles so the rename can succeed.
          preRename: async () => {
            if (this.activeReads > 0) {
              await this.waitForReadersWithTimeout(
                Number.isInteger(this.config.saveReaderWaitTimeoutMs)
                  ? this.config.saveReaderWaitTimeoutMs
                  : 5000
              );
            }
            if (this.binaryStore) {
              await this.binaryStore.close();
              this.binaryStore = null;
            }
          },
        }
      );
      if (this.binaryStore) {
        this.cacheMeta.chunksStored = this.binaryStore.length;
      }
    } else if (this.config.vectorStoreFormat === 'sqlite') {
      // SQLite store save
      if (this.sqliteStore) {
        try {
          this.sqliteStore.close();
        } catch {
          // ignore close errors
        }
        this.sqliteStore = null;
      }
      this.sqliteStore = await SqliteVectorStore.write(
        this.config.cacheDirectory,
        snapshotStore,
        {
          getContent: (chunk, index) => this.getChunkContent(chunk, index),
          getVector: useDiskVectors ? (chunk, index) => this.getChunkVector(chunk, index) : null,
          preRename: async () => {
            if (this.activeReads > 0) {
              await this.waitForReadersWithTimeout(
                Number.isInteger(this.config.saveReaderWaitTimeoutMs)
                  ? this.config.saveReaderWaitTimeoutMs
                  : 5000
              );
            }
          },
        }
      );
      if (this.sqliteStore) {
        this.cacheMeta.chunksStored = this.sqliteStore.length();
      }
    } else {
      // JSON format: stream chunks out incrementally to bound memory use.
      const vectorWriter = new StreamingJsonWriter(cacheFile, {
        highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
        floatDigits: this.config.cacheVectorFloatDigits ?? 6,
        flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
        indent: '', // set to " " if you prefer pretty formatting
        assumeFinite: this.config.cacheVectorAssumeFinite,
        checkFinite: this.config.cacheVectorCheckFinite,
        noMutation: this.config.cacheVectorNoMutation ?? false,
        joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
        joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
      });

      await vectorWriter.writeStart();

      // Optional responsiveness yield (only for huge saves)
      const yieldEvery = total >= 50_000 ? 5000 : 0;

      try {
        for (let i = 0; i < total; i++) {
          // writeItem returns a promise only when backpressure requires waiting.
          const pending = vectorWriter.writeItem(snapshotStore[i]);
          if (pending) await pending;
          if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
        }
        await vectorWriter.writeEnd();
      } catch (e) {
        vectorWriter.abort(e);
        throw e;
      }
    }

    // Serialize file hashes; entries that fail to serialize are dropped.
    const hashEntries = {};
    for (const [file, entry] of this.fileHashes) {
      const serialized = serializeFileHashEntry(entry);
      if (serialized) {
        hashEntries[file] = serialized;
      }
    }

    await Promise.all([
      fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
      fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
    ]);

    // Save call-graph data (or remove stale cache)
    const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
    if (this.fileCallData.size > 0) {
      await fs.writeFile(
        callGraphFile,
        JSON.stringify(Object.fromEntries(this.fileCallData), null, 2)
      );
    } else {
      await fs.rm(callGraphFile, { force: true });
    }

    // Persist ANN index if it exists and changed in memory
    // Use mutex to prevent concurrent writes (index could be modified during save)
    if (
      this.config.annIndexCache !== false &&
      this.annPersistDirty &&
      !this.annDirty &&
      !this._annWriting &&
      this.annIndex &&
      this.annMeta
    ) {
      this._annWriting = true;
      try {
        const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
        this.annIndex.writeIndexSync(indexFile);
        await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
        this.annPersistDirty = false;
        if (this.config.verbose) {
          console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
        }
      } catch (error) {
        console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
      } finally {
        this._annWriting = false;
      }
    }
  } catch (error) {
    console.warn('[Cache] Failed to save cache:', error.message);
    // Attempt to recover binary store if it was closed during failed save
    if (
      this.config.vectorStoreFormat === 'binary' &&
      this.binaryStore &&
      !this.binaryStore.vectorsBuffer
    ) {
      try {
        console.info('[Cache] Attempting to recover binary store after failed save...');
        this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
          contentCacheEntries: this.config.contentCacheEntries,
        });
        console.info('[Cache] Binary store recovered.');
      } catch (recoverErr) {
        console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
        this.binaryStore = null; // Ensure it's null if unusable
      }
    }
    // Attempt to recover SQLite store if closed during failed save
    if (
      this.config.vectorStoreFormat === 'sqlite' &&
      !this.sqliteStore
    ) {
      try {
        console.info('[Cache] Attempting to recover SQLite store after failed save...');
        this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
        if (this.sqliteStore) {
          console.info('[Cache] SQLite store recovered.');
        }
      } catch (recoverErr) {
        console.warn(`[Cache] Failed to recover SQLite store: ${recoverErr.message}`);
        this.sqliteStore = null;
      }
    }
  } finally {
    this.isSaving = false;
    this._saveInProgress = false; // Allow reads to resume
  }
}
|
|
1048
|
+
|
|
1049
|
+
// -------------------- Vector Store API --------------------
|
|
1050
|
+
|
|
1051
|
+
getVectorStore() {
|
|
1052
|
+
return Array.isArray(this.vectorStore) ? this.vectorStore : [];
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
async setVectorStore(store) {
|
|
1056
|
+
const previousBinaryStore = this.binaryStore;
|
|
1057
|
+
const previousSqliteStore = this.sqliteStore;
|
|
1058
|
+
this.vectorStore = store;
|
|
1059
|
+
this.binaryStore = null;
|
|
1060
|
+
this.sqliteStore = null;
|
|
1061
|
+
if (Array.isArray(this.vectorStore)) {
|
|
1062
|
+
for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
|
|
1063
|
+
assignChunkIndices(this.vectorStore);
|
|
1064
|
+
}
|
|
1065
|
+
this.invalidateAnnIndex();
|
|
1066
|
+
if (previousBinaryStore) {
|
|
1067
|
+
try {
|
|
1068
|
+
await previousBinaryStore.close();
|
|
1069
|
+
} catch {
|
|
1070
|
+
// ignore close errors
|
|
1071
|
+
}
|
|
1072
|
+
}
|
|
1073
|
+
if (previousSqliteStore) {
|
|
1074
|
+
try {
|
|
1075
|
+
previousSqliteStore.close();
|
|
1076
|
+
} catch {
|
|
1077
|
+
// ignore close errors
|
|
1078
|
+
}
|
|
1079
|
+
}
|
|
1080
|
+
}
|
|
1081
|
+
|
|
1082
|
+
setLastIndexDuration(durationMs) {
|
|
1083
|
+
if (Number.isFinite(durationMs) && durationMs >= 0) {
|
|
1084
|
+
this.lastIndexDurationMs = durationMs;
|
|
1085
|
+
}
|
|
1086
|
+
}
|
|
1087
|
+
|
|
1088
|
+
setLastIndexStats(stats) {
|
|
1089
|
+
if (stats && typeof stats === 'object') {
|
|
1090
|
+
this.lastIndexStats = { ...stats };
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
|
|
1094
|
+
getFileHash(file) {
|
|
1095
|
+
const entry = this.fileHashes.get(file);
|
|
1096
|
+
if (typeof entry === 'string') return entry;
|
|
1097
|
+
return entry?.hash;
|
|
1098
|
+
}
|
|
1099
|
+
|
|
1100
|
+
getFileHashKeys() {
|
|
1101
|
+
return Array.from(this.fileHashes.keys());
|
|
1102
|
+
}
|
|
1103
|
+
|
|
1104
|
+
getFileHashCount() {
|
|
1105
|
+
return this.fileHashes.size;
|
|
1106
|
+
}
|
|
1107
|
+
|
|
1108
|
+
clearFileHashes() {
|
|
1109
|
+
this.fileHashes.clear();
|
|
1110
|
+
}
|
|
1111
|
+
|
|
1112
|
+
setFileHashes(entries) {
|
|
1113
|
+
this.fileHashes.clear();
|
|
1114
|
+
if (!entries || typeof entries !== 'object') return;
|
|
1115
|
+
const iterator =
|
|
1116
|
+
entries instanceof Map
|
|
1117
|
+
? entries.entries()
|
|
1118
|
+
: Object.entries(entries);
|
|
1119
|
+
if (!iterator) return;
|
|
1120
|
+
for (const [file, entry] of iterator) {
|
|
1121
|
+
const normalized = normalizeFileHashEntry(entry);
|
|
1122
|
+
if (normalized) {
|
|
1123
|
+
this.fileHashes.set(file, normalized);
|
|
1124
|
+
}
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
setFileHash(file, hash, meta = null) {
|
|
1129
|
+
const entry = { hash };
|
|
1130
|
+
if (meta && typeof meta === 'object') {
|
|
1131
|
+
if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
|
|
1132
|
+
if (Number.isFinite(meta.size)) entry.size = meta.size;
|
|
1133
|
+
}
|
|
1134
|
+
this.fileHashes.set(file, entry);
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
getFileMeta(file) {
|
|
1138
|
+
const entry = this.fileHashes.get(file);
|
|
1139
|
+
if (!entry) return null;
|
|
1140
|
+
if (typeof entry === 'string') return { hash: entry };
|
|
1141
|
+
return entry;
|
|
1142
|
+
}
|
|
1143
|
+
|
|
1144
|
+
/**
 * Resolve the embedding vector for a chunk, falling back from the in-memory
 * value to the binary store, then the SQLite store.
 *
 * @param {object|number} chunk - A chunk object, or a numeric index into the
 *   vector store / backend store.
 * @param {number|null} [index] - Optional explicit index hint used when the
 *   chunk object itself carries no usable index.
 * @returns {*} The vector (type depends on the backing store), or null.
 */
getChunkVector(chunk, index = null) {
  // Numeric form: treat the argument as an index.
  if (typeof chunk === 'number') {
    const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
    const entry = store ? store[chunk] : null;
    if (entry?.vector) return entry.vector;
    if (this.binaryStore) {
      // Prefer the chunk's recorded backend index; fall back to the raw index.
      const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
      return this.binaryStore.getVector(resolved);
    }
    if (this.sqliteStore) {
      const resolved = Number.isInteger(entry?._sqliteIndex) ? entry._sqliteIndex : chunk;
      return this.sqliteStore.getVector(resolved);
    }
    return null;
  }

  // Object form: inline vector wins outright.
  if (chunk?.vector) return chunk.vector;
  const resolved = Number.isInteger(index) ? index : chunk?._index;
  if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
    return this.binaryStore.getVector(chunk._binaryIndex);
  }
  // Positional binary lookup is only trusted when there is no in-memory store
  // whose ordering could disagree with the backend's.
  if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
    return this.binaryStore.getVector(resolved);
  }
  if (this.sqliteStore) {
    // SQLite index preference: _sqliteIndex, then chunk.index, then resolved.
    const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
      ? chunk._sqliteIndex
      : Number.isInteger(chunk?.index)
        ? chunk.index
        : resolved;
    if (Number.isInteger(sqliteIndex)) {
      return this.sqliteStore.getVector(sqliteIndex);
    }
  }
  return null;
}
|
|
1180
|
+
|
|
1181
|
+
/**
 * Resolve the source text for a chunk, falling back from inline content to
 * the binary store, then the SQLite store. Mirrors getChunkVector's
 * resolution order but always yields a string.
 *
 * @param {object|number} chunk - A chunk object, or a numeric index.
 * @param {number|null} [index] - Optional explicit index hint.
 * @returns {Promise<string>} The content, or '' when unresolvable.
 */
async getChunkContent(chunk, index = null) {
  // Numeric form: treat the argument as an index.
  if (typeof chunk === 'number') {
    const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
    const entry = store ? store[chunk] : null;
    // Delegate to the object form so the full fallback chain applies.
    if (entry) return await this.getChunkContent(entry, chunk);
    if (!store && this.binaryStore) {
      const content = await this.binaryStore.getContent(chunk);
      return content ?? ''; // Ensure consistent empty string return
    }
    if (!store && this.sqliteStore) {
      return this.sqliteStore.getContent(chunk) ?? '';
    }
    return '';
  }
  // Inline content wins (note: '' is a valid, returnable content value here).
  if (chunk?.content !== undefined && chunk?.content !== null) {
    return chunk.content;
  }
  if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
    const content = await this.binaryStore.getContent(chunk._binaryIndex);
    return content ?? ''; // Ensure consistent empty string return
  }
  const resolved = Number.isInteger(index) ? index : chunk?._index;
  // Positional binary lookup only when no in-memory store could disagree.
  if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
    const content = await this.binaryStore.getContent(resolved);
    return content ?? ''; // Ensure consistent empty string return
  }
  if (this.sqliteStore) {
    // SQLite index preference: _sqliteIndex, then chunk.index, then resolved.
    const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
      ? chunk._sqliteIndex
      : Number.isInteger(chunk?.index)
        ? chunk.index
        : resolved;
    if (Number.isInteger(sqliteIndex)) {
      return this.sqliteStore.getContent(sqliteIndex) ?? '';
    }
  }
  return '';
}
|
|
1219
|
+
|
|
1220
|
+
deleteFileHash(file) {
|
|
1221
|
+
this.fileHashes.delete(file);
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
/**
|
|
1225
|
+
* Remove all chunks for a given file from the vector store.
|
|
1226
|
+
* Note: This is async to support future backend-specific cleanup.
|
|
1227
|
+
* For binary/SQLite stores, actual removal happens on next full save.
|
|
1228
|
+
* @param {string} file - Absolute path of file to remove
|
|
1229
|
+
*/
|
|
1230
|
+
async removeFileFromStore(file) {
|
|
1231
|
+
if (!Array.isArray(this.vectorStore)) return;
|
|
1232
|
+
// In-place compaction to avoid allocating a new large array
|
|
1233
|
+
let w = 0;
|
|
1234
|
+
for (let r = 0; r < this.vectorStore.length; r++) {
|
|
1235
|
+
const chunk = this.vectorStore[r];
|
|
1236
|
+
if (chunk.file !== file) {
|
|
1237
|
+
chunk._index = w;
|
|
1238
|
+
this.vectorStore[w++] = chunk;
|
|
1239
|
+
}
|
|
1240
|
+
}
|
|
1241
|
+
this.vectorStore.length = w;
|
|
1242
|
+
|
|
1243
|
+
// Removing shifts labels => rebuild ANN
|
|
1244
|
+
this.invalidateAnnIndex();
|
|
1245
|
+
this.removeFileCallData(file);
|
|
1246
|
+
// Also remove file hash to prevent orphaned entries
|
|
1247
|
+
this.fileHashes.delete(file);
|
|
1248
|
+
}
|
|
1249
|
+
|
|
1250
|
+
/**
 * Append a chunk to the vector store, keeping the ANN index and vector cache
 * in sync when possible.
 *
 * Fast path: when the ANN index exists, is clean, and its element count
 * exactly matches the new chunk's label with spare capacity, the point is
 * appended incrementally. Otherwise the ANN index is invalidated and will be
 * rebuilt lazily.
 *
 * @param {object} chunk - Chunk to add; mutated to carry its store index.
 */
addToStore(chunk) {
  normalizeChunkVector(chunk);

  if (!Array.isArray(this.vectorStore)) {
    this.vectorStore = [];
  }

  // The chunk's ANN label is its position in the store.
  const label = this.vectorStore.length;
  chunk._index = label;
  this.vectorStore.push(chunk);
  // Extend the vector cache only when it is exactly in step with the store.
  if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
    this.annVectorCache.push(chunk.vector);
  }

  // Best-effort incremental ANN append (fast path)
  if (
    this.annIndex &&
    !this.annDirty &&
    this.annMeta &&
    typeof this.annIndex.addPoint === 'function' &&
    this.annMeta.count === label &&
    this.annMeta.maxElements > this.annMeta.count
  ) {
    try {
      this.annIndex.addPoint(chunk.vector, label);
      this.annMeta.count += 1;
      // Mark the on-disk ANN index stale so the next save persists it.
      this.annPersistDirty = true;
      return;
    } catch {
      // fall through
    }
  }

  // Slow path: any mismatch or append failure forces a full ANN rebuild.
  this.invalidateAnnIndex();
}
|
|
1285
|
+
|
|
1286
|
+
invalidateAnnIndex() {
|
|
1287
|
+
this.annIndex = null;
|
|
1288
|
+
this.annMeta = null;
|
|
1289
|
+
this.annDirty = true;
|
|
1290
|
+
this.annPersistDirty = false;
|
|
1291
|
+
this.annVectorCache = null;
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
/**
 * Fetch the vector for a store index for ANN index construction/queries,
 * memoizing through `annVectorCache`.
 *
 * Resolution order: cache -> inline chunk.vector (coerced to Float32) ->
 * binary store -> SQLite store. In non-'disk' load mode the resolved vector
 * is also written back onto the chunk to avoid repeated backend reads.
 *
 * @param {number} index - Position in the in-memory vector store.
 * @returns {*} The vector, or null when unresolvable.
 */
getAnnVector(index) {
  if (!Array.isArray(this.vectorStore)) return null;
  const chunk = this.vectorStore[index];
  if (!chunk) return null;

  // (Re)allocate the cache whenever its length disagrees with the store —
  // a mismatch means the store changed shape since the cache was built.
  if (
    !Array.isArray(this.annVectorCache) ||
    this.annVectorCache.length !== this.vectorStore.length
  ) {
    this.annVectorCache = new Array(this.vectorStore.length);
  }

  const cached = this.annVectorCache[index];
  if (cached) return cached;

  let vec = null;
  if (chunk.vector) {
    vec = ensureFloat32(chunk.vector);
  } else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
    vec = this.binaryStore.getVector(chunk._binaryIndex);
  } else if (this.sqliteStore) {
    // SQLite index preference: _sqliteIndex, then chunk.index, then the raw index.
    const sqliteIndex = Number.isInteger(chunk._sqliteIndex)
      ? chunk._sqliteIndex
      : Number.isInteger(chunk.index)
        ? chunk.index
        : index;
    if (Number.isInteger(sqliteIndex)) {
      vec = this.sqliteStore.getVector(sqliteIndex);
    }
  }

  if (!vec) return null;

  // In 'disk' mode vectors deliberately stay out of chunk objects.
  if (this.config.vectorStoreLoadMode !== 'disk') {
    chunk.vector = vec;
  }
  this.annVectorCache[index] = vec;
  return vec;
}
|
|
1333
|
+
|
|
1334
|
+
getAnnIndexPaths() {
|
|
1335
|
+
return {
|
|
1336
|
+
indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
|
|
1337
|
+
metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
|
|
1338
|
+
};
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1341
|
+
// -------------------- ANN --------------------
|
|
1342
|
+
|
|
1343
|
+
/**
 * Ensure ANN (Approximate Nearest Neighbor) index is built and ready.
 * Loads from disk cache if available and valid, otherwise builds a new index.
 *
 * @returns {Promise<HierarchicalNSW|null>} The HNSW index, or null if:
 * - ANN is disabled in config
 * - vectorStore is not an array
 * - vectorStore size is below annMinChunks threshold
 * - hnswlib-node is not available
 * - Vector dimension mismatch detected
 * @note This method is safe to call multiple times; concurrent calls share the same promise.
 */
async ensureAnnIndex() {
  // Fast bailouts: ANN disabled, no in-memory store, or too few chunks to
  // justify an index (linear scan is used below annMinChunks).
  if (!this.config.annEnabled) return null;
  if (!Array.isArray(this.vectorStore)) return null;
  if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
  // Reuse a clean, already-built index.
  if (this.annIndex && !this.annDirty) return this.annIndex;
  // Share the in-flight build/load promise so concurrent callers don't race.
  if (this.annLoading) return this.annLoading;

  this.annLoading = (async () => {
    try {
      const HierarchicalNSW = await loadHnswlib();
      if (!HierarchicalNSW) {
        // Surface the module-load failure once via the init-error channel.
        if (hnswlibLoadError) {
          this.addInitError('loadHnswlib', hnswlibLoadError);
        }
        return null;
      }

      // Derive the vector dimension from the first in-memory vector, falling
      // back to whatever dimension the backing stores recorded.
      const dim =
        this.vectorStore[0]?.vector?.length ||
        this.binaryStore?.dim ||
        this.sqliteStore?.dim;
      if (!dim) return null;

      // Validate dimension consistency before building index
      // Use stratified sampling for better coverage across entire store
      // NOTE(review): only chunks with an in-memory `.vector` are sampled;
      // disk-backed chunks are skipped by the `v &&` guard below.
      let dimensionMismatch = false;
      const sampleSize = Math.min(ANN_DIMENSION_SAMPLE_SIZE, this.vectorStore.length);
      const step = Math.max(1, Math.floor(this.vectorStore.length / sampleSize));
      for (let i = step; i < this.vectorStore.length; i += step) {
        const v = this.vectorStore[i]?.vector;
        if (v && v.length !== dim) {
          dimensionMismatch = true;
          console.warn(
            `[ANN] Dimension mismatch at index ${i}: expected ${dim}, got ${v.length}. ` +
              'This may indicate a config change mid-index. Consider full reindex.'
          );
          break;
        }
      }

      if (dimensionMismatch) {
        this.addInitError('ensureAnnIndex', `Vector dimension inconsistency detected. Expected ${dim}. Full reindex required.`);
        return null; // Skip ANN build - fallback to linear search
      }

      // Try the persisted index first unless it is known-stale (annDirty)
      // or persistence is disabled.
      if (!this.annDirty && this.config.annIndexCache !== false) {
        const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
        if (loaded) return this.annIndex;
      }

      return await this.buildAnnIndex(HierarchicalNSW, dim);
    } finally {
      // Always clear the shared promise so a later call can retry.
      this.annLoading = null;
    }
  })();

  return this.annLoading;
}
|
|
1413
|
+
|
|
1414
|
+
/**
 * Attempt to load a previously persisted ANN index from the cache directory.
 * The companion metadata file is validated (format version, embedding model,
 * dimension, element count, and HNSW build parameters) before the binary
 * index is read; any mismatch rejects the cached index so the caller
 * rebuilds from scratch.
 *
 * @param {Function} HierarchicalNSW - hnswlib-node index constructor
 * @param {number} dim - Expected vector dimension
 * @returns {Promise<boolean>} true when the index was loaded and installed
 *   on this instance; false when the caller should rebuild
 */
async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
  const { indexFile, metaFile } = this.getAnnIndexPaths();
  // A missing metadata file simply means there is no usable cached index.
  const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
  if (!metaData) return false;

  let meta;
  try {
    meta = JSON.parse(metaData);
  } catch {
    console.warn('[ANN] Invalid ANN metadata, rebuilding');
    return false;
  }

  // Reject the cached index on any incompatibility with the current state.
  if (meta?.version !== ANN_META_VERSION) {
    console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
    return false;
  }

  if (meta?.embeddingModel !== this.config.embeddingModel) {
    console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
    return false;
  }

  if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
    console.warn('[ANN] ANN index size mismatch, rebuilding');
    return false;
  }

  if (
    meta?.metric !== this.config.annMetric ||
    meta?.m !== this.config.annM ||
    meta?.efConstruction !== this.config.annEfConstruction
  ) {
    console.warn('[ANN] ANN index config changed, rebuilding');
    return false;
  }

  // Older metadata may lack maxElements; fall back to the element count.
  // A capacity smaller than the element count can never be valid.
  let maxElements = meta?.maxElements;
  if (!Number.isInteger(maxElements)) {
    maxElements = meta.count;
  } else if (maxElements < meta.count) {
    console.warn('[ANN] ANN capacity invalid, rebuilding');
    return false;
  }

  const index = new HierarchicalNSW(meta.metric, dim);
  const loaded = readHnswIndex(index, indexFile, maxElements);
  if (!loaded) {
    console.warn('[ANN] Failed to load ANN index file, rebuilding');
    return false;
  }

  // Apply the runtime search-quality knob when the binding supports it.
  if (typeof index.setEf === 'function') {
    index.setEf(this.config.annEfSearch);
  }

  // Install the loaded index: clean (not dirty) and already persisted.
  this.annIndex = index;
  this.annMeta = { ...meta, maxElements };
  this.annDirty = false;
  this.annPersistDirty = false;

  if (this.config.verbose) {
    console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
  }
  return true;
}
|
|
1480
|
+
|
|
1481
|
+
/**
 * Build a fresh HNSW index over every vector in the store, then persist it
 * (binary index + JSON metadata) to the cache directory when caching is
 * enabled. Periodically yields to the event loop during the build so the
 * process stays responsive on large stores.
 *
 * @param {Function} HierarchicalNSW - hnswlib-node index constructor
 * @param {number} dim - Vector dimension for the index
 * @returns {Promise<object|null>} The built index, or null on failure
 *   (failure leaves the ANN state dirty so a later call retries)
 */
async buildAnnIndex(HierarchicalNSW, dim) {
  if (!Array.isArray(this.vectorStore)) return null;
  const total = this.vectorStore.length;
  if (total === 0) return null;

  try {
    const index = new HierarchicalNSW(this.config.annMetric, dim);

    // Capacity may exceed `total` to leave headroom for incremental adds.
    const maxElements = computeAnnCapacity(total, this.config);
    initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);

    // How many insertions between event-loop yields (0/negative disables).
    const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
      ? this.config.annBuildYieldEvery
      : 1000;

    for (let i = 0; i < total; i++) {
      const vector = this.getAnnVector(i);
      // A gap would desynchronize ANN labels from store positions, so abort.
      if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
      index.addPoint(vector, i);

      if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
        await yieldToLoop();
      }
    }

    if (typeof index.setEf === 'function') {
      index.setEf(this.config.annEfSearch);
    }

    // Install the new index; persistDirty stays true until saved below.
    this.annIndex = index;
    this.annMeta = {
      version: ANN_META_VERSION,
      embeddingModel: this.config.embeddingModel,
      metric: this.config.annMetric,
      dim,
      count: total,
      maxElements,
      m: this.config.annM,
      efConstruction: this.config.annEfConstruction,
      efSearch: this.config.annEfSearch,
    };
    this.annDirty = false;
    this.annPersistDirty = true;

    if (this.config.annIndexCache !== false) {
      try {
        await fs.mkdir(this.config.cacheDirectory, { recursive: true });
        const { indexFile, metaFile } = this.getAnnIndexPaths();
        index.writeIndexSync(indexFile);
        await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
        this.annPersistDirty = false;
        if (this.config.verbose) {
          console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
        }
      } catch (error) {
        // Persistence failure is non-fatal; the in-memory index still works.
        console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
      }
    }

    return index;
  } catch (error) {
    console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
    this.addInitError('buildAnnIndex', error);
    // Reset to a dirty, index-less state so the next call can retry.
    this.annIndex = null;
    this.annMeta = null;
    this.annDirty = true;
    this.annPersistDirty = false;
    return null;
  }
}
|
|
1551
|
+
|
|
1552
|
+
/**
|
|
1553
|
+
* Query the ANN index for k nearest neighbors.
|
|
1554
|
+
* Falls back gracefully to empty results if ANN is unavailable.
|
|
1555
|
+
*
|
|
1556
|
+
* @param {Float32Array|number[]} queryVector - Normalized query embedding
|
|
1557
|
+
* @param {number} k - Number of neighbors to return
|
|
1558
|
+
* @returns {Promise<number[]>} Array of chunk indices sorted by similarity (may be empty)
|
|
1559
|
+
* @throws Never throws - returns empty array on all error conditions
|
|
1560
|
+
* @note Automatically invalidates corrupted index and falls back to linear search on next query
|
|
1561
|
+
*/
|
|
1562
|
+
async queryAnn(queryVector, k) {
|
|
1563
|
+
if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
|
|
1564
|
+
const index = await this.ensureAnnIndex();
|
|
1565
|
+
if (!index) return [];
|
|
1566
|
+
|
|
1567
|
+
const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
|
|
1568
|
+
|
|
1569
|
+
// Wrap searchKnn in try-catch to handle corrupted index or dimension mismatches
|
|
1570
|
+
let results;
|
|
1571
|
+
try {
|
|
1572
|
+
results = index.searchKnn(qVec, k);
|
|
1573
|
+
} catch (err) {
|
|
1574
|
+
console.warn(`[ANN] searchKnn failed: ${err.message}. Falling back to linear search.`);
|
|
1575
|
+
this.addInitError('queryAnn', err);
|
|
1576
|
+
// Invalidate to trigger rebuild on next query
|
|
1577
|
+
this.invalidateAnnIndex();
|
|
1578
|
+
return [];
|
|
1579
|
+
}
|
|
1580
|
+
|
|
1581
|
+
const labels = normalizeLabels(results);
|
|
1582
|
+
|
|
1583
|
+
if (labels.length === 0) return [];
|
|
1584
|
+
|
|
1585
|
+
const filtered = labels.filter(
|
|
1586
|
+
(label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
|
|
1587
|
+
);
|
|
1588
|
+
|
|
1589
|
+
return filtered;
|
|
1590
|
+
}
|
|
1591
|
+
|
|
1592
|
+
/**
 * Delete the on-disk cache directory and reset all in-memory state:
 * vector store, binary/SQLite backends, file hashes, ANN index, and
 * call-graph data. No-op when caching is disabled.
 *
 * @returns {Promise<void>}
 * @throws Re-throws filesystem errors from removing the cache directory.
 */
async clear() {
  if (!this.config.enableCache) return;

  try {
    await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
    this.vectorStore = [];
    // Close the backends best-effort; their files are already gone.
    if (this.binaryStore) {
      try {
        await this.binaryStore.close();
      } catch {
        // ignore close errors
      }
    }
    this.binaryStore = null;
    if (this.sqliteStore) {
      try {
        this.sqliteStore.close();
      } catch {
        // ignore close errors
      }
    }
    this.sqliteStore = null;
    this.fileHashes = new Map();
    this.invalidateAnnIndex();
    await this.clearCallGraphData();
    if (this.config.verbose) {
      console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
    }
  } catch (error) {
    console.error('[Cache] Failed to clear cache:', error.message);
    throw error;
  }
}
|
|
1625
|
+
|
|
1626
|
+
/**
|
|
1627
|
+
* Adjust efSearch at runtime for speed/accuracy tradeoff.
|
|
1628
|
+
* Higher values = more accurate but slower.
|
|
1629
|
+
* @param {number} efSearch - New efSearch value (typically 16-512)
|
|
1630
|
+
* @returns {object} Result with success status and current config
|
|
1631
|
+
*/
|
|
1632
|
+
setEfSearch(efSearch) {
|
|
1633
|
+
if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
|
|
1634
|
+
return {
|
|
1635
|
+
success: false,
|
|
1636
|
+
error: 'efSearch must be a number between 1 and 1000',
|
|
1637
|
+
};
|
|
1638
|
+
}
|
|
1639
|
+
|
|
1640
|
+
this.config.annEfSearch = efSearch;
|
|
1641
|
+
|
|
1642
|
+
if (this.annIndex && typeof this.annIndex.setEf === 'function') {
|
|
1643
|
+
this.annIndex.setEf(efSearch);
|
|
1644
|
+
if (this.annMeta) this.annMeta.efSearch = efSearch;
|
|
1645
|
+
this.annPersistDirty = true;
|
|
1646
|
+
if (this.config.verbose) {
|
|
1647
|
+
console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
|
|
1648
|
+
}
|
|
1649
|
+
return { success: true, applied: true, efSearch };
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
if (this.config.verbose) {
|
|
1653
|
+
console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
|
|
1654
|
+
}
|
|
1655
|
+
return { success: true, applied: false, efSearch };
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1658
|
+
/**
|
|
1659
|
+
* Get current ANN index statistics for diagnostics.
|
|
1660
|
+
* @returns {object} ANN stats including index state, config, and vector count
|
|
1661
|
+
*/
|
|
1662
|
+
getAnnStats() {
|
|
1663
|
+
return {
|
|
1664
|
+
enabled: this.config.annEnabled ?? false,
|
|
1665
|
+
indexLoaded: this.annIndex !== null,
|
|
1666
|
+
dirty: this.annDirty,
|
|
1667
|
+
vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
|
|
1668
|
+
minChunksForAnn: this.config.annMinChunks ?? 5000,
|
|
1669
|
+
config: this.annMeta
|
|
1670
|
+
? {
|
|
1671
|
+
metric: this.annMeta.metric,
|
|
1672
|
+
dim: this.annMeta.dim,
|
|
1673
|
+
count: this.annMeta.count,
|
|
1674
|
+
m: this.annMeta.m,
|
|
1675
|
+
efConstruction: this.annMeta.efConstruction,
|
|
1676
|
+
efSearch: this.config.annEfSearch,
|
|
1677
|
+
}
|
|
1678
|
+
: null,
|
|
1679
|
+
};
|
|
1680
|
+
}
|
|
1681
|
+
|
|
1682
|
+
// -------------------- Call Graph --------------------
|
|
1683
|
+
|
|
1684
|
+
async clearCallGraphData({ removeFile = false } = {}) {
|
|
1685
|
+
this.fileCallData.clear();
|
|
1686
|
+
this.callGraph = null;
|
|
1687
|
+
|
|
1688
|
+
if (removeFile && this.config.enableCache) {
|
|
1689
|
+
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
1690
|
+
try {
|
|
1691
|
+
await fs.rm(callGraphFile, { force: true });
|
|
1692
|
+
} catch (error) {
|
|
1693
|
+
if (this.config.verbose) {
|
|
1694
|
+
console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
|
|
1695
|
+
}
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
}
|
|
1699
|
+
|
|
1700
|
+
pruneCallGraphData(validFiles) {
|
|
1701
|
+
if (!validFiles || this.fileCallData.size === 0) return 0;
|
|
1702
|
+
|
|
1703
|
+
let pruned = 0;
|
|
1704
|
+
for (const file of Array.from(this.fileCallData.keys())) {
|
|
1705
|
+
if (!validFiles.has(file)) {
|
|
1706
|
+
this.fileCallData.delete(file);
|
|
1707
|
+
pruned++;
|
|
1708
|
+
}
|
|
1709
|
+
}
|
|
1710
|
+
|
|
1711
|
+
if (pruned > 0) this.callGraph = null;
|
|
1712
|
+
return pruned;
|
|
1713
|
+
}
|
|
1714
|
+
|
|
1715
|
+
getFileCallData(file) {
|
|
1716
|
+
return this.fileCallData.get(file);
|
|
1717
|
+
}
|
|
1718
|
+
|
|
1719
|
+
hasFileCallData(file) {
|
|
1720
|
+
return this.fileCallData.has(file);
|
|
1721
|
+
}
|
|
1722
|
+
|
|
1723
|
+
getFileCallDataKeys() {
|
|
1724
|
+
return Array.from(this.fileCallData.keys());
|
|
1725
|
+
}
|
|
1726
|
+
|
|
1727
|
+
getFileCallDataCount() {
|
|
1728
|
+
return this.fileCallData.size;
|
|
1729
|
+
}
|
|
1730
|
+
|
|
1731
|
+
/**
|
|
1732
|
+
* Sets call data for a specific file.
|
|
1733
|
+
* @param {string} file
|
|
1734
|
+
* @param {object} data
|
|
1735
|
+
*/
|
|
1736
|
+
setFileCallData(file, data) {
|
|
1737
|
+
this.fileCallData.set(file, data);
|
|
1738
|
+
this.callGraph = null;
|
|
1739
|
+
}
|
|
1740
|
+
|
|
1741
|
+
/**
|
|
1742
|
+
* Sets the entire file call data map.
|
|
1743
|
+
* @param {Map<string, object>|object} entries
|
|
1744
|
+
*/
|
|
1745
|
+
setFileCallDataEntries(entries) {
|
|
1746
|
+
if (entries instanceof Map) {
|
|
1747
|
+
this.fileCallData = entries;
|
|
1748
|
+
} else {
|
|
1749
|
+
this.fileCallData.clear();
|
|
1750
|
+
if (entries && typeof entries === 'object') {
|
|
1751
|
+
for (const [file, data] of Object.entries(entries)) {
|
|
1752
|
+
this.fileCallData.set(file, data);
|
|
1753
|
+
}
|
|
1754
|
+
}
|
|
1755
|
+
}
|
|
1756
|
+
this.callGraph = null;
|
|
1757
|
+
}
|
|
1758
|
+
|
|
1759
|
+
clearFileCallData() {
|
|
1760
|
+
this.fileCallData.clear();
|
|
1761
|
+
this.callGraph = null;
|
|
1762
|
+
}
|
|
1763
|
+
|
|
1764
|
+
removeFileCallData(file) {
|
|
1765
|
+
this.fileCallData.delete(file);
|
|
1766
|
+
this.callGraph = null;
|
|
1767
|
+
}
|
|
1768
|
+
|
|
1769
|
+
/**
 * Rebuild the call graph from the accumulated per-file call data.
 * Concurrent callers share the same in-flight build promise. On failure
 * the graph is left null and the error is logged rather than thrown.
 * @returns {Promise<void>}
 */
async rebuildCallGraph() {
  // Share an in-flight build so concurrent callers don't build twice.
  if (this._callGraphBuild) return this._callGraphBuild;

  this._callGraphBuild = (async () => {
    try {
      // Lazy dynamic import keeps call-graph code off the startup path.
      const { buildCallGraph } = await import('./call-graph.js');
      this.callGraph = buildCallGraph(this.fileCallData);
      if (this.config.verbose && this.callGraph) {
        console.info(
          `[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`
        );
      }
    } catch (err) {
      console.error(`[CallGraph] Failed to build: ${err.message}`);
      this.callGraph = null;
    } finally {
      // Clear the shared promise so a later call can rebuild.
      this._callGraphBuild = null;
    }
  })();

  return this._callGraphBuild;
}
|
|
1791
|
+
|
|
1792
|
+
async getRelatedFiles(symbols) {
|
|
1793
|
+
if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
|
|
1794
|
+
if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
|
|
1795
|
+
if (!this.callGraph) return new Map();
|
|
1796
|
+
|
|
1797
|
+
const { getRelatedFiles } = await import('./call-graph.js');
|
|
1798
|
+
return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
|
|
1799
|
+
}
|
|
1800
|
+
|
|
1801
|
+
getCallGraphStats() {
|
|
1802
|
+
return {
|
|
1803
|
+
enabled: this.config.callGraphEnabled ?? false,
|
|
1804
|
+
filesWithData: this.fileCallData.size,
|
|
1805
|
+
graphBuilt: this.callGraph !== null,
|
|
1806
|
+
definitions: this.callGraph?.defines.size ?? 0,
|
|
1807
|
+
callTargets: this.callGraph?.calledBy.size ?? 0,
|
|
1808
|
+
};
|
|
1809
|
+
}
|
|
1810
|
+
|
|
1811
|
+
// -------------------- Abstraction Layer --------------------
|
|
1812
|
+
|
|
1813
|
+
/**
|
|
1814
|
+
* Returns the total number of chunks in the store.
|
|
1815
|
+
* @returns {number}
|
|
1816
|
+
*/
|
|
1817
|
+
getStoreSize() {
|
|
1818
|
+
if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
|
|
1819
|
+
if (this.binaryStore) return this.binaryStore.length;
|
|
1820
|
+
if (this.sqliteStore) return this.sqliteStore.length();
|
|
1821
|
+
return 0;
|
|
1822
|
+
}
|
|
1823
|
+
|
|
1824
|
+
/**
|
|
1825
|
+
* Retrieves a vector by its store index.
|
|
1826
|
+
* @param {number} index
|
|
1827
|
+
* @returns {Float32Array|null}
|
|
1828
|
+
*/
|
|
1829
|
+
getVector(index) {
|
|
1830
|
+
return this.getChunkVector(index);
|
|
1831
|
+
}
|
|
1832
|
+
|
|
1833
|
+
/**
|
|
1834
|
+
* Retrieves a chunk object by its store index.
|
|
1835
|
+
* @param {number} index
|
|
1836
|
+
* @returns {object|null}
|
|
1837
|
+
*/
|
|
1838
|
+
getChunk(index) {
|
|
1839
|
+
if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
|
|
1840
|
+
return this.vectorStore[index];
|
|
1841
|
+
}
|
|
1842
|
+
if (this.binaryStore) {
|
|
1843
|
+
const record = this.binaryStore.getRecord(index);
|
|
1844
|
+
if (record) {
|
|
1845
|
+
return {
|
|
1846
|
+
file: record.file,
|
|
1847
|
+
startLine: record.startLine,
|
|
1848
|
+
endLine: record.endLine,
|
|
1849
|
+
vector: this.binaryStore.getVector(index),
|
|
1850
|
+
_index: index,
|
|
1851
|
+
_binaryIndex: index,
|
|
1852
|
+
};
|
|
1853
|
+
}
|
|
1854
|
+
}
|
|
1855
|
+
if (this.sqliteStore) {
|
|
1856
|
+
const record = this.sqliteStore.getRecord(index);
|
|
1857
|
+
if (record) {
|
|
1858
|
+
return {
|
|
1859
|
+
file: record.file,
|
|
1860
|
+
startLine: record.startLine,
|
|
1861
|
+
endLine: record.endLine,
|
|
1862
|
+
vector: this.sqliteStore.getVector(index),
|
|
1863
|
+
_index: index,
|
|
1864
|
+
_sqliteIndex: index,
|
|
1865
|
+
};
|
|
1866
|
+
}
|
|
1867
|
+
}
|
|
1868
|
+
return null;
|
|
1869
|
+
}
|
|
1870
|
+
}
|