@softerist/heuristic-mcp 2.1.47 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agent/workflows/code-review.md +60 -0
- package/.prettierrc +7 -0
- package/ARCHITECTURE.md +105 -170
- package/CONTRIBUTING.md +32 -113
- package/GEMINI.md +73 -0
- package/LICENSE +21 -21
- package/README.md +161 -54
- package/config.json +876 -75
- package/debug-pids.js +27 -0
- package/eslint.config.js +36 -0
- package/features/ann-config.js +37 -26
- package/features/clear-cache.js +28 -19
- package/features/find-similar-code.js +142 -66
- package/features/hybrid-search.js +253 -93
- package/features/index-codebase.js +1455 -394
- package/features/lifecycle.js +813 -180
- package/features/register.js +58 -52
- package/index.js +450 -306
- package/lib/cache-ops.js +22 -0
- package/lib/cache-utils.js +68 -0
- package/lib/cache.js +1392 -587
- package/lib/call-graph.js +165 -50
- package/lib/cli.js +154 -0
- package/lib/config.js +462 -121
- package/lib/embedding-process.js +77 -0
- package/lib/embedding-worker.js +545 -30
- package/lib/ignore-patterns.js +61 -59
- package/lib/json-worker.js +14 -0
- package/lib/json-writer.js +344 -0
- package/lib/logging.js +88 -0
- package/lib/memory-logger.js +13 -0
- package/lib/project-detector.js +13 -17
- package/lib/server-lifecycle.js +38 -0
- package/lib/settings-editor.js +645 -0
- package/lib/tokenizer.js +207 -104
- package/lib/utils.js +273 -198
- package/lib/vector-store-binary.js +592 -0
- package/mcp_config.example.json +13 -0
- package/package.json +13 -2
- package/scripts/clear-cache.js +6 -17
- package/scripts/download-model.js +14 -9
- package/scripts/postinstall.js +5 -5
- package/search-configs.js +36 -0
- package/test/ann-config.test.js +179 -0
- package/test/ann-fallback.test.js +6 -6
- package/test/binary-store.test.js +69 -0
- package/test/cache-branches.test.js +120 -0
- package/test/cache-errors.test.js +264 -0
- package/test/cache-extra.test.js +300 -0
- package/test/cache-helpers.test.js +205 -0
- package/test/cache-hnsw-failure.test.js +40 -0
- package/test/cache-json-worker.test.js +190 -0
- package/test/cache-worker.test.js +102 -0
- package/test/cache.test.js +443 -0
- package/test/call-graph.test.js +103 -4
- package/test/clear-cache.test.js +69 -68
- package/test/code-review-workflow.test.js +50 -0
- package/test/config.test.js +418 -0
- package/test/coverage-gap.test.js +497 -0
- package/test/coverage-maximizer.test.js +236 -0
- package/test/debug-analysis.js +107 -0
- package/test/embedding-model.test.js +173 -103
- package/test/embedding-worker-extra.test.js +272 -0
- package/test/embedding-worker.test.js +158 -0
- package/test/features.test.js +139 -0
- package/test/final-boost.test.js +271 -0
- package/test/final-polish.test.js +183 -0
- package/test/final.test.js +95 -0
- package/test/find-similar-code.test.js +191 -0
- package/test/helpers.js +92 -11
- package/test/helpers.test.js +46 -0
- package/test/hybrid-search-basic.test.js +62 -0
- package/test/hybrid-search-branch.test.js +202 -0
- package/test/hybrid-search-callgraph.test.js +229 -0
- package/test/hybrid-search-extra.test.js +81 -0
- package/test/hybrid-search.test.js +484 -71
- package/test/index-cli.test.js +520 -0
- package/test/index-codebase-batch.test.js +119 -0
- package/test/index-codebase-branches.test.js +585 -0
- package/test/index-codebase-core.test.js +1032 -0
- package/test/index-codebase-edge-cases.test.js +254 -0
- package/test/index-codebase-errors.test.js +132 -0
- package/test/index-codebase-gap.test.js +239 -0
- package/test/index-codebase-lines.test.js +151 -0
- package/test/index-codebase-watcher.test.js +259 -0
- package/test/index-codebase-zone.test.js +259 -0
- package/test/index-codebase.test.js +371 -69
- package/test/index-memory.test.js +220 -0
- package/test/indexer-detailed.test.js +176 -0
- package/test/integration.test.js +148 -92
- package/test/json-worker.test.js +50 -0
- package/test/lifecycle.test.js +541 -0
- package/test/master.test.js +198 -0
- package/test/perfection.test.js +349 -0
- package/test/project-detector.test.js +65 -0
- package/test/register.test.js +262 -0
- package/test/tokenizer.test.js +55 -93
- package/test/ultra-maximizer.test.js +116 -0
- package/test/utils-branches.test.js +161 -0
- package/test/utils-extra.test.js +116 -0
- package/test/utils.test.js +131 -0
- package/test/verify_fixes.js +76 -0
- package/test/worker-errors.test.js +96 -0
- package/test/worker-init.test.js +102 -0
- package/test/worker_throttling.test.js +93 -0
- package/tools/scripts/benchmark-search.js +95 -0
- package/tools/scripts/cache-stats.js +71 -0
- package/tools/scripts/manual-search.js +34 -0
- package/vitest.config.js +19 -9
package/lib/cache.js
CHANGED
|
@@ -1,621 +1,1426 @@
|
|
|
1
|
-
import fs from
|
|
2
|
-
import path from
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
const
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { Worker } from 'worker_threads';
|
|
4
|
+
import { StreamingJsonWriter } from './json-writer.js';
|
|
5
|
+
import { BinaryVectorStore } from './vector-store-binary.js';
|
|
6
|
+
|
|
7
|
+
const CACHE_META_VERSION = 1;
|
|
8
|
+
const CACHE_META_FILE = 'meta.json';
|
|
9
|
+
|
|
10
|
+
// ANN meta version stays at 1 for compatibility; maxElements is optional.
|
|
11
|
+
const ANN_META_VERSION = 1;
|
|
12
|
+
const ANN_INDEX_FILE = 'ann-index.bin';
|
|
13
|
+
const ANN_META_FILE = 'ann-meta.json';
|
|
14
|
+
|
|
15
|
+
const CALL_GRAPH_FILE = 'call-graph.json';
|
|
16
|
+
|
|
17
|
+
const DEFAULT_JSON_WORKER_THRESHOLD = 5 * 1024 * 1024;
|
|
18
|
+
const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
|
|
19
|
+
|
|
20
|
+
// Yield to event loop to keep IDE/extension host responsive during heavy CPU loops
|
|
21
|
+
const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
|
|
22
|
+
|
|
23
|
+
let hnswlibPromise = null;
|
|
24
|
+
let hnswlibLoadError = null;
|
|
25
|
+
|
|
26
|
+
/**
 * Parses a JSON file on a worker thread so that large files do not block the
 * caller's event loop.
 *
 * Spawns `./json-worker.js` (resolved relative to this module) with
 * `{ filePath }` as `workerData` and waits for a single message. A message
 * with a truthy `ok` resolves with `msg.data`; any other message rejects with
 * `msg.error` (or a generic failure message).
 *
 * @param {string} filePath - Path handed to the worker through `workerData`.
 * @returns {Promise<*>} Resolves with the `data` field of the worker's success message.
 * @throws {Error} Rejects when the worker reports a failure, emits `error`, or
 *   exits (with any exit code) before delivering a result.
 */
async function parseJsonInWorker(filePath) {
  return new Promise((resolve, reject) => {
    let settled = false;
    const worker = new Worker(new URL('./json-worker.js', import.meta.url), {
      workerData: { filePath },
    });

    // Settle exactly once: detach every listener, stop the worker, then hand
    // the value to resolve/reject. Termination failures are ignored on purpose.
    const finish = (handler, value) => {
      if (settled) return;
      settled = true;
      worker.removeAllListeners();
      const termination = worker.terminate?.();
      if (termination && typeof termination.catch === 'function') termination.catch(() => null);
      handler(value);
    };

    worker.once('message', (msg) => {
      if (msg?.ok) {
        finish(resolve, msg.data);
      } else {
        const err = new Error(msg?.error || 'JSON worker failed');
        console.warn(`[Cache] ${err.message}`);
        finish(reject, err);
      }
    });

    worker.once('error', (err) => {
      console.error(`[Cache] JSON worker error: ${err.message}`);
      finish(reject, err);
    });

    worker.once('exit', (code) => {
      // This listener is removed by finish(), so reaching it means the worker
      // stopped before any message or error settled the promise. A clean exit
      // (code 0) without a result must reject too, otherwise the caller would
      // wait forever.
      const err = new Error(
        code === 0
          ? 'JSON worker exited without sending a result'
          : `JSON worker exited with code ${code}`,
      );
      console.error(`[Cache] ${err.message}`);
      finish(reject, err);
    });
  });
}
|
|
66
|
+
|
|
67
|
+
/**
 * Reads and parses a JSON file, returning `null` instead of throwing.
 *
 * Files whose size is at least `workerThresholdBytes` are parsed through
 * `parseJsonInWorker`; smaller files are read and parsed inline.
 *
 * @param {string} filePath - File to read.
 * @param {{ workerThresholdBytes?: number }} [options] - Size (in bytes) at or
 *   above which parsing is delegated to the worker.
 * @returns {Promise<*|null>} Parsed value, or `null` when the file cannot be
 *   stat'ed, read, or parsed.
 */
async function readJsonFile(filePath, { workerThresholdBytes = DEFAULT_JSON_WORKER_THRESHOLD } = {}) {
  let fileInfo;
  try {
    fileInfo = await fs.stat(filePath);
  } catch {
    // stat failed (e.g. the file does not exist): report "no data" quietly.
    return null;
  }

  try {
    let offloadToWorker = false;
    if (typeof Worker === 'function' && fileInfo && typeof fileInfo.size === 'number') {
      offloadToWorker = fileInfo.size >= workerThresholdBytes;
    }

    if (offloadToWorker) {
      const parsed = await parseJsonInWorker(filePath);
      return parsed;
    }

    const text = await fs.readFile(filePath, 'utf-8');
    return JSON.parse(text);
  } catch (error) {
    console.warn(`[Cache] Failed to parse ${path.basename(filePath)}: ${error.message}`);
    return null;
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Lazily imports `hnswlib-node` and resolves with its `HierarchicalNSW` export.
 *
 * The import is attempted at most once: the in-flight/settled promise is kept
 * in `hnswlibPromise`, and a failure is remembered in `hnswlibLoadError` so
 * later calls return `null` immediately.
 *
 * @returns {Promise<Function|null>} The `HierarchicalNSW` export, or `null`
 *   when the module (or the export) is unavailable.
 */
async function loadHnswlib() {
  // A previous attempt already failed; do not retry.
  if (hnswlibLoadError) return null;

  if (hnswlibPromise) return hnswlibPromise;

  hnswlibPromise = (async () => {
    try {
      const mod = await import('hnswlib-node');
      // The export may live on the namespace or on the default export.
      const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
      if (!HierarchicalNSW) throw new Error('HierarchicalNSW export not found');
      return HierarchicalNSW;
    } catch (err) {
      hnswlibLoadError = err;
      console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
      return null;
    }
  })();

  return hnswlibPromise;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Initializes an HNSW index, trying progressively shorter `initIndex`
 * argument lists until one is accepted.
 *
 * Attempts, in order: `(maxElements, m, efConstruction, 100)`,
 * `(maxElements, m, efConstruction)`, then `(maxElements)`. The first two
 * failures are logged as warnings; a failure of the last attempt propagates.
 *
 * @param {{ initIndex: Function }} index - Index object to initialize.
 * @param {number} maxElements - Capacity passed to `initIndex`.
 * @param {number} m - Passed through as the second `initIndex` argument.
 * @param {number} efConstruction - Passed through as the third `initIndex` argument.
 * @returns {void}
 */
function initHnswIndex(index, maxElements, m, efConstruction) {
  const attempts = [
    { warning: '[ANN] Standard init failed', args: [maxElements, m, efConstruction, 100] },
    { warning: '[ANN] Legacy init failed', args: [maxElements, m, efConstruction] },
  ];

  for (const { warning, args } of attempts) {
    try {
      index.initIndex(...args);
      return;
    } catch (err) {
      console.warn(`${warning}: ${err.message}`);
    }
  }

  // Last resort: capacity only. Errors here are intentionally not caught.
  index.initIndex(maxElements);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Loads an HNSW index from disk, first with an explicit capacity and then,
 * if that call throws, without one.
 *
 * @param {{ readIndexSync: Function }} index - Index object to populate.
 * @param {string} filePath - Index file to read.
 * @param {number} maxElements - Capacity passed to the first attempt only.
 * @returns {boolean} `true` when either attempt succeeds, `false` otherwise.
 */
function readHnswIndex(index, filePath, maxElements) {
  let loaded = false;

  try {
    index.readIndexSync(filePath, maxElements);
    loaded = true;
  } catch {
    // The two-argument form was rejected; retry below with the path only.
  }
  if (loaded) return true;

  try {
    index.readIndexSync(filePath);
    loaded = true;
  } catch (err) {
    console.warn(`[ANN] Read index failed: ${err.message}`);
  }

  return loaded;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Extracts a plain array of labels from a search result of varying shape.
 *
 * @param {*} result - Either an array (returned as-is) or an object exposing
 *   `labels`, `neighbors`, or `indices` (first truthy one wins).
 * @returns {Array} The label array, or an empty array when none is found.
 */
function normalizeLabels(result) {
  if (Array.isArray(result)) return result;

  const found = result ? result.labels || result.neighbors || result.indices : null;
  if (!found) return [];

  return Array.from(found);
}
|
|
146
|
+
|
|
147
|
+
/**
 * Returns the vector as a `Float32Array`, converting when necessary.
 *
 * @param {*} vector - A `Float32Array` (returned unchanged), another typed
 *   array view, or anything accepted by the `Float32Array` constructor.
 * @returns {Float32Array|null} The converted vector, or `null` for a falsy input.
 */
function ensureFloat32(vector) {
  if (!vector) return null;
  if (vector instanceof Float32Array) return vector;

  // Other views are copied element by element (values are converted; the
  // underlying bytes are NOT reinterpreted).
  return ArrayBuffer.isView(vector) ? Float32Array.from(vector) : new Float32Array(vector);
}
|
|
158
|
+
|
|
159
|
+
/**
 * Converts `chunk.vector` to a `Float32Array` in place.
 * Chunks that are nullish or have no (truthy) vector are left untouched.
 *
 * @param {{ vector?: * }|null|undefined} chunk - Chunk to normalize.
 * @returns {void}
 */
function normalizeChunkVector(chunk) {
  const current = chunk?.vector;
  if (!current) return;
  chunk.vector = ensureFloat32(current);
}
|
|
162
|
+
|
|
163
|
+
/**
 * Stamps every truthy entry of `store` with its array position as `_index`.
 * Non-array inputs are ignored.
 *
 * @param {Array<object>} store - Chunk list to annotate in place.
 * @returns {void}
 */
function assignChunkIndices(store) {
  if (!Array.isArray(store)) return;

  store.forEach((entry, position) => {
    if (entry) entry._index = position;
  });
}
|
|
172
|
+
|
|
173
|
+
/**
 * Normalizes a stored file-hash entry into `{ hash, mtimeMs?, size? }`.
 *
 * A non-empty string is treated as a bare hash. Objects must carry a string
 * `hash`; `mtimeMs` and `size` are kept only when they are finite numbers.
 *
 * @param {*} entry - Raw entry (string or object).
 * @returns {{ hash: string, mtimeMs?: number, size?: number }|null} The
 *   normalized entry, or `null` when the input is unusable.
 */
function normalizeFileHashEntry(entry) {
  if (!entry) return null;

  switch (typeof entry) {
    case 'string':
      return { hash: entry };
    case 'object':
      break;
    default:
      return null;
  }

  const { hash, mtimeMs, size } = entry;
  if (typeof hash !== 'string') return null;

  return {
    hash,
    ...(Number.isFinite(mtimeMs) ? { mtimeMs } : {}),
    ...(Number.isFinite(size) ? { size } : {}),
  };
}
|
|
183
|
+
|
|
184
|
+
/**
 * Produces the persisted form of a file-hash entry: `{ hash, mtimeMs?, size? }`.
 *
 * A non-empty string becomes `{ hash }`. Objects need a string `hash`;
 * `mtimeMs` and `size` are written only when they are finite numbers.
 *
 * @param {*} entry - In-memory entry (string or object).
 * @returns {{ hash: string, mtimeMs?: number, size?: number }|null} The
 *   serializable entry, or `null` when the input is unusable.
 */
function serializeFileHashEntry(entry) {
  if (!entry) return null;
  if (typeof entry === 'string') return { hash: entry };

  const usable = typeof entry === 'object' && typeof entry.hash === 'string';
  if (!usable) return null;

  const serialized = { hash: entry.hash };
  for (const field of ['mtimeMs', 'size']) {
    const value = entry[field];
    if (Number.isFinite(value)) serialized[field] = value;
  }
  return serialized;
}
|
|
194
|
+
|
|
195
|
+
/**
 * Computes how many elements to reserve for an ANN index holding `total`
 * vectors, leaving headroom for growth.
 *
 * The result is the largest of: `total`, `ceil(total * factor)`, and
 * `total + extra`.
 *
 * @param {number} total - Number of vectors currently stored.
 * @param {{ annCapacityFactor?: number, annCapacityExtra?: number }} config -
 *   `annCapacityFactor` must be a finite number (default 1.2);
 *   `annCapacityExtra` must be an integer (default 1024). Invalid values fall
 *   back to the defaults.
 * @returns {number} Capacity to allocate (never less than `total`).
 */
function computeAnnCapacity(total, config) {
  // `typeof x === 'number'` would also accept NaN and ±Infinity, which turn
  // the capacity into NaN or Infinity; only finite factors are honoured.
  const factor = Number.isFinite(config.annCapacityFactor) ? config.annCapacityFactor : 1.2;
  const extra = Number.isInteger(config.annCapacityExtra) ? config.annCapacityExtra : 1024;
  const byFactor = Math.ceil(total * factor);
  const byExtra = total + extra;
  return Math.max(total, byFactor, byExtra);
}
|
|
202
|
+
|
|
203
|
+
export class EmbeddingsCache {
|
|
204
|
+
constructor(config) {
|
|
205
|
+
this.config = config;
|
|
206
|
+
|
|
207
|
+
this.vectorStore = [];
|
|
208
|
+
this.fileHashes = new Map();
|
|
209
|
+
this.isSaving = false;
|
|
210
|
+
this.lastIndexDurationMs = null;
|
|
211
|
+
this.lastIndexStats = null;
|
|
212
|
+
|
|
213
|
+
this.cacheMeta = {
|
|
214
|
+
version: CACHE_META_VERSION,
|
|
215
|
+
embeddingModel: config.embeddingModel,
|
|
216
|
+
};
|
|
217
|
+
|
|
218
|
+
// Save coalescing / debounce (serialized via saveQueue)
|
|
219
|
+
this.saveQueue = Promise.resolve();
|
|
220
|
+
this._saveTimer = null;
|
|
221
|
+
this._saveRequested = false;
|
|
222
|
+
this._savePromise = null;
|
|
223
|
+
|
|
224
|
+
// ANN state
|
|
225
|
+
this.annIndex = null;
|
|
226
|
+
this.annMeta = null;
|
|
227
|
+
this.annDirty = false; // needs rebuild
|
|
228
|
+
this.annPersistDirty = false; // in-memory differs from disk
|
|
229
|
+
this.annLoading = null;
|
|
230
|
+
this.annVectorCache = null;
|
|
231
|
+
|
|
232
|
+
// Call graph
|
|
233
|
+
this.fileCallData = new Map();
|
|
234
|
+
this.callGraph = null;
|
|
235
|
+
this._callGraphBuild = null;
|
|
236
|
+
|
|
237
|
+
// Binary vector store (optional)
|
|
238
|
+
this.binaryStore = null;
|
|
239
|
+
|
|
240
|
+
// Error tracking
|
|
241
|
+
this.initErrors = [];
|
|
242
|
+
|
|
243
|
+
// Concurrency hooks (read tracking)
|
|
244
|
+
this.activeReads = 0;
|
|
245
|
+
this._readWaiters = [];
|
|
246
|
+
|
|
247
|
+
// Lazy reload support after dropping in-memory vectors
|
|
248
|
+
this._clearedAfterIndex = false;
|
|
249
|
+
this._loadPromise = null;
|
|
30
250
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
return;
|
|
38
|
-
} catch {}
|
|
39
|
-
try {
|
|
40
|
-
index.initIndex(maxElements, m, efConstruction);
|
|
41
|
-
return;
|
|
42
|
-
} catch {}
|
|
43
|
-
index.initIndex(maxElements);
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
function readHnswIndex(index, filePath, maxElements) {
|
|
47
|
-
try {
|
|
48
|
-
index.readIndexSync(filePath, maxElements);
|
|
49
|
-
return true;
|
|
50
|
-
} catch {}
|
|
51
|
-
try {
|
|
52
|
-
index.readIndexSync(filePath);
|
|
53
|
-
return true;
|
|
54
|
-
} catch {}
|
|
55
|
-
return false;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
function normalizeLabels(result) {
|
|
59
|
-
if (!result) return [];
|
|
60
|
-
if (Array.isArray(result)) return result;
|
|
61
|
-
const labels = result.labels || result.neighbors || result.indices;
|
|
62
|
-
if (labels) {
|
|
63
|
-
return Array.from(labels);
|
|
64
|
-
}
|
|
65
|
-
return [];
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
function toFloat32Array(vector) {
|
|
69
|
-
if (vector instanceof Float32Array) {
|
|
70
|
-
return vector;
|
|
71
|
-
}
|
|
72
|
-
return Float32Array.from(vector);
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
export class EmbeddingsCache {
|
|
76
|
-
constructor(config) {
|
|
77
|
-
this.config = config;
|
|
78
|
-
this.vectorStore = [];
|
|
79
|
-
this.fileHashes = new Map();
|
|
80
|
-
this.isSaving = false;
|
|
81
|
-
this.cacheMeta = {
|
|
82
|
-
version: CACHE_META_VERSION,
|
|
83
|
-
embeddingModel: config.embeddingModel
|
|
84
|
-
};
|
|
85
|
-
this.annIndex = null;
|
|
86
|
-
this.annMeta = null;
|
|
87
|
-
this.annDirty = false;
|
|
88
|
-
this.annLoading = null;
|
|
89
|
-
this.annVectorCache = null;
|
|
90
|
-
// Call graph data
|
|
91
|
-
this.fileCallData = new Map(); // file -> { definitions: [], calls: [] }
|
|
92
|
-
this.callGraph = null; // { defines, calledBy, fileCalls }
|
|
251
|
+
|
|
252
|
+
async close() {
|
|
253
|
+
if (this.binaryStore) {
|
|
254
|
+
await this.binaryStore.close();
|
|
255
|
+
this.binaryStore = null;
|
|
256
|
+
}
|
|
93
257
|
}
|
|
94
258
|
|
|
95
|
-
async
|
|
259
|
+
async ensureLoaded() {
|
|
96
260
|
if (!this.config.enableCache) return;
|
|
261
|
+
if (!this._clearedAfterIndex) return;
|
|
262
|
+
if (this._loadPromise) return this._loadPromise;
|
|
263
|
+
|
|
264
|
+
this._loadPromise = (async () => {
|
|
265
|
+
await this.load();
|
|
266
|
+
this._clearedAfterIndex = false;
|
|
267
|
+
})().finally(() => {
|
|
268
|
+
this._loadPromise = null;
|
|
269
|
+
});
|
|
97
270
|
|
|
98
|
-
|
|
99
|
-
await fs.mkdir(this.config.cacheDirectory, { recursive: true });
|
|
100
|
-
const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
|
|
101
|
-
const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
|
|
102
|
-
const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
|
|
103
|
-
|
|
104
|
-
const [metaData, cacheData, hashData] = await Promise.all([
|
|
105
|
-
fs.readFile(metaFile, "utf-8").catch(() => null),
|
|
106
|
-
fs.readFile(cacheFile, "utf-8").catch(() => null),
|
|
107
|
-
fs.readFile(hashFile, "utf-8").catch(() => null)
|
|
108
|
-
]);
|
|
109
|
-
|
|
110
|
-
if (!metaData && !cacheData && !hashData) {
|
|
111
|
-
return;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
if (!metaData) {
|
|
115
|
-
console.error("[Cache] Missing cache metadata, ignoring cache");
|
|
116
|
-
return;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
let meta = null;
|
|
120
|
-
try {
|
|
121
|
-
meta = JSON.parse(metaData);
|
|
122
|
-
} catch {
|
|
123
|
-
console.error("[Cache] Invalid cache metadata, ignoring cache");
|
|
124
|
-
return;
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
if (meta?.version !== CACHE_META_VERSION) {
|
|
128
|
-
console.error(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
|
|
129
|
-
return;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
if (meta?.embeddingModel !== this.config.embeddingModel) {
|
|
133
|
-
console.error(`[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`);
|
|
134
|
-
return;
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
this.cacheMeta = meta;
|
|
138
|
-
|
|
139
|
-
if (cacheData && hashData) {
|
|
140
|
-
const rawVectorStore = JSON.parse(cacheData);
|
|
141
|
-
const rawHashes = new Map(Object.entries(JSON.parse(hashData)));
|
|
142
|
-
|
|
143
|
-
// Filter cache to only include files matching current extensions
|
|
144
|
-
const allowedExtensions = this.config.fileExtensions.map(ext => `.${ext}`);
|
|
145
|
-
|
|
146
|
-
this.vectorStore = rawVectorStore.filter(chunk => {
|
|
147
|
-
const ext = path.extname(chunk.file);
|
|
148
|
-
return allowedExtensions.includes(ext);
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
// Only keep hashes for files matching current extensions
|
|
152
|
-
for (const [file, hash] of rawHashes) {
|
|
153
|
-
const ext = path.extname(file);
|
|
154
|
-
if (allowedExtensions.includes(ext)) {
|
|
155
|
-
this.fileHashes.set(file, hash);
|
|
156
|
-
}
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
const filtered = rawVectorStore.length - this.vectorStore.length;
|
|
160
|
-
if (filtered > 0) {
|
|
161
|
-
console.error(`[Cache] Filtered ${filtered} outdated cache entries`);
|
|
162
|
-
}
|
|
163
|
-
console.error(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
|
|
164
|
-
this.annDirty = false;
|
|
165
|
-
this.annIndex = null;
|
|
166
|
-
this.annMeta = null;
|
|
167
|
-
}
|
|
168
|
-
|
|
169
|
-
// Load call-graph data if it exists
|
|
170
|
-
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
171
|
-
try {
|
|
172
|
-
const callGraphData = await fs.readFile(callGraphFile, "utf8");
|
|
173
|
-
const parsed = JSON.parse(callGraphData);
|
|
174
|
-
this.fileCallData = new Map(Object.entries(parsed));
|
|
175
|
-
if (this.config.verbose) {
|
|
176
|
-
console.error(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
|
|
177
|
-
}
|
|
178
|
-
} catch {
|
|
179
|
-
// Call-graph file doesn't exist yet, that's OK
|
|
180
|
-
}
|
|
181
|
-
} catch (error) {
|
|
182
|
-
console.error("[Cache] Failed to load cache:", error.message);
|
|
183
|
-
}
|
|
271
|
+
return this._loadPromise;
|
|
184
272
|
}
|
|
185
273
|
|
|
186
|
-
async
|
|
274
|
+
async dropInMemoryVectors() {
|
|
187
275
|
if (!this.config.enableCache) return;
|
|
188
276
|
|
|
189
|
-
this.
|
|
190
|
-
|
|
191
|
-
try {
|
|
192
|
-
await fs.mkdir(this.config.cacheDirectory, { recursive: true });
|
|
193
|
-
const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
|
|
194
|
-
const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
|
|
195
|
-
const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
|
|
196
|
-
|
|
197
|
-
// Include indexing stats in meta for verification
|
|
198
|
-
const uniqueFiles = new Set(this.vectorStore.map(chunk => chunk.file));
|
|
199
|
-
this.cacheMeta = {
|
|
200
|
-
version: CACHE_META_VERSION,
|
|
201
|
-
embeddingModel: this.config.embeddingModel,
|
|
202
|
-
lastSaveTime: new Date().toISOString(),
|
|
203
|
-
filesIndexed: uniqueFiles.size,
|
|
204
|
-
chunksStored: this.vectorStore.length,
|
|
205
|
-
workspace: this.config.searchDirectory || null
|
|
206
|
-
};
|
|
207
|
-
|
|
208
|
-
await Promise.all([
|
|
209
|
-
fs.writeFile(cacheFile, JSON.stringify(this.vectorStore, null, 2)),
|
|
210
|
-
fs.writeFile(hashFile, JSON.stringify(Object.fromEntries(this.fileHashes), null, 2)),
|
|
211
|
-
fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2))
|
|
212
|
-
]);
|
|
213
|
-
|
|
214
|
-
// Save call-graph data (or remove stale cache if empty)
|
|
215
|
-
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
216
|
-
if (this.fileCallData.size > 0) {
|
|
217
|
-
await fs.writeFile(callGraphFile, JSON.stringify(Object.fromEntries(this.fileCallData), null, 2));
|
|
218
|
-
} else {
|
|
219
|
-
await fs.rm(callGraphFile, { force: true });
|
|
220
|
-
}
|
|
221
|
-
} catch (error) {
|
|
222
|
-
console.error("[Cache] Failed to save cache:", error.message);
|
|
223
|
-
} finally {
|
|
224
|
-
this.isSaving = false;
|
|
277
|
+
if (this.activeReads > 0) {
|
|
278
|
+
await this.waitForReaders();
|
|
225
279
|
}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
getVectorStore() {
|
|
229
|
-
return this.vectorStore;
|
|
230
|
-
}
|
|
231
|
-
|
|
232
|
-
setVectorStore(store) {
|
|
233
|
-
this.vectorStore = store;
|
|
234
|
-
this.invalidateAnnIndex();
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
getFileHash(file) {
|
|
238
|
-
return this.fileHashes.get(file);
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
setFileHash(file, hash) {
|
|
242
|
-
this.fileHashes.set(file, hash);
|
|
243
|
-
}
|
|
244
|
-
|
|
245
|
-
deleteFileHash(file) {
|
|
246
|
-
this.fileHashes.delete(file);
|
|
247
|
-
}
|
|
248
280
|
|
|
249
|
-
|
|
250
|
-
this.
|
|
251
|
-
this.invalidateAnnIndex();
|
|
252
|
-
// Also clear call-graph data for this file
|
|
253
|
-
this.removeFileCallData(file);
|
|
254
|
-
}
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
addToStore(chunk) {
|
|
258
|
-
this.vectorStore.push(chunk);
|
|
259
|
-
this.invalidateAnnIndex();
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
invalidateAnnIndex() {
|
|
281
|
+
this.vectorStore = [];
|
|
282
|
+
this.annVectorCache = null;
|
|
263
283
|
this.annIndex = null;
|
|
264
284
|
this.annMeta = null;
|
|
265
285
|
this.annDirty = true;
|
|
266
|
-
this.
|
|
267
|
-
}
|
|
268
|
-
|
|
269
|
-
getAnnVector(index) {
|
|
270
|
-
if (!this.annVectorCache || this.annVectorCache.length !== this.vectorStore.length) {
|
|
271
|
-
this.annVectorCache = new Array(this.vectorStore.length);
|
|
272
|
-
}
|
|
273
|
-
|
|
274
|
-
let cached = this.annVectorCache[index];
|
|
275
|
-
if (!cached) {
|
|
276
|
-
const vector = this.vectorStore[index]?.vector;
|
|
277
|
-
if (!vector) {
|
|
278
|
-
return null;
|
|
279
|
-
}
|
|
280
|
-
cached = toFloat32Array(vector);
|
|
281
|
-
this.annVectorCache[index] = cached;
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
return cached;
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
getAnnIndexPaths() {
|
|
288
|
-
return {
|
|
289
|
-
indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
|
|
290
|
-
metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE)
|
|
291
|
-
};
|
|
292
|
-
}
|
|
286
|
+
this.annPersistDirty = false;
|
|
293
287
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
this.annLoading = (async () => {
|
|
301
|
-
const HierarchicalNSW = await loadHnswlib();
|
|
302
|
-
if (!HierarchicalNSW) return null;
|
|
303
|
-
|
|
304
|
-
const dim = this.vectorStore[0]?.vector?.length;
|
|
305
|
-
if (!dim) return null;
|
|
306
|
-
|
|
307
|
-
if (!this.annDirty && this.config.annIndexCache !== false) {
|
|
308
|
-
const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
|
|
309
|
-
if (loaded) return this.annIndex;
|
|
288
|
+
if (this.binaryStore) {
|
|
289
|
+
try {
|
|
290
|
+
await this.binaryStore.close();
|
|
291
|
+
} catch {
|
|
292
|
+
// ignore close errors
|
|
310
293
|
}
|
|
311
|
-
|
|
312
|
-
return await this.buildAnnIndex(HierarchicalNSW, dim);
|
|
313
|
-
})();
|
|
314
|
-
|
|
315
|
-
const index = await this.annLoading;
|
|
316
|
-
this.annLoading = null;
|
|
317
|
-
return index;
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
|
|
321
|
-
const { indexFile, metaFile } = this.getAnnIndexPaths();
|
|
322
|
-
const metaData = await fs.readFile(metaFile, "utf-8").catch(() => null);
|
|
323
|
-
|
|
324
|
-
if (!metaData) {
|
|
325
|
-
return false;
|
|
294
|
+
this.binaryStore = null;
|
|
326
295
|
}
|
|
327
296
|
|
|
328
|
-
|
|
329
|
-
try {
|
|
330
|
-
meta = JSON.parse(metaData);
|
|
331
|
-
} catch {
|
|
332
|
-
console.error("[ANN] Invalid ANN metadata, rebuilding");
|
|
333
|
-
return false;
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
if (meta?.version !== ANN_META_VERSION) {
|
|
337
|
-
console.error(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
|
|
338
|
-
return false;
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
if (meta?.embeddingModel !== this.config.embeddingModel) {
|
|
342
|
-
console.error(`[ANN] Embedding model changed for ANN index, rebuilding`);
|
|
343
|
-
return false;
|
|
344
|
-
}
|
|
345
|
-
|
|
346
|
-
if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
|
|
347
|
-
console.error("[ANN] ANN index size mismatch, rebuilding");
|
|
348
|
-
return false;
|
|
349
|
-
}
|
|
350
|
-
|
|
351
|
-
if (meta?.metric !== this.config.annMetric ||
|
|
352
|
-
meta?.m !== this.config.annM ||
|
|
353
|
-
meta?.efConstruction !== this.config.annEfConstruction) {
|
|
354
|
-
console.error("[ANN] ANN index config changed, rebuilding");
|
|
355
|
-
return false;
|
|
356
|
-
}
|
|
357
|
-
|
|
358
|
-
const index = new HierarchicalNSW(meta.metric, dim);
|
|
359
|
-
const loaded = readHnswIndex(index, indexFile, meta.count);
|
|
360
|
-
if (!loaded) {
|
|
361
|
-
console.error("[ANN] Failed to load ANN index file, rebuilding");
|
|
362
|
-
return false;
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
if (typeof index.setEf === "function") {
|
|
366
|
-
index.setEf(this.config.annEfSearch);
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
this.annIndex = index;
|
|
370
|
-
this.annMeta = meta;
|
|
371
|
-
this.annDirty = false;
|
|
372
|
-
console.error(`[ANN] Loaded ANN index (${meta.count} vectors)`);
|
|
373
|
-
return true;
|
|
297
|
+
this._clearedAfterIndex = true;
|
|
374
298
|
}
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
299
|
+
|
|
300
|
+
// -------------------- Concurrency Hooks --------------------
|
|
301
|
+
|
|
302
|
+
startRead() {
|
|
303
|
+
this.activeReads++;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
endRead() {
|
|
307
|
+
if (this.activeReads > 0) {
|
|
308
|
+
this.activeReads--;
|
|
309
|
+
if (this.activeReads === 0 && this._readWaiters.length > 0) {
|
|
310
|
+
const waiters = this._readWaiters;
|
|
311
|
+
this._readWaiters = [];
|
|
312
|
+
for (const resolve of waiters) {
|
|
313
|
+
resolve();
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
async waitForReaders() {
|
|
320
|
+
if (this.activeReads === 0) return;
|
|
321
|
+
await new Promise((resolve) => {
|
|
322
|
+
this._readWaiters.push(resolve);
|
|
323
|
+
});
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
// -------------------- Reset --------------------
|
|
327
|
+
|
|
328
|
+
/**
|
|
329
|
+
* Resets the cache state (clears vectors, hashes, and call graph).
|
|
330
|
+
* Used for forced reindexing.
|
|
331
|
+
*/
|
|
332
|
+
async reset() {
|
|
333
|
+
this.vectorStore = [];
|
|
334
|
+
if (this.binaryStore) {
|
|
335
|
+
try {
|
|
336
|
+
await this.binaryStore.close();
|
|
337
|
+
} catch {
|
|
338
|
+
// ignore close errors
|
|
339
|
+
}
|
|
340
|
+
this.binaryStore = null;
|
|
341
|
+
}
|
|
342
|
+
this.fileHashes.clear();
|
|
343
|
+
this.invalidateAnnIndex();
|
|
344
|
+
await this.clearCallGraphData({ removeFile: true });
|
|
345
|
+
this.initErrors = [];
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// -------------------- Load --------------------
|
|
349
|
+
|
|
350
|
+
/**
 * Populate the in-memory cache state from the files in `config.cacheDirectory`.
 *
 * Does nothing when `config.enableCache` is falsy. Everything else runs inside
 * one try/catch, so any failure is logged as a warning rather than rethrown.
 *
 * Order of operations:
 *  1. Read and validate the metadata file (version + embedding model); any
 *     mismatch returns early without touching the stores.
 *  2. Read embeddings (JSON or binary store) and file hashes.
 *  3. Filter/normalize chunks into `this.vectorStore` and `this.fileHashes`.
 *  4. Reset ANN state so the index is loaded/built on demand.
 *  5. Read call-graph data if its file is readable.
 *
 * @returns {Promise<void>}
 */
async load() {
  if (!this.config.enableCache) return;

  try {
    await fs.mkdir(this.config.cacheDirectory, { recursive: true });

    const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
    const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
    const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);

    // Only a positive integer from config is honored; anything else uses the default.
    const workerThresholdBytes =
      Number.isInteger(this.config.jsonWorkerThresholdBytes) &&
      this.config.jsonWorkerThresholdBytes > 0
        ? this.config.jsonWorkerThresholdBytes
        : DEFAULT_JSON_WORKER_THRESHOLD;

    const useBinary = this.config.vectorStoreFormat === 'binary';

    const { vectorsPath, recordsPath, contentPath, filesPath } = BinaryVectorStore.getPaths(
      this.config.cacheDirectory,
    );
    // Resolves to true when fs.access succeeds, false on any error.
    const pathExists = async (targetPath) => {
      try {
        await fs.access(targetPath);
        return true;
      } catch {
        return false;
      }
    };

    // In tests, read cache files eagerly to exercise worker paths.
    let cacheData = null;
    let hashData = null;
    let prefetched = false;
    if (IS_TEST_ENV) {
      prefetched = true;
      // In binary mode the JSON embeddings file is not read here.
      const cachePromise = useBinary
        ? Promise.resolve(null)
        : readJsonFile(cacheFile, { workerThresholdBytes });
      [cacheData, hashData] = await Promise.all([
        cachePromise,
        readJsonFile(hashFile, { workerThresholdBytes }),
      ]);
    }

    // Read meta first to avoid parsing huge cache files when invalid
    const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
    if (!metaData) {
      console.warn('[Cache] Missing cache metadata, ignoring cache');
      return;
    }

    let meta;
    try {
      meta = JSON.parse(metaData);
    } catch {
      console.warn('[Cache] Invalid cache metadata, ignoring cache');
      return;
    }

    if (meta?.version !== CACHE_META_VERSION) {
      console.warn(`[Cache] Cache version mismatch (${meta?.version}), ignoring cache`);
      return;
    }

    if (meta?.embeddingModel !== this.config.embeddingModel) {
      console.warn(
        `[Cache] Embedding model changed, ignoring cache (${meta?.embeddingModel} -> ${this.config.embeddingModel})`,
      );
      return;
    }

    // Outside the test environment the (potentially large) files are read only
    // after the metadata checks above have passed.
    if (!prefetched) {
      [cacheData, hashData] = await Promise.all([
        useBinary ? Promise.resolve(null) : readJsonFile(cacheFile, { workerThresholdBytes }),
        readJsonFile(hashFile, { workerThresholdBytes }),
      ]);
    }

    this.cacheMeta = meta;

    // Probe which on-disk formats are present; used only for the diagnostic
    // warnings below. The binary format counts as present only when all four
    // of its files are accessible.
    const [binaryFilesPresent, jsonCachePresent] = await Promise.all([
      (async () => {
        const [vectorsOk, recordsOk, contentOk, filesOk] = await Promise.all([
          pathExists(vectorsPath),
          pathExists(recordsPath),
          pathExists(contentPath),
          pathExists(filesPath),
        ]);
        return vectorsOk && recordsOk && contentOk && filesOk;
      })(),
      pathExists(cacheFile),
    ]);

    if (useBinary && !binaryFilesPresent) {
      if (jsonCachePresent) {
        console.warn(
          '[Cache] vectorStoreFormat=binary but binary cache files are missing; embeddings.json exists. If you switched formats, reindex or set vectorStoreFormat=json.',
        );
      } else {
        console.warn(
          '[Cache] vectorStoreFormat=binary but binary cache files are missing. Reindex to regenerate the cache.',
        );
      }
    } else if (!useBinary && !jsonCachePresent) {
      if (binaryFilesPresent) {
        console.warn(
          '[Cache] vectorStoreFormat=json but binary cache files exist. If you switched formats, set vectorStoreFormat=binary or reindex.',
        );
      } else {
        console.warn(
          '[Cache] vectorStoreFormat=json but embeddings.json is missing. Reindex to regenerate the cache.',
        );
      }
    }

    if (useBinary) {
      try {
        this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
          contentCacheEntries: this.config.contentCacheEntries,
          vectorCacheEntries: this.config.vectorCacheEntries,
          vectorLoadMode: this.config.vectorStoreLoadMode,
        });
        // Content is requested inline only in 'inline' content mode; vectors are
        // requested unless the load mode is 'disk'.
        cacheData = await this.binaryStore.toChunkViews({
          includeContent: this.config.vectorStoreContentMode === 'inline',
          includeVector: this.config.vectorStoreLoadMode !== 'disk',
        });
      } catch (err) {
        // A failed binary load is not fatal; the JSON read below still runs.
        this.binaryStore = null;
        console.warn(`[Cache] Failed to load binary vector store: ${err.message}`);
      }
    }

    // Reached with a falsy cacheData when the binary load failed or returned
    // nothing, and also when the earlier JSON read yielded a falsy value.
    if (!cacheData) {
      cacheData = await readJsonFile(cacheFile, { workerThresholdBytes });
    }

    const hasCacheData = Array.isArray(cacheData);
    const hasHashData = hashData && typeof hashData === 'object';

    if (hasCacheData) {
      // config.fileExtensions entries are compared against path.extname output,
      // hence the leading dot.
      const allowedExtensions = new Set((this.config.fileExtensions || []).map((ext) => `.${ext}`));
      // The extension filter is skipped whenever a binary store is loaded.
      const applyExtensionFilter = !this.binaryStore;

      const rawHashes = hasHashData ? new Map(Object.entries(hashData)) : new Map();
      this.vectorStore = [];
      this.fileHashes.clear();

      // Single-pass filter + normalization
      for (const chunk of cacheData) {
        if (applyExtensionFilter) {
          const ext = path.extname(chunk.file);
          if (!allowedExtensions.has(ext)) continue;
        }
        normalizeChunkVector(chunk);
        this.vectorStore.push(chunk);
      }
      const filteredCount = cacheData.length - this.vectorStore.length;
      if (filteredCount > 0 && this.config.verbose) {
        console.info(`[Cache] Filtered ${filteredCount} outdated cache entries`);
      }

      if (hasHashData) {
        // Only keep hashes for allowed extensions
        for (const [file, entry] of rawHashes) {
          if (!applyExtensionFilter || allowedExtensions.has(path.extname(file))) {
            const normalized = normalizeFileHashEntry(entry);
            // Entries the normalizer returns a falsy value for are dropped.
            if (normalized) {
              this.fileHashes.set(file, normalized);
            }
          }
        }
      } else {
        console.warn('[Cache] Missing file-hashes.json; loaded embeddings but hashes were cleared');
      }

      assignChunkIndices(this.vectorStore);

      if (this.config.verbose) {
        console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
      }

      // ANN index is lazily loaded/built on first query
      this.annDirty = false;
      this.annPersistDirty = false;
      this.annIndex = null;
      this.annMeta = null;
      this.annVectorCache = null;
    } else if (cacheData) {
      console.warn('[Cache] Cache data is not an array; ignoring cached embeddings');
    } else if (hasHashData) {
      console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
    }

    // Load call-graph data if it exists
    const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
    try {
      const callGraphData = await fs.readFile(callGraphFile, 'utf8');
      const parsed = JSON.parse(callGraphData);
      this.fileCallData = new Map(Object.entries(parsed));
      if (this.config.verbose) {
        console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
      }
    } catch {
      // no cache yet, OK
      // NOTE(review): a read or parse failure of an existing file is silenced here too.
    }
  } catch (error) {
    console.warn('[Cache] Failed to load cache:', error.message);
  }
}
|
|
560
|
+
|
|
561
|
+
// -------------------- Save (debounced + serialized) --------------------
|
|
562
|
+
|
|
563
|
+
save() {
|
|
564
|
+
if (!this.config.enableCache) return Promise.resolve();
|
|
565
|
+
|
|
566
|
+
this._saveRequested = true;
|
|
567
|
+
|
|
568
|
+
if (this._saveTimer) return this._savePromise ?? Promise.resolve();
|
|
569
|
+
|
|
570
|
+
const debounceMs = Number.isInteger(this.config.saveDebounceMs)
|
|
571
|
+
? this.config.saveDebounceMs
|
|
572
|
+
: 250;
|
|
573
|
+
|
|
574
|
+
this._savePromise = new Promise((resolve, reject) => {
|
|
575
|
+
this._saveTimer = setTimeout(() => {
|
|
576
|
+
this._saveTimer = null;
|
|
577
|
+
|
|
578
|
+
this.saveQueue = this.saveQueue
|
|
579
|
+
.then(async () => {
|
|
580
|
+
while (this._saveRequested) {
|
|
581
|
+
this._saveRequested = false;
|
|
582
|
+
await this.performSave();
|
|
583
|
+
}
|
|
584
|
+
})
|
|
585
|
+
.then(resolve, reject)
|
|
586
|
+
.finally(() => {
|
|
587
|
+
this._savePromise = null;
|
|
588
|
+
});
|
|
589
|
+
}, debounceMs);
|
|
590
|
+
});
|
|
591
|
+
|
|
592
|
+
return this._savePromise;
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
async performSave() {
|
|
596
|
+
this.isSaving = true;
|
|
597
|
+
|
|
598
|
+
try {
|
|
599
|
+
await fs.mkdir(this.config.cacheDirectory, { recursive: true });
|
|
600
|
+
|
|
601
|
+
const cacheFile = path.join(this.config.cacheDirectory, 'embeddings.json');
|
|
602
|
+
const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
|
|
603
|
+
const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
|
|
604
|
+
|
|
605
|
+
// Snapshot to avoid race conditions during async write
|
|
606
|
+
const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
|
|
607
|
+
|
|
608
|
+
this.cacheMeta = {
|
|
609
|
+
version: CACHE_META_VERSION,
|
|
610
|
+
embeddingModel: this.config.embeddingModel,
|
|
611
|
+
lastSaveTime: new Date().toISOString(),
|
|
612
|
+
filesIndexed: this.fileHashes.size,
|
|
613
|
+
chunksStored: snapshotStore.length,
|
|
614
|
+
workspace: this.config.searchDirectory || null,
|
|
615
|
+
};
|
|
616
|
+
if (Number.isFinite(this.lastIndexDurationMs) && this.lastIndexDurationMs >= 0) {
|
|
617
|
+
this.cacheMeta.indexDurationMs = Math.round(this.lastIndexDurationMs);
|
|
618
|
+
}
|
|
619
|
+
if (this.lastIndexStats && typeof this.lastIndexStats === 'object') {
|
|
620
|
+
Object.assign(this.cacheMeta, this.lastIndexStats);
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
const total = snapshotStore.length;
|
|
624
|
+
if (this.config.vectorStoreFormat === 'binary') {
|
|
625
|
+
this.binaryStore = await BinaryVectorStore.write(
|
|
626
|
+
this.config.cacheDirectory,
|
|
627
|
+
snapshotStore,
|
|
628
|
+
{
|
|
629
|
+
contentCacheEntries: this.config.contentCacheEntries,
|
|
630
|
+
getContent: (chunk, index) => this.getChunkContent(chunk, index),
|
|
631
|
+
preRename: async () => {
|
|
632
|
+
if (this.binaryStore) await this.binaryStore.close();
|
|
633
|
+
},
|
|
634
|
+
},
|
|
635
|
+
);
|
|
636
|
+
if (this.binaryStore) {
|
|
637
|
+
this.cacheMeta.chunksStored = this.binaryStore.length;
|
|
522
638
|
}
|
|
639
|
+
} else {
|
|
640
|
+
const vectorWriter = new StreamingJsonWriter(cacheFile, {
|
|
641
|
+
highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
|
|
642
|
+
floatDigits:
|
|
643
|
+
this.config.cacheVectorFloatDigits === undefined
|
|
644
|
+
? 6
|
|
645
|
+
: this.config.cacheVectorFloatDigits,
|
|
646
|
+
flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
|
|
647
|
+
indent: '', // set to " " if you prefer pretty formatting
|
|
648
|
+
assumeFinite: this.config.cacheVectorAssumeFinite,
|
|
649
|
+
checkFinite: this.config.cacheVectorCheckFinite,
|
|
650
|
+
noMutation: this.config.cacheVectorNoMutation ?? false,
|
|
651
|
+
joinThreshold: this.config.cacheVectorJoinThreshold ?? 8192,
|
|
652
|
+
joinChunkSize: this.config.cacheVectorJoinChunkSize ?? 2048,
|
|
653
|
+
});
|
|
654
|
+
|
|
655
|
+
await vectorWriter.writeStart();
|
|
656
|
+
|
|
657
|
+
// Optional responsiveness yield (only for huge saves)
|
|
658
|
+
const yieldEvery = total >= 50_000 ? 5000 : 0;
|
|
659
|
+
|
|
660
|
+
try {
|
|
661
|
+
for (let i = 0; i < total; i++) {
|
|
662
|
+
const pending = vectorWriter.writeItem(snapshotStore[i]);
|
|
663
|
+
if (pending) await pending;
|
|
664
|
+
if (yieldEvery && i > 0 && i % yieldEvery === 0) await yieldToLoop();
|
|
665
|
+
}
|
|
666
|
+
await vectorWriter.writeEnd();
|
|
667
|
+
} catch (e) {
|
|
668
|
+
vectorWriter.abort(e);
|
|
669
|
+
throw e;
|
|
670
|
+
}
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
const hashEntries = {};
|
|
674
|
+
for (const [file, entry] of this.fileHashes) {
|
|
675
|
+
const serialized = serializeFileHashEntry(entry);
|
|
676
|
+
if (serialized) {
|
|
677
|
+
hashEntries[file] = serialized;
|
|
678
|
+
}
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
await Promise.all([
|
|
682
|
+
fs.writeFile(hashFile, JSON.stringify(hashEntries, null, 2)),
|
|
683
|
+
fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
|
|
684
|
+
]);
|
|
685
|
+
|
|
686
|
+
// Save call-graph data (or remove stale cache)
|
|
687
|
+
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
688
|
+
if (this.fileCallData.size > 0) {
|
|
689
|
+
await fs.writeFile(
|
|
690
|
+
callGraphFile,
|
|
691
|
+
JSON.stringify(Object.fromEntries(this.fileCallData), null, 2),
|
|
692
|
+
);
|
|
693
|
+
} else {
|
|
694
|
+
await fs.rm(callGraphFile, { force: true });
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Persist ANN index if it exists and changed in memory
|
|
698
|
+
if (
|
|
699
|
+
this.config.annIndexCache !== false &&
|
|
700
|
+
this.annPersistDirty &&
|
|
701
|
+
!this.annDirty &&
|
|
702
|
+
this.annIndex &&
|
|
703
|
+
this.annMeta
|
|
704
|
+
) {
|
|
705
|
+
try {
|
|
706
|
+
const { indexFile, metaFile: annMetaFile } = this.getAnnIndexPaths();
|
|
707
|
+
this.annIndex.writeIndexSync(indexFile);
|
|
708
|
+
await fs.writeFile(annMetaFile, JSON.stringify(this.annMeta, null, 2));
|
|
709
|
+
this.annPersistDirty = false;
|
|
710
|
+
if (this.config.verbose) {
|
|
711
|
+
console.info(`[ANN] Persisted updated ANN index (${this.annMeta.count} vectors)`);
|
|
712
|
+
}
|
|
713
|
+
} catch (error) {
|
|
714
|
+
console.warn(`[ANN] Failed to persist ANN index: ${error.message}`);
|
|
715
|
+
}
|
|
716
|
+
}
|
|
717
|
+
} catch (error) {
|
|
718
|
+
console.warn('[Cache] Failed to save cache:', error.message);
|
|
719
|
+
// Attempt to recover binary store if it was closed during failed save
|
|
720
|
+
if (this.config.vectorStoreFormat === 'binary' && this.binaryStore && !this.binaryStore.vectorsBuffer) {
|
|
721
|
+
try {
|
|
722
|
+
console.info('[Cache] Attempting to recover binary store after failed save...');
|
|
723
|
+
this.binaryStore = await BinaryVectorStore.load(this.config.cacheDirectory, {
|
|
724
|
+
contentCacheEntries: this.config.contentCacheEntries,
|
|
725
|
+
});
|
|
726
|
+
console.info('[Cache] Binary store recovered.');
|
|
727
|
+
} catch (recoverErr) {
|
|
728
|
+
console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
|
|
729
|
+
this.binaryStore = null; // Ensure it's null if unusable
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
} finally {
|
|
733
|
+
this.isSaving = false;
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
|
|
737
|
+
// -------------------- Vector Store API --------------------
|
|
738
|
+
|
|
739
|
+
getVectorStore() {
|
|
740
|
+
return Array.isArray(this.vectorStore) ? this.vectorStore : [];
|
|
741
|
+
}
|
|
742
|
+
|
|
743
|
+
async setVectorStore(store) {
|
|
744
|
+
const previousBinaryStore = this.binaryStore;
|
|
745
|
+
this.vectorStore = store;
|
|
746
|
+
this.binaryStore = null;
|
|
747
|
+
if (Array.isArray(this.vectorStore)) {
|
|
748
|
+
for (const chunk of this.vectorStore) normalizeChunkVector(chunk);
|
|
749
|
+
assignChunkIndices(this.vectorStore);
|
|
750
|
+
}
|
|
751
|
+
this.invalidateAnnIndex();
|
|
752
|
+
if (previousBinaryStore) {
|
|
753
|
+
try {
|
|
754
|
+
await previousBinaryStore.close();
|
|
755
|
+
} catch {
|
|
756
|
+
// ignore close errors
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
setLastIndexDuration(durationMs) {
|
|
762
|
+
if (Number.isFinite(durationMs) && durationMs >= 0) {
|
|
763
|
+
this.lastIndexDurationMs = durationMs;
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
setLastIndexStats(stats) {
|
|
768
|
+
if (stats && typeof stats === 'object') {
|
|
769
|
+
this.lastIndexStats = { ...stats };
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
getFileHash(file) {
|
|
774
|
+
const entry = this.fileHashes.get(file);
|
|
775
|
+
if (typeof entry === 'string') return entry;
|
|
776
|
+
return entry?.hash;
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
getFileHashKeys() {
|
|
780
|
+
return Array.from(this.fileHashes.keys());
|
|
781
|
+
}
|
|
782
|
+
|
|
783
|
+
getFileHashCount() {
|
|
784
|
+
return this.fileHashes.size;
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
clearFileHashes() {
|
|
788
|
+
this.fileHashes.clear();
|
|
789
|
+
}
|
|
790
|
+
|
|
791
|
+
setFileHashes(entries) {
|
|
792
|
+
this.fileHashes.clear();
|
|
793
|
+
if (!entries) return;
|
|
794
|
+
const iterator =
|
|
795
|
+
entries instanceof Map
|
|
796
|
+
? entries.entries()
|
|
797
|
+
: typeof entries === 'object'
|
|
798
|
+
? Object.entries(entries)
|
|
799
|
+
: null;
|
|
800
|
+
if (!iterator) return;
|
|
801
|
+
for (const [file, entry] of iterator) {
|
|
802
|
+
const normalized = normalizeFileHashEntry(entry);
|
|
803
|
+
if (normalized) {
|
|
804
|
+
this.fileHashes.set(file, normalized);
|
|
805
|
+
}
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
setFileHash(file, hash, meta = null) {
|
|
810
|
+
const entry = { hash };
|
|
811
|
+
if (meta && typeof meta === 'object') {
|
|
812
|
+
if (Number.isFinite(meta.mtimeMs)) entry.mtimeMs = meta.mtimeMs;
|
|
813
|
+
if (Number.isFinite(meta.size)) entry.size = meta.size;
|
|
814
|
+
}
|
|
815
|
+
this.fileHashes.set(file, entry);
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
getFileMeta(file) {
|
|
819
|
+
const entry = this.fileHashes.get(file);
|
|
820
|
+
if (!entry) return null;
|
|
821
|
+
if (typeof entry === 'string') return { hash: entry };
|
|
822
|
+
return entry;
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
getChunkVector(chunk, index = null) {
|
|
826
|
+
if (typeof chunk === 'number') {
|
|
827
|
+
const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
|
|
828
|
+
const entry = store ? store[chunk] : null;
|
|
829
|
+
if (entry?.vector) return entry.vector;
|
|
830
|
+
if (this.binaryStore) {
|
|
831
|
+
const resolved = Number.isInteger(entry?._binaryIndex) ? entry._binaryIndex : chunk;
|
|
832
|
+
return this.binaryStore.getVector(resolved);
|
|
523
833
|
}
|
|
834
|
+
return null;
|
|
524
835
|
}
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
if (
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
836
|
+
|
|
837
|
+
if (chunk?.vector) return chunk.vector;
|
|
838
|
+
const resolved = Number.isInteger(index) ? index : chunk?._index;
|
|
839
|
+
if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
|
|
840
|
+
return this.binaryStore.getVector(chunk._binaryIndex);
|
|
841
|
+
}
|
|
842
|
+
if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
|
|
843
|
+
return this.binaryStore.getVector(resolved);
|
|
844
|
+
}
|
|
845
|
+
return null;
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
async getChunkContent(chunk, index = null) {
|
|
849
|
+
if (typeof chunk === 'number') {
|
|
850
|
+
const store = Array.isArray(this.vectorStore) ? this.vectorStore : null;
|
|
851
|
+
const entry = store ? store[chunk] : null;
|
|
852
|
+
if (entry) return await this.getChunkContent(entry, chunk);
|
|
853
|
+
if (!store && this.binaryStore) {
|
|
854
|
+
return await this.binaryStore.getContent(chunk);
|
|
855
|
+
}
|
|
856
|
+
return '';
|
|
857
|
+
}
|
|
858
|
+
if (chunk?.content !== undefined && chunk?.content !== null) {
|
|
859
|
+
return chunk.content;
|
|
860
|
+
}
|
|
861
|
+
if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
|
|
862
|
+
return await this.binaryStore.getContent(chunk._binaryIndex);
|
|
863
|
+
}
|
|
864
|
+
const resolved = Number.isInteger(index) ? index : chunk?._index;
|
|
865
|
+
if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
|
|
866
|
+
return await this.binaryStore.getContent(resolved);
|
|
867
|
+
}
|
|
868
|
+
return '';
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
deleteFileHash(file) {
|
|
872
|
+
this.fileHashes.delete(file);
|
|
873
|
+
}
|
|
874
|
+
|
|
875
|
+
removeFileFromStore(file) {
|
|
876
|
+
if (!Array.isArray(this.vectorStore)) return;
|
|
877
|
+
// In-place compaction to avoid allocating a new large array
|
|
878
|
+
let w = 0;
|
|
879
|
+
for (let r = 0; r < this.vectorStore.length; r++) {
|
|
880
|
+
const chunk = this.vectorStore[r];
|
|
881
|
+
if (chunk.file !== file) {
|
|
882
|
+
chunk._index = w;
|
|
883
|
+
this.vectorStore[w++] = chunk;
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
this.vectorStore.length = w;
|
|
887
|
+
|
|
888
|
+
// Removing shifts labels => rebuild ANN
|
|
889
|
+
this.invalidateAnnIndex();
|
|
890
|
+
this.removeFileCallData(file);
|
|
891
|
+
}
|
|
892
|
+
|
|
893
|
+
addToStore(chunk) {
|
|
894
|
+
normalizeChunkVector(chunk);
|
|
895
|
+
|
|
896
|
+
if (!Array.isArray(this.vectorStore)) {
|
|
897
|
+
this.vectorStore = [];
|
|
898
|
+
}
|
|
899
|
+
|
|
900
|
+
const label = this.vectorStore.length;
|
|
901
|
+
chunk._index = label;
|
|
902
|
+
this.vectorStore.push(chunk);
|
|
903
|
+
if (Array.isArray(this.annVectorCache) && this.annVectorCache.length === label) {
|
|
904
|
+
this.annVectorCache.push(chunk.vector);
|
|
905
|
+
}
|
|
906
|
+
|
|
907
|
+
// Best-effort incremental ANN append (fast path)
|
|
908
|
+
if (
|
|
909
|
+
this.annIndex &&
|
|
910
|
+
!this.annDirty &&
|
|
911
|
+
this.annMeta &&
|
|
912
|
+
typeof this.annIndex.addPoint === 'function' &&
|
|
913
|
+
this.annMeta.count === label &&
|
|
914
|
+
this.annMeta.maxElements > this.annMeta.count
|
|
915
|
+
) {
|
|
916
|
+
try {
|
|
917
|
+
this.annIndex.addPoint(chunk.vector, label);
|
|
918
|
+
this.annMeta.count += 1;
|
|
919
|
+
this.annPersistDirty = true;
|
|
920
|
+
return;
|
|
921
|
+
} catch {
|
|
922
|
+
// fall through
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
|
|
926
|
+
this.invalidateAnnIndex();
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
invalidateAnnIndex() {
|
|
930
|
+
this.annIndex = null;
|
|
931
|
+
this.annMeta = null;
|
|
932
|
+
this.annDirty = true;
|
|
933
|
+
this.annPersistDirty = false;
|
|
934
|
+
this.annVectorCache = null;
|
|
935
|
+
}
|
|
936
|
+
|
|
937
|
+
getAnnVector(index) {
|
|
938
|
+
if (!Array.isArray(this.vectorStore)) return null;
|
|
939
|
+
const chunk = this.vectorStore[index];
|
|
940
|
+
if (!chunk) return null;
|
|
540
941
|
|
|
541
|
-
if (
|
|
542
|
-
this.
|
|
942
|
+
if (!Array.isArray(this.annVectorCache) || this.annVectorCache.length !== this.vectorStore.length) {
|
|
943
|
+
this.annVectorCache = new Array(this.vectorStore.length);
|
|
543
944
|
}
|
|
544
945
|
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
/**
|
|
549
|
-
* Store call data for a file
|
|
550
|
-
*/
|
|
551
|
-
setFileCallData(file, data) {
|
|
552
|
-
this.fileCallData.set(file, data);
|
|
553
|
-
this.callGraph = null; // Invalidate cached graph
|
|
554
|
-
}
|
|
555
|
-
|
|
556
|
-
/**
|
|
557
|
-
* Get call data for a file
|
|
558
|
-
*/
|
|
559
|
-
getFileCallData(file) {
|
|
560
|
-
return this.fileCallData.get(file);
|
|
561
|
-
}
|
|
562
|
-
|
|
563
|
-
/**
|
|
564
|
-
* Remove call data for a file
|
|
565
|
-
*/
|
|
566
|
-
removeFileCallData(file) {
|
|
567
|
-
this.fileCallData.delete(file);
|
|
568
|
-
this.callGraph = null; // Invalidate cached graph
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
/**
|
|
572
|
-
* Rebuild the call graph from file data
|
|
573
|
-
*/
|
|
574
|
-
rebuildCallGraph() {
|
|
575
|
-
// Lazy import to avoid circular dependencies
|
|
576
|
-
import("./call-graph.js").then(({ buildCallGraph }) => {
|
|
577
|
-
this.callGraph = buildCallGraph(this.fileCallData);
|
|
578
|
-
if (this.config.verbose) {
|
|
579
|
-
console.error(`[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`);
|
|
580
|
-
}
|
|
581
|
-
}).catch(err => {
|
|
582
|
-
console.error(`[CallGraph] Failed to build: ${err.message}`);
|
|
583
|
-
this.callGraph = null;
|
|
584
|
-
});
|
|
585
|
-
}
|
|
946
|
+
const cached = this.annVectorCache[index];
|
|
947
|
+
if (cached) return cached;
|
|
586
948
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
return new Map();
|
|
949
|
+
let vec = null;
|
|
950
|
+
if (chunk.vector) {
|
|
951
|
+
vec = ensureFloat32(chunk.vector);
|
|
952
|
+
} else if (this.binaryStore && Number.isInteger(chunk._binaryIndex)) {
|
|
953
|
+
vec = this.binaryStore.getVector(chunk._binaryIndex);
|
|
593
954
|
}
|
|
594
955
|
|
|
595
|
-
|
|
596
|
-
if (!this.callGraph && this.fileCallData.size > 0) {
|
|
597
|
-
const { buildCallGraph } = await import("./call-graph.js");
|
|
598
|
-
this.callGraph = buildCallGraph(this.fileCallData);
|
|
599
|
-
}
|
|
956
|
+
if (!vec) return null;
|
|
600
957
|
|
|
601
|
-
if (
|
|
602
|
-
|
|
958
|
+
if (this.config.vectorStoreLoadMode !== 'disk') {
|
|
959
|
+
chunk.vector = vec;
|
|
603
960
|
}
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
|
|
607
|
-
}
|
|
608
|
-
|
|
609
|
-
/**
|
|
610
|
-
* Get call graph statistics
|
|
611
|
-
*/
|
|
612
|
-
getCallGraphStats() {
|
|
613
|
-
return {
|
|
614
|
-
enabled: this.config.callGraphEnabled ?? false,
|
|
615
|
-
filesWithData: this.fileCallData.size,
|
|
616
|
-
graphBuilt: this.callGraph !== null,
|
|
617
|
-
definitions: this.callGraph?.defines.size ?? 0,
|
|
618
|
-
callTargets: this.callGraph?.calledBy.size ?? 0
|
|
619
|
-
};
|
|
961
|
+
this.annVectorCache[index] = vec;
|
|
962
|
+
return vec;
|
|
620
963
|
}
|
|
621
|
-
|
|
964
|
+
|
|
965
|
+
getAnnIndexPaths() {
|
|
966
|
+
return {
|
|
967
|
+
indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
|
|
968
|
+
metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE),
|
|
969
|
+
};
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
// -------------------- ANN --------------------
|
|
973
|
+
|
|
974
|
+
async ensureAnnIndex() {
|
|
975
|
+
if (!this.config.annEnabled) return null;
|
|
976
|
+
if (!Array.isArray(this.vectorStore)) return null;
|
|
977
|
+
if (this.vectorStore.length < (this.config.annMinChunks ?? 5000)) return null;
|
|
978
|
+
if (this.annIndex && !this.annDirty) return this.annIndex;
|
|
979
|
+
if (this.annLoading) return this.annLoading;
|
|
980
|
+
|
|
981
|
+
this.annLoading = (async () => {
|
|
982
|
+
const HierarchicalNSW = await loadHnswlib();
|
|
983
|
+
if (!HierarchicalNSW) {
|
|
984
|
+
if (hnswlibLoadError) {
|
|
985
|
+
this.initErrors.push({
|
|
986
|
+
stage: 'loadHnswlib',
|
|
987
|
+
message: hnswlibLoadError.message,
|
|
988
|
+
stack: hnswlibLoadError.stack
|
|
989
|
+
});
|
|
990
|
+
}
|
|
991
|
+
return null;
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
const dim = this.vectorStore[0]?.vector?.length;
|
|
995
|
+
if (!dim) return null;
|
|
996
|
+
|
|
997
|
+
if (!this.annDirty && this.config.annIndexCache !== false) {
|
|
998
|
+
const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
|
|
999
|
+
if (loaded) return this.annIndex;
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
return await this.buildAnnIndex(HierarchicalNSW, dim);
|
|
1003
|
+
})();
|
|
1004
|
+
|
|
1005
|
+
const index = await this.annLoading;
|
|
1006
|
+
this.annLoading = null;
|
|
1007
|
+
return index;
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
|
|
1011
|
+
const { indexFile, metaFile } = this.getAnnIndexPaths();
|
|
1012
|
+
const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
|
|
1013
|
+
if (!metaData) return false;
|
|
1014
|
+
|
|
1015
|
+
let meta;
|
|
1016
|
+
try {
|
|
1017
|
+
meta = JSON.parse(metaData);
|
|
1018
|
+
} catch {
|
|
1019
|
+
console.warn('[ANN] Invalid ANN metadata, rebuilding');
|
|
1020
|
+
return false;
|
|
1021
|
+
}
|
|
1022
|
+
|
|
1023
|
+
if (meta?.version !== ANN_META_VERSION) {
|
|
1024
|
+
console.warn(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
|
|
1025
|
+
return false;
|
|
1026
|
+
}
|
|
1027
|
+
|
|
1028
|
+
if (meta?.embeddingModel !== this.config.embeddingModel) {
|
|
1029
|
+
console.warn('[ANN] Embedding model changed for ANN index, rebuilding');
|
|
1030
|
+
return false;
|
|
1031
|
+
}
|
|
1032
|
+
|
|
1033
|
+
if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
|
|
1034
|
+
console.warn('[ANN] ANN index size mismatch, rebuilding');
|
|
1035
|
+
return false;
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
if (
|
|
1039
|
+
meta?.metric !== this.config.annMetric ||
|
|
1040
|
+
meta?.m !== this.config.annM ||
|
|
1041
|
+
meta?.efConstruction !== this.config.annEfConstruction
|
|
1042
|
+
) {
|
|
1043
|
+
console.warn('[ANN] ANN index config changed, rebuilding');
|
|
1044
|
+
return false;
|
|
1045
|
+
}
|
|
1046
|
+
|
|
1047
|
+
let maxElements = meta?.maxElements;
|
|
1048
|
+
if (!Number.isInteger(maxElements)) {
|
|
1049
|
+
maxElements = meta.count;
|
|
1050
|
+
} else if (maxElements < meta.count) {
|
|
1051
|
+
console.warn('[ANN] ANN capacity invalid, rebuilding');
|
|
1052
|
+
return false;
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
const index = new HierarchicalNSW(meta.metric, dim);
|
|
1056
|
+
const loaded = readHnswIndex(index, indexFile, maxElements);
|
|
1057
|
+
if (!loaded) {
|
|
1058
|
+
console.warn('[ANN] Failed to load ANN index file, rebuilding');
|
|
1059
|
+
return false;
|
|
1060
|
+
}
|
|
1061
|
+
|
|
1062
|
+
if (typeof index.setEf === 'function') {
|
|
1063
|
+
index.setEf(this.config.annEfSearch);
|
|
1064
|
+
}
|
|
1065
|
+
|
|
1066
|
+
this.annIndex = index;
|
|
1067
|
+
this.annMeta = { ...meta, maxElements };
|
|
1068
|
+
this.annDirty = false;
|
|
1069
|
+
this.annPersistDirty = false;
|
|
1070
|
+
|
|
1071
|
+
if (this.config.verbose) {
|
|
1072
|
+
console.info(`[ANN] Loaded ANN index (${meta.count} vectors, cap=${maxElements})`);
|
|
1073
|
+
}
|
|
1074
|
+
return true;
|
|
1075
|
+
}
|
|
1076
|
+
|
|
1077
|
+
/**
 * Build a fresh ANN index over every chunk in `this.vectorStore` and, unless
 * `config.annIndexCache` is false, write it to disk.
 *
 * Each chunk is added under its store position as the label. Any error during
 * the build is logged, recorded in `this.initErrors`, and leaves the ANN state
 * cleared and dirty; a failure while only persisting is logged and the
 * in-memory index is still returned.
 *
 * @param {Function} HierarchicalNSW - Index constructor from hnswlib.
 * @param {number} dim - Vector dimension.
 * @returns {Promise<object|null>} The built index, or `null` when the store is
 *   empty/not an array or the build failed.
 */
async buildAnnIndex(HierarchicalNSW, dim) {
  if (!Array.isArray(this.vectorStore)) return null;
  const total = this.vectorStore.length;
  if (total === 0) return null;

  try {
    const index = new HierarchicalNSW(this.config.annMetric, dim);

    const maxElements = computeAnnCapacity(total, this.config);
    initHnswIndex(index, maxElements, this.config.annM, this.config.annEfConstruction);

    // Non-integer config falls back to yielding every 1000 points; a value
    // of 0 or less disables yielding (see the check inside the loop).
    const yieldEvery = Number.isInteger(this.config.annBuildYieldEvery)
      ? this.config.annBuildYieldEvery
      : 1000;

    for (let i = 0; i < total; i++) {
      const vector = this.getAnnVector(i);
      // A single unresolved vector aborts the whole build (handled by the catch below).
      if (!vector) throw new Error(`Missing vector for ANN index at position ${i}`);
      index.addPoint(vector, i);

      if (yieldEvery > 0 && i > 0 && i % yieldEvery === 0) {
        await yieldToLoop();
      }
    }

    if (typeof index.setEf === 'function') {
      index.setEf(this.config.annEfSearch);
    }

    this.annIndex = index;
    this.annMeta = {
      version: ANN_META_VERSION,
      embeddingModel: this.config.embeddingModel,
      metric: this.config.annMetric,
      dim,
      count: total,
      maxElements,
      m: this.config.annM,
      efConstruction: this.config.annEfConstruction,
      efSearch: this.config.annEfSearch,
    };
    this.annDirty = false;
    // Marked as needing persistence until the write below succeeds.
    this.annPersistDirty = true;

    if (this.config.annIndexCache !== false) {
      try {
        await fs.mkdir(this.config.cacheDirectory, { recursive: true });
        const { indexFile, metaFile } = this.getAnnIndexPaths();
        index.writeIndexSync(indexFile);
        await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
        this.annPersistDirty = false;
        if (this.config.verbose) {
          console.info(`[ANN] Saved ANN index (${total} vectors, cap=${maxElements})`);
        }
      } catch (error) {
        // Persisting is best-effort: annPersistDirty stays true and the index is still returned.
        console.warn(`[ANN] Failed to save ANN index: ${error.message}`);
      }
    }

    return index;
  } catch (error) {
    console.warn(`[ANN] Failed to build ANN index: ${error.message}`);
    this.initErrors.push({
      stage: 'buildAnnIndex',
      message: error.message,
      stack: error.stack
    });
    this.annIndex = null;
    this.annMeta = null;
    this.annDirty = true;
    this.annPersistDirty = false;
    return null;
  }
}
|
|
1151
|
+
|
|
1152
|
+
async queryAnn(queryVector, k) {
|
|
1153
|
+
if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
|
|
1154
|
+
const index = await this.ensureAnnIndex();
|
|
1155
|
+
if (!index) return [];
|
|
1156
|
+
|
|
1157
|
+
const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
|
|
1158
|
+
const results = index.searchKnn(qVec, k);
|
|
1159
|
+
const labels = normalizeLabels(results);
|
|
1160
|
+
|
|
1161
|
+
if (labels.length === 0) return [];
|
|
1162
|
+
|
|
1163
|
+
const filtered = labels.filter(
|
|
1164
|
+
(label) => Number.isInteger(label) && label >= 0 && label < this.vectorStore.length,
|
|
1165
|
+
);
|
|
1166
|
+
|
|
1167
|
+
return filtered;
|
|
1168
|
+
}
|
|
1169
|
+
|
|
1170
|
+
async clear() {
|
|
1171
|
+
if (!this.config.enableCache) return;
|
|
1172
|
+
|
|
1173
|
+
try {
|
|
1174
|
+
await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
|
|
1175
|
+
this.vectorStore = [];
|
|
1176
|
+
if (this.binaryStore) {
|
|
1177
|
+
try {
|
|
1178
|
+
await this.binaryStore.close();
|
|
1179
|
+
} catch {
|
|
1180
|
+
// ignore close errors
|
|
1181
|
+
}
|
|
1182
|
+
}
|
|
1183
|
+
this.binaryStore = null;
|
|
1184
|
+
this.fileHashes = new Map();
|
|
1185
|
+
this.invalidateAnnIndex();
|
|
1186
|
+
await this.clearCallGraphData();
|
|
1187
|
+
if (this.config.verbose) {
|
|
1188
|
+
console.info(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
|
|
1189
|
+
}
|
|
1190
|
+
} catch (error) {
|
|
1191
|
+
console.error('[Cache] Failed to clear cache:', error.message);
|
|
1192
|
+
throw error;
|
|
1193
|
+
}
|
|
1194
|
+
}
|
|
1195
|
+
|
|
1196
|
+
/**
|
|
1197
|
+
* Adjust efSearch at runtime for speed/accuracy tradeoff.
|
|
1198
|
+
* Higher values = more accurate but slower.
|
|
1199
|
+
* @param {number} efSearch - New efSearch value (typically 16-512)
|
|
1200
|
+
* @returns {object} Result with success status and current config
|
|
1201
|
+
*/
|
|
1202
|
+
setEfSearch(efSearch) {
|
|
1203
|
+
if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
|
|
1204
|
+
return {
|
|
1205
|
+
success: false,
|
|
1206
|
+
error: 'efSearch must be a number between 1 and 1000',
|
|
1207
|
+
};
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
this.config.annEfSearch = efSearch;
|
|
1211
|
+
|
|
1212
|
+
if (this.annIndex && typeof this.annIndex.setEf === 'function') {
|
|
1213
|
+
this.annIndex.setEf(efSearch);
|
|
1214
|
+
if (this.annMeta) this.annMeta.efSearch = efSearch;
|
|
1215
|
+
this.annPersistDirty = true;
|
|
1216
|
+
if (this.config.verbose) {
|
|
1217
|
+
console.info(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
|
|
1218
|
+
}
|
|
1219
|
+
return { success: true, applied: true, efSearch };
|
|
1220
|
+
}
|
|
1221
|
+
|
|
1222
|
+
if (this.config.verbose) {
|
|
1223
|
+
console.info(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
|
|
1224
|
+
}
|
|
1225
|
+
return { success: true, applied: false, efSearch };
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
/**
|
|
1229
|
+
* Get current ANN index statistics for diagnostics.
|
|
1230
|
+
* @returns {object} ANN stats including index state, config, and vector count
|
|
1231
|
+
*/
|
|
1232
|
+
getAnnStats() {
|
|
1233
|
+
return {
|
|
1234
|
+
enabled: this.config.annEnabled ?? false,
|
|
1235
|
+
indexLoaded: this.annIndex !== null,
|
|
1236
|
+
dirty: this.annDirty,
|
|
1237
|
+
vectorCount: Array.isArray(this.vectorStore) ? this.vectorStore.length : 0,
|
|
1238
|
+
minChunksForAnn: this.config.annMinChunks ?? 5000,
|
|
1239
|
+
config: this.annMeta
|
|
1240
|
+
? {
|
|
1241
|
+
metric: this.annMeta.metric,
|
|
1242
|
+
dim: this.annMeta.dim,
|
|
1243
|
+
count: this.annMeta.count,
|
|
1244
|
+
m: this.annMeta.m,
|
|
1245
|
+
efConstruction: this.annMeta.efConstruction,
|
|
1246
|
+
efSearch: this.config.annEfSearch,
|
|
1247
|
+
}
|
|
1248
|
+
: null,
|
|
1249
|
+
};
|
|
1250
|
+
}
|
|
1251
|
+
|
|
1252
|
+
// -------------------- Call Graph --------------------
|
|
1253
|
+
|
|
1254
|
+
async clearCallGraphData({ removeFile = false } = {}) {
|
|
1255
|
+
this.fileCallData.clear();
|
|
1256
|
+
this.callGraph = null;
|
|
1257
|
+
|
|
1258
|
+
if (removeFile && this.config.enableCache) {
|
|
1259
|
+
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
1260
|
+
try {
|
|
1261
|
+
await fs.rm(callGraphFile, { force: true });
|
|
1262
|
+
} catch (error) {
|
|
1263
|
+
if (this.config.verbose) {
|
|
1264
|
+
console.warn(`[Cache] Failed to remove call-graph cache: ${error.message}`);
|
|
1265
|
+
}
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1268
|
+
}
|
|
1269
|
+
|
|
1270
|
+
pruneCallGraphData(validFiles) {
|
|
1271
|
+
if (!validFiles || this.fileCallData.size === 0) return 0;
|
|
1272
|
+
|
|
1273
|
+
let pruned = 0;
|
|
1274
|
+
for (const file of Array.from(this.fileCallData.keys())) {
|
|
1275
|
+
if (!validFiles.has(file)) {
|
|
1276
|
+
this.fileCallData.delete(file);
|
|
1277
|
+
pruned++;
|
|
1278
|
+
}
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1281
|
+
if (pruned > 0) this.callGraph = null;
|
|
1282
|
+
return pruned;
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
getFileCallData(file) {
|
|
1286
|
+
return this.fileCallData.get(file);
|
|
1287
|
+
}
|
|
1288
|
+
|
|
1289
|
+
hasFileCallData(file) {
|
|
1290
|
+
return this.fileCallData.has(file);
|
|
1291
|
+
}
|
|
1292
|
+
|
|
1293
|
+
getFileCallDataKeys() {
|
|
1294
|
+
return Array.from(this.fileCallData.keys());
|
|
1295
|
+
}
|
|
1296
|
+
|
|
1297
|
+
getFileCallDataCount() {
|
|
1298
|
+
return this.fileCallData.size;
|
|
1299
|
+
}
|
|
1300
|
+
|
|
1301
|
+
/**
|
|
1302
|
+
* Sets call data for a specific file.
|
|
1303
|
+
* @param {string} file
|
|
1304
|
+
* @param {object} data
|
|
1305
|
+
*/
|
|
1306
|
+
setFileCallData(file, data) {
|
|
1307
|
+
this.fileCallData.set(file, data);
|
|
1308
|
+
this.callGraph = null;
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
/**
|
|
1312
|
+
* Sets the entire file call data map.
|
|
1313
|
+
* @param {Map<string, object>|object} entries
|
|
1314
|
+
*/
|
|
1315
|
+
setFileCallDataEntries(entries) {
|
|
1316
|
+
if (entries instanceof Map) {
|
|
1317
|
+
this.fileCallData = entries;
|
|
1318
|
+
} else {
|
|
1319
|
+
this.fileCallData.clear();
|
|
1320
|
+
if (entries && typeof entries === 'object') {
|
|
1321
|
+
for (const [file, data] of Object.entries(entries)) {
|
|
1322
|
+
this.fileCallData.set(file, data);
|
|
1323
|
+
}
|
|
1324
|
+
}
|
|
1325
|
+
}
|
|
1326
|
+
this.callGraph = null;
|
|
1327
|
+
}
|
|
1328
|
+
|
|
1329
|
+
clearFileCallData() {
|
|
1330
|
+
this.fileCallData.clear();
|
|
1331
|
+
this.callGraph = null;
|
|
1332
|
+
}
|
|
1333
|
+
|
|
1334
|
+
removeFileCallData(file) {
|
|
1335
|
+
this.fileCallData.delete(file);
|
|
1336
|
+
this.callGraph = null;
|
|
1337
|
+
}
|
|
1338
|
+
|
|
1339
|
+
async rebuildCallGraph() {
|
|
1340
|
+
if (this._callGraphBuild) return this._callGraphBuild;
|
|
1341
|
+
|
|
1342
|
+
this._callGraphBuild = (async () => {
|
|
1343
|
+
try {
|
|
1344
|
+
const { buildCallGraph } = await import('./call-graph.js');
|
|
1345
|
+
this.callGraph = buildCallGraph(this.fileCallData);
|
|
1346
|
+
if (this.config.verbose && this.callGraph) {
|
|
1347
|
+
console.info(
|
|
1348
|
+
`[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`,
|
|
1349
|
+
);
|
|
1350
|
+
}
|
|
1351
|
+
} catch (err) {
|
|
1352
|
+
console.error(`[CallGraph] Failed to build: ${err.message}`);
|
|
1353
|
+
this.callGraph = null;
|
|
1354
|
+
} finally {
|
|
1355
|
+
this._callGraphBuild = null;
|
|
1356
|
+
}
|
|
1357
|
+
})();
|
|
1358
|
+
|
|
1359
|
+
return this._callGraphBuild;
|
|
1360
|
+
}
|
|
1361
|
+
|
|
1362
|
+
async getRelatedFiles(symbols) {
|
|
1363
|
+
if (!this.config.callGraphEnabled || symbols.length === 0) return new Map();
|
|
1364
|
+
if (!this.callGraph && this.fileCallData.size > 0) await this.rebuildCallGraph();
|
|
1365
|
+
if (!this.callGraph) return new Map();
|
|
1366
|
+
|
|
1367
|
+
const { getRelatedFiles } = await import('./call-graph.js');
|
|
1368
|
+
return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
|
|
1369
|
+
}
|
|
1370
|
+
|
|
1371
|
+
getCallGraphStats() {
|
|
1372
|
+
return {
|
|
1373
|
+
enabled: this.config.callGraphEnabled ?? false,
|
|
1374
|
+
filesWithData: this.fileCallData.size,
|
|
1375
|
+
graphBuilt: this.callGraph !== null,
|
|
1376
|
+
definitions: this.callGraph?.defines.size ?? 0,
|
|
1377
|
+
callTargets: this.callGraph?.calledBy.size ?? 0,
|
|
1378
|
+
};
|
|
1379
|
+
}
|
|
1380
|
+
|
|
1381
|
+
// -------------------- Abstraction Layer --------------------
|
|
1382
|
+
|
|
1383
|
+
/**
|
|
1384
|
+
* Returns the total number of chunks in the store.
|
|
1385
|
+
* @returns {number}
|
|
1386
|
+
*/
|
|
1387
|
+
getStoreSize() {
|
|
1388
|
+
if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
|
|
1389
|
+
if (this.binaryStore) return this.binaryStore.length;
|
|
1390
|
+
return 0;
|
|
1391
|
+
}
|
|
1392
|
+
|
|
1393
|
+
/**
|
|
1394
|
+
* Retrieves a vector by its store index.
|
|
1395
|
+
* @param {number} index
|
|
1396
|
+
* @returns {Float32Array|null}
|
|
1397
|
+
*/
|
|
1398
|
+
getVector(index) {
|
|
1399
|
+
return this.getChunkVector(index);
|
|
1400
|
+
}
|
|
1401
|
+
|
|
1402
|
+
/**
|
|
1403
|
+
* Retrieves a chunk object by its store index.
|
|
1404
|
+
* @param {number} index
|
|
1405
|
+
* @returns {object|null}
|
|
1406
|
+
*/
|
|
1407
|
+
getChunk(index) {
|
|
1408
|
+
if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
|
|
1409
|
+
return this.vectorStore[index];
|
|
1410
|
+
}
|
|
1411
|
+
if (this.binaryStore) {
|
|
1412
|
+
const record = this.binaryStore.getRecord(index);
|
|
1413
|
+
if (record) {
|
|
1414
|
+
return {
|
|
1415
|
+
file: record.file,
|
|
1416
|
+
startLine: record.startLine,
|
|
1417
|
+
endLine: record.endLine,
|
|
1418
|
+
vector: this.binaryStore.getVector(index),
|
|
1419
|
+
_index: index,
|
|
1420
|
+
_binaryIndex: index,
|
|
1421
|
+
};
|
|
1422
|
+
}
|
|
1423
|
+
}
|
|
1424
|
+
return null;
|
|
1425
|
+
}
|
|
1426
|
+
}
|