@softerist/heuristic-mcp 2.0.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cache.js CHANGED
@@ -3,6 +3,73 @@ import path from "path";
3
3
 
4
4
  const CACHE_META_VERSION = 1;
5
5
  const CACHE_META_FILE = "meta.json";
6
+ const ANN_META_VERSION = 1;
7
+ const ANN_INDEX_FILE = "ann-index.bin";
8
+ const ANN_META_FILE = "ann-meta.json";
9
+
10
// Memoized promise for the hnswlib-node import; created on the first call.
let hnswlibPromise = null;
// Remembers a failed import so later calls can bail out immediately.
let hnswlibLoadError = null;

/**
 * Import hnswlib-node and pull out its HierarchicalNSW constructor.
 * Any failure is recorded in `hnswlibLoadError`, logged once, and turned
 * into a `null` result instead of a rejection.
 * @returns {Promise<Function|null>} The constructor, or null on failure
 */
async function importHierarchicalNSW() {
  try {
    const mod = await import("hnswlib-node");
    // The class may be a named export or hang off the default export.
    const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
    if (!HierarchicalNSW) {
      throw new Error("HierarchicalNSW export not found");
    }
    return HierarchicalNSW;
  } catch (err) {
    hnswlibLoadError = err;
    console.error(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
    return null;
  }
}

/**
 * Lazily load the optional hnswlib-node dependency.
 * The import is attempted at most once; every call shares the same result.
 * @returns {Promise<Function|null>} HierarchicalNSW constructor, or null when
 *   the module cannot be loaded or does not expose the class
 */
async function loadHnswlib() {
  if (hnswlibLoadError) {
    return null;
  }
  if (hnswlibPromise === null) {
    hnswlibPromise = importHierarchicalNSW();
  }
  return hnswlibPromise;
}
32
+
33
/**
 * Initialise an HNSW index, tolerating bindings whose `initIndex` accepts
 * fewer positional arguments.
 *
 * Signatures are tried from most to least specific; the first call that does
 * not throw wins. The last attempt (capacity only) is not guarded, so its
 * error reaches the caller.
 *
 * @param {object} index - Object exposing `initIndex(...)`
 * @param {number} maxElements - Capacity passed as the first argument
 * @param {number} m - Passed as the second argument
 * @param {number} efConstruction - Passed as the third argument
 */
function initHnswIndex(index, maxElements, m, efConstruction) {
  // NOTE(review): the trailing 100 is presumably hnswlib's random seed;
  // confirm against the installed hnswlib-node version.
  const guardedSignatures = [
    [maxElements, m, efConstruction, 100],
    [maxElements, m, efConstruction]
  ];

  for (const args of guardedSignatures) {
    try {
      index.initIndex(...args);
      return;
    } catch {
      // This binding rejected the signature; try the next, shorter one.
    }
  }

  index.initIndex(maxElements);
}
44
+
45
/**
 * Load a persisted HNSW index from disk, tolerating bindings whose
 * `readIndexSync` does not accept a capacity argument.
 *
 * @param {object} index - Object exposing `readIndexSync(...)`
 * @param {string} filePath - Path of the index file
 * @param {number} maxElements - Capacity passed on the first attempt
 * @returns {boolean} true as soon as one attempt succeeds, false if both throw
 */
function readHnswIndex(index, filePath, maxElements) {
  const signatures = [[filePath, maxElements], [filePath]];

  // `some` stops at the first successful read, mirroring a try-in-order chain.
  return signatures.some((args) => {
    try {
      index.readIndexSync(...args);
      return true;
    } catch {
      return false;
    }
  });
}
56
+
57
/**
 * Reduce a k-NN search result to a plain list of labels.
 *
 * Arrays are returned untouched (same reference). Otherwise the first truthy
 * property among `labels`, `neighbors` and `indices` is copied into a new
 * array. Anything else yields an empty array.
 *
 * @param {*} result - Raw search result
 * @returns {Array} Label list (possibly empty)
 */
function normalizeLabels(result) {
  if (Array.isArray(result)) {
    return result;
  }

  const labels = result
    ? (result.labels || result.neighbors || result.indices)
    : null;

  return labels ? Array.from(labels) : [];
}
66
+
67
/**
 * Return `vector` as a Float32Array.
 * An existing Float32Array is passed through without copying; any other
 * iterable or array-like is converted with `Float32Array.from`.
 *
 * @param {Float32Array|Iterable<number>|ArrayLike<number>} vector
 * @returns {Float32Array}
 */
function toFloat32Array(vector) {
  return vector instanceof Float32Array ? vector : Float32Array.from(vector);
}
6
73
 
7
74
  export class EmbeddingsCache {
8
75
  constructor(config) {
@@ -14,17 +81,25 @@ export class EmbeddingsCache {
14
81
  version: CACHE_META_VERSION,
15
82
  embeddingModel: config.embeddingModel
16
83
  };
84
+ this.annIndex = null;
85
+ this.annMeta = null;
86
+ this.annDirty = false;
87
+ this.annLoading = null;
88
+ this.annVectorCache = null;
89
+ // Call graph data
90
+ this.fileCallData = new Map(); // file -> { definitions: [], calls: [] }
91
+ this.callGraph = null; // { defines, calledBy, fileCalls }
17
92
  }
18
93
 
19
94
  async load() {
20
95
  if (!this.config.enableCache) return;
21
-
96
+
22
97
  try {
23
98
  await fs.mkdir(this.config.cacheDirectory, { recursive: true });
24
99
  const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
25
100
  const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
26
101
  const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
27
-
102
+
28
103
  const [metaData, cacheData, hashData] = await Promise.all([
29
104
  fs.readFile(metaFile, "utf-8").catch(() => null),
30
105
  fs.readFile(cacheFile, "utf-8").catch(() => null),
@@ -63,15 +138,15 @@ export class EmbeddingsCache {
63
138
  if (cacheData && hashData) {
64
139
  const rawVectorStore = JSON.parse(cacheData);
65
140
  const rawHashes = new Map(Object.entries(JSON.parse(hashData)));
66
-
141
+
67
142
  // Filter cache to only include files matching current extensions
68
143
  const allowedExtensions = this.config.fileExtensions.map(ext => `.${ext}`);
69
-
144
+
70
145
  this.vectorStore = rawVectorStore.filter(chunk => {
71
146
  const ext = path.extname(chunk.file);
72
147
  return allowedExtensions.includes(ext);
73
148
  });
74
-
149
+
75
150
  // Only keep hashes for files matching current extensions
76
151
  for (const [file, hash] of rawHashes) {
77
152
  const ext = path.extname(file);
@@ -79,12 +154,28 @@ export class EmbeddingsCache {
79
154
  this.fileHashes.set(file, hash);
80
155
  }
81
156
  }
82
-
157
+
83
158
  const filtered = rawVectorStore.length - this.vectorStore.length;
84
159
  if (filtered > 0) {
85
160
  console.error(`[Cache] Filtered ${filtered} outdated cache entries`);
86
161
  }
87
162
  console.error(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
163
+ this.annDirty = false;
164
+ this.annIndex = null;
165
+ this.annMeta = null;
166
+ }
167
+
168
+ // Load call-graph data if it exists
169
+ const callGraphFile = path.join(this.config.cacheDirectory, "call-graph.json");
170
+ try {
171
+ const callGraphData = await fs.readFile(callGraphFile, "utf8");
172
+ const parsed = JSON.parse(callGraphData);
173
+ this.fileCallData = new Map(Object.entries(parsed));
174
+ if (this.config.verbose) {
175
+ console.error(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
176
+ }
177
+ } catch {
178
+ // Call-graph file doesn't exist yet, that's OK
88
179
  }
89
180
  } catch (error) {
90
181
  console.error("[Cache] Failed to load cache:", error.message);
@@ -93,9 +184,9 @@ export class EmbeddingsCache {
93
184
 
94
185
  async save() {
95
186
  if (!this.config.enableCache) return;
96
-
187
+
97
188
  this.isSaving = true;
98
-
189
+
99
190
  try {
100
191
  await fs.mkdir(this.config.cacheDirectory, { recursive: true });
101
192
  const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
@@ -105,12 +196,18 @@ export class EmbeddingsCache {
105
196
  version: CACHE_META_VERSION,
106
197
  embeddingModel: this.config.embeddingModel
107
198
  };
108
-
199
+
109
200
  await Promise.all([
110
201
  fs.writeFile(cacheFile, JSON.stringify(this.vectorStore, null, 2)),
111
202
  fs.writeFile(hashFile, JSON.stringify(Object.fromEntries(this.fileHashes), null, 2)),
112
203
  fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2))
113
204
  ]);
205
+
206
+ // Save call-graph data
207
+ if (this.fileCallData.size > 0) {
208
+ const callGraphFile = path.join(this.config.cacheDirectory, "call-graph.json");
209
+ await fs.writeFile(callGraphFile, JSON.stringify(Object.fromEntries(this.fileCallData), null, 2));
210
+ }
114
211
  } catch (error) {
115
212
  console.error("[Cache] Failed to save cache:", error.message);
116
213
  } finally {
@@ -124,6 +221,7 @@ export class EmbeddingsCache {
124
221
 
125
222
  setVectorStore(store) {
126
223
  this.vectorStore = store;
224
+ this.invalidateAnnIndex();
127
225
  }
128
226
 
129
227
  getFileHash(file) {
@@ -140,24 +238,336 @@ export class EmbeddingsCache {
140
238
 
141
239
  removeFileFromStore(file) {
142
240
  this.vectorStore = this.vectorStore.filter(chunk => chunk.file !== file);
241
+ this.invalidateAnnIndex();
242
+ // Also clear call-graph data for this file
243
+ this.removeFileCallData(file);
143
244
  }
144
245
 
145
246
 
146
247
  addToStore(chunk) {
147
248
  this.vectorStore.push(chunk);
249
+ this.invalidateAnnIndex();
250
+ }
251
+
252
+ invalidateAnnIndex() {
253
+ this.annIndex = null;
254
+ this.annMeta = null;
255
+ this.annDirty = true;
256
+ this.annVectorCache = null;
257
+ }
258
+
259
+ getAnnVector(index) {
260
+ if (!this.annVectorCache || this.annVectorCache.length !== this.vectorStore.length) {
261
+ this.annVectorCache = new Array(this.vectorStore.length);
262
+ }
263
+
264
+ let cached = this.annVectorCache[index];
265
+ if (!cached) {
266
+ const vector = this.vectorStore[index]?.vector;
267
+ if (!vector) {
268
+ return null;
269
+ }
270
+ cached = toFloat32Array(vector);
271
+ this.annVectorCache[index] = cached;
272
+ }
273
+
274
+ return cached;
275
+ }
276
+
277
+ getAnnIndexPaths() {
278
+ return {
279
+ indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
280
+ metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE)
281
+ };
282
+ }
283
+
284
+ async ensureAnnIndex() {
285
+ if (!this.config.annEnabled) return null;
286
+ if (this.vectorStore.length < this.config.annMinChunks) return null;
287
+ if (this.annIndex && !this.annDirty) return this.annIndex;
288
+ if (this.annLoading) return this.annLoading;
289
+
290
+ this.annLoading = (async () => {
291
+ const HierarchicalNSW = await loadHnswlib();
292
+ if (!HierarchicalNSW) return null;
293
+
294
+ const dim = this.vectorStore[0]?.vector?.length;
295
+ if (!dim) return null;
296
+
297
+ if (!this.annDirty && this.config.annIndexCache !== false) {
298
+ const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
299
+ if (loaded) return this.annIndex;
300
+ }
301
+
302
+ return await this.buildAnnIndex(HierarchicalNSW, dim);
303
+ })();
304
+
305
+ const index = await this.annLoading;
306
+ this.annLoading = null;
307
+ return index;
308
+ }
309
+
310
+ async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
311
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
312
+ const metaData = await fs.readFile(metaFile, "utf-8").catch(() => null);
313
+
314
+ if (!metaData) {
315
+ return false;
316
+ }
317
+
318
+ let meta = null;
319
+ try {
320
+ meta = JSON.parse(metaData);
321
+ } catch {
322
+ console.error("[ANN] Invalid ANN metadata, rebuilding");
323
+ return false;
324
+ }
325
+
326
+ if (meta?.version !== ANN_META_VERSION) {
327
+ console.error(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
328
+ return false;
329
+ }
330
+
331
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
332
+ console.error(`[ANN] Embedding model changed for ANN index, rebuilding`);
333
+ return false;
334
+ }
335
+
336
+ if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
337
+ console.error("[ANN] ANN index size mismatch, rebuilding");
338
+ return false;
339
+ }
340
+
341
+ if (meta?.metric !== this.config.annMetric ||
342
+ meta?.m !== this.config.annM ||
343
+ meta?.efConstruction !== this.config.annEfConstruction) {
344
+ console.error("[ANN] ANN index config changed, rebuilding");
345
+ return false;
346
+ }
347
+
348
+ const index = new HierarchicalNSW(meta.metric, dim);
349
+ const loaded = readHnswIndex(index, indexFile, meta.count);
350
+ if (!loaded) {
351
+ console.error("[ANN] Failed to load ANN index file, rebuilding");
352
+ return false;
353
+ }
354
+
355
+ if (typeof index.setEf === "function") {
356
+ index.setEf(this.config.annEfSearch);
357
+ }
358
+
359
+ this.annIndex = index;
360
+ this.annMeta = meta;
361
+ this.annDirty = false;
362
+ console.error(`[ANN] Loaded ANN index (${meta.count} vectors)`);
363
+ return true;
364
+ }
365
+
366
+ async buildAnnIndex(HierarchicalNSW, dim) {
367
+ const total = this.vectorStore.length;
368
+ if (total === 0) return null;
369
+
370
+ try {
371
+ const index = new HierarchicalNSW(this.config.annMetric, dim);
372
+ initHnswIndex(index, total, this.config.annM, this.config.annEfConstruction);
373
+
374
+ for (let i = 0; i < total; i++) {
375
+ const vector = this.getAnnVector(i);
376
+ if (!vector) {
377
+ throw new Error(`Missing vector for ANN index at position ${i}`);
378
+ }
379
+ index.addPoint(vector, i);
380
+ }
381
+
382
+ if (typeof index.setEf === "function") {
383
+ index.setEf(this.config.annEfSearch);
384
+ }
385
+
386
+ this.annIndex = index;
387
+ this.annMeta = {
388
+ version: ANN_META_VERSION,
389
+ embeddingModel: this.config.embeddingModel,
390
+ metric: this.config.annMetric,
391
+ dim,
392
+ count: total,
393
+ m: this.config.annM,
394
+ efConstruction: this.config.annEfConstruction,
395
+ efSearch: this.config.annEfSearch
396
+ };
397
+ this.annDirty = false;
398
+
399
+ if (this.config.annIndexCache !== false) {
400
+ try {
401
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
402
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
403
+ index.writeIndexSync(indexFile);
404
+ await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
405
+ console.error(`[ANN] Saved ANN index (${total} vectors)`);
406
+ } catch (error) {
407
+ console.error(`[ANN] Failed to save ANN index: ${error.message}`);
408
+ }
409
+ }
410
+
411
+ return index;
412
+ } catch (error) {
413
+ console.error(`[ANN] Failed to build ANN index: ${error.message}`);
414
+ this.annIndex = null;
415
+ this.annMeta = null;
416
+ this.annDirty = true;
417
+ return null;
418
+ }
419
+ }
420
+
421
+ async queryAnn(queryVector, k) {
422
+ const index = await this.ensureAnnIndex();
423
+ if (!index) return null;
424
+
425
+ const results = index.searchKnn(toFloat32Array(queryVector), k);
426
+ const labels = normalizeLabels(results);
427
+
428
+ if (labels.length === 0) return null;
429
+ const filtered = labels.filter((label) =>
430
+ Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
431
+ );
432
+ return filtered.length > 0 ? filtered : null;
148
433
  }
149
434
 
150
435
  async clear() {
151
436
  if (!this.config.enableCache) return;
152
-
437
+
153
438
  try {
154
439
  await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
155
440
  this.vectorStore = [];
156
441
  this.fileHashes = new Map();
442
+ this.invalidateAnnIndex();
443
+ // Clear call-graph data
444
+ this.fileCallData.clear();
445
+ this.callGraph = null;
157
446
  console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
158
447
  } catch (error) {
159
448
  console.error("[Cache] Failed to clear cache:", error.message);
160
449
  throw error;
161
450
  }
162
451
  }
452
+
453
+ /**
454
+ * Adjust efSearch at runtime for speed/accuracy tradeoff.
455
+ * Higher values = more accurate but slower.
456
+ * @param {number} efSearch - New efSearch value (typically 16-512)
457
+ * @returns {object} Result with success status and current config
458
+ */
459
+ setEfSearch(efSearch) {
460
+ if (typeof efSearch !== "number" || efSearch < 1 || efSearch > 1000) {
461
+ return { success: false, error: "efSearch must be a number between 1 and 1000" };
462
+ }
463
+
464
+ this.config.annEfSearch = efSearch;
465
+
466
+ if (this.annIndex && typeof this.annIndex.setEf === "function") {
467
+ this.annIndex.setEf(efSearch);
468
+ console.error(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
469
+ return { success: true, applied: true, efSearch };
470
+ } else {
471
+ console.error(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
472
+ return { success: true, applied: false, efSearch };
473
+ }
474
+ }
475
+
476
+ /**
477
+ * Get current ANN index statistics for diagnostics.
478
+ * @returns {object} ANN stats including index state, config, and vector count
479
+ */
480
+ getAnnStats() {
481
+ return {
482
+ enabled: this.config.annEnabled ?? false,
483
+ indexLoaded: this.annIndex !== null,
484
+ dirty: this.annDirty,
485
+ vectorCount: this.vectorStore.length,
486
+ minChunksForAnn: this.config.annMinChunks ?? 5000,
487
+ config: this.annMeta ? {
488
+ metric: this.annMeta.metric,
489
+ dim: this.annMeta.dim,
490
+ count: this.annMeta.count,
491
+ m: this.annMeta.m,
492
+ efConstruction: this.annMeta.efConstruction,
493
+ efSearch: this.config.annEfSearch
494
+ } : null
495
+ };
496
+ }
497
+
498
+ // ========== Call Graph Methods ==========
499
+
500
+ /**
501
+ * Store call data for a file
502
+ */
503
+ setFileCallData(file, data) {
504
+ this.fileCallData.set(file, data);
505
+ this.callGraph = null; // Invalidate cached graph
506
+ }
507
+
508
+ /**
509
+ * Get call data for a file
510
+ */
511
+ getFileCallData(file) {
512
+ return this.fileCallData.get(file);
513
+ }
514
+
515
+ /**
516
+ * Remove call data for a file
517
+ */
518
+ removeFileCallData(file) {
519
+ this.fileCallData.delete(file);
520
+ this.callGraph = null; // Invalidate cached graph
521
+ }
522
+
523
+ /**
524
+ * Rebuild the call graph from file data
525
+ */
526
+ rebuildCallGraph() {
527
+ // Lazy import to avoid circular dependencies
528
+ import("./call-graph.js").then(({ buildCallGraph }) => {
529
+ this.callGraph = buildCallGraph(this.fileCallData);
530
+ if (this.config.verbose) {
531
+ console.error(`[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`);
532
+ }
533
+ }).catch(err => {
534
+ console.error(`[CallGraph] Failed to build: ${err.message}`);
535
+ this.callGraph = null;
536
+ });
537
+ }
538
+
539
+ /**
540
+ * Get files related to symbols via call graph
541
+ */
542
+ async getRelatedFiles(symbols) {
543
+ if (!this.config.callGraphEnabled || symbols.length === 0) {
544
+ return new Map();
545
+ }
546
+
547
+ // Rebuild graph if needed
548
+ if (!this.callGraph && this.fileCallData.size > 0) {
549
+ const { buildCallGraph } = await import("./call-graph.js");
550
+ this.callGraph = buildCallGraph(this.fileCallData);
551
+ }
552
+
553
+ if (!this.callGraph) {
554
+ return new Map();
555
+ }
556
+
557
+ const { getRelatedFiles } = await import("./call-graph.js");
558
+ return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
559
+ }
560
+
561
+ /**
562
+ * Get call graph statistics
563
+ */
564
+ getCallGraphStats() {
565
+ return {
566
+ enabled: this.config.callGraphEnabled ?? false,
567
+ filesWithData: this.fileCallData.size,
568
+ graphBuilt: this.callGraph !== null,
569
+ definitions: this.callGraph?.defines.size ?? 0,
570
+ callTargets: this.callGraph?.calledBy.size ?? 0
571
+ };
572
+ }
163
573
  }