@softerist/heuristic-mcp 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/cache.js CHANGED
@@ -3,6 +3,73 @@ import path from "path";
3
3
 
4
4
  const CACHE_META_VERSION = 1;
5
5
  const CACHE_META_FILE = "meta.json";
6
// Version stamp stored in the ANN metadata file; a persisted index whose
// stored version differs from this value is ignored and rebuilt.
const ANN_META_VERSION = 1;
// File names, relative to the cache directory, of the serialized ANN index
// and of the JSON metadata describing it.
const ANN_INDEX_FILE = "ann-index.bin";
const ANN_META_FILE = "ann-meta.json";
9
+
10
// Memoized import of hnswlib-node (shared by every caller) and the error
// recorded when that import failed.
let hnswlibPromise = null;
let hnswlibLoadError = null;

/**
 * Lazily import the optional `hnswlib-node` dependency.
 *
 * The import is attempted at most once; the resulting promise is cached.
 * When the module cannot be loaded, or does not expose `HierarchicalNSW`,
 * the failure is remembered, a message is logged, and `null` is returned so
 * callers can fall back to linear search.
 *
 * @returns {Promise<Function|null>} The `HierarchicalNSW` constructor, or `null`.
 */
async function loadHnswlib() {
  if (hnswlibLoadError) {
    return null;
  }

  if (hnswlibPromise === null) {
    hnswlibPromise = (async () => {
      try {
        const mod = await import("hnswlib-node");
        // The class may be a named export or hang off the default export.
        const HierarchicalNSW = mod?.HierarchicalNSW || mod?.default?.HierarchicalNSW;
        if (!HierarchicalNSW) {
          throw new Error("HierarchicalNSW export not found");
        }
        return HierarchicalNSW;
      } catch (err) {
        hnswlibLoadError = err;
        console.error(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
        return null;
      }
    })();
  }

  return hnswlibPromise;
}
32
+
33
/**
 * Initialise an HNSW index, tolerating different `initIndex` signatures.
 *
 * The richest argument list is tried first; each failure falls through to a
 * shorter one. Only the final single-argument call is allowed to throw.
 * NOTE(review): the trailing `100` is presumably the random-seed argument
 * accepted by some hnswlib-node versions — confirm against the binding.
 *
 * @param {object} index - Object exposing `initIndex(...)`.
 * @param {number} maxElements - Capacity of the index.
 * @param {number} m - HNSW `M` parameter.
 * @param {number} efConstruction - HNSW `efConstruction` parameter.
 * @returns {void}
 */
function initHnswIndex(index, maxElements, m, efConstruction) {
  const preferredArgLists = [
    [maxElements, m, efConstruction, 100],
    [maxElements, m, efConstruction],
  ];

  for (const args of preferredArgLists) {
    try {
      index.initIndex(...args);
      return;
    } catch {
      // Signature rejected: fall through to the next, shorter variant.
    }
  }

  index.initIndex(maxElements);
}
44
+
45
/**
 * Load a serialized HNSW index from disk, tolerating different
 * `readIndexSync` signatures.
 *
 * Tries `(filePath, maxElements)` first and `(filePath)` second.
 *
 * @param {object} index - Object exposing `readIndexSync(...)`.
 * @param {string} filePath - Location of the serialized index.
 * @param {number} maxElements - Capacity passed to the two-argument form.
 * @returns {boolean} `true` once any attempt succeeds, `false` if both throw.
 */
function readHnswIndex(index, filePath, maxElements) {
  const attempts = [[filePath, maxElements], [filePath]];

  for (const args of attempts) {
    try {
      index.readIndexSync(...args);
      return true;
    } catch {
      // This signature failed; try the next one.
    }
  }

  return false;
}
56
+
57
/**
 * Reduce a k-NN search result to a plain array of labels.
 *
 * Accepts either an array (returned as-is) or an object carrying the labels
 * under `labels`, `neighbors` or `indices` (first truthy one wins).
 *
 * @param {*} result - Raw value returned by the index search.
 * @returns {Array} The labels, or an empty array when none can be found.
 */
function normalizeLabels(result) {
  if (Array.isArray(result)) {
    return result;
  }

  const found = result ? (result.labels || result.neighbors || result.indices) : null;
  return found ? Array.from(found) : [];
}
66
+
67
/**
 * Coerce a vector to a `Float32Array`.
 *
 * @param {Float32Array|Iterable<number>|ArrayLike<number>} vector - Input vector.
 * @returns {Float32Array} The same instance when it already is a
 *   `Float32Array`, otherwise a new one built from the input.
 */
function toFloat32Array(vector) {
  return vector instanceof Float32Array ? vector : Float32Array.from(vector);
}
6
73
 
7
74
  export class EmbeddingsCache {
8
75
  constructor(config) {
@@ -14,17 +81,25 @@ export class EmbeddingsCache {
14
81
  version: CACHE_META_VERSION,
15
82
  embeddingModel: config.embeddingModel
16
83
  };
84
+ this.annIndex = null;
85
+ this.annMeta = null;
86
+ this.annDirty = false;
87
+ this.annLoading = null;
88
+ this.annVectorCache = null;
89
+ // Call graph data
90
+ this.fileCallData = new Map(); // file -> { definitions: [], calls: [] }
91
+ this.callGraph = null; // { defines, calledBy, fileCalls }
17
92
  }
18
93
 
19
94
  async load() {
20
95
  if (!this.config.enableCache) return;
21
-
96
+
22
97
  try {
23
98
  await fs.mkdir(this.config.cacheDirectory, { recursive: true });
24
99
  const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
25
100
  const hashFile = path.join(this.config.cacheDirectory, "file-hashes.json");
26
101
  const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
27
-
102
+
28
103
  const [metaData, cacheData, hashData] = await Promise.all([
29
104
  fs.readFile(metaFile, "utf-8").catch(() => null),
30
105
  fs.readFile(cacheFile, "utf-8").catch(() => null),
@@ -63,15 +138,15 @@ export class EmbeddingsCache {
63
138
  if (cacheData && hashData) {
64
139
  const rawVectorStore = JSON.parse(cacheData);
65
140
  const rawHashes = new Map(Object.entries(JSON.parse(hashData)));
66
-
141
+
67
142
  // Filter cache to only include files matching current extensions
68
143
  const allowedExtensions = this.config.fileExtensions.map(ext => `.${ext}`);
69
-
144
+
70
145
  this.vectorStore = rawVectorStore.filter(chunk => {
71
146
  const ext = path.extname(chunk.file);
72
147
  return allowedExtensions.includes(ext);
73
148
  });
74
-
149
+
75
150
  // Only keep hashes for files matching current extensions
76
151
  for (const [file, hash] of rawHashes) {
77
152
  const ext = path.extname(file);
@@ -79,12 +154,15 @@ export class EmbeddingsCache {
79
154
  this.fileHashes.set(file, hash);
80
155
  }
81
156
  }
82
-
157
+
83
158
  const filtered = rawVectorStore.length - this.vectorStore.length;
84
159
  if (filtered > 0) {
85
160
  console.error(`[Cache] Filtered ${filtered} outdated cache entries`);
86
161
  }
87
162
  console.error(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
163
+ this.annDirty = false;
164
+ this.annIndex = null;
165
+ this.annMeta = null;
88
166
  }
89
167
  } catch (error) {
90
168
  console.error("[Cache] Failed to load cache:", error.message);
@@ -93,9 +171,9 @@ export class EmbeddingsCache {
93
171
 
94
172
  async save() {
95
173
  if (!this.config.enableCache) return;
96
-
174
+
97
175
  this.isSaving = true;
98
-
176
+
99
177
  try {
100
178
  await fs.mkdir(this.config.cacheDirectory, { recursive: true });
101
179
  const cacheFile = path.join(this.config.cacheDirectory, "embeddings.json");
@@ -105,7 +183,7 @@ export class EmbeddingsCache {
105
183
  version: CACHE_META_VERSION,
106
184
  embeddingModel: this.config.embeddingModel
107
185
  };
108
-
186
+
109
187
  await Promise.all([
110
188
  fs.writeFile(cacheFile, JSON.stringify(this.vectorStore, null, 2)),
111
189
  fs.writeFile(hashFile, JSON.stringify(Object.fromEntries(this.fileHashes), null, 2)),
@@ -124,6 +202,7 @@ export class EmbeddingsCache {
124
202
 
125
203
  setVectorStore(store) {
126
204
  this.vectorStore = store;
205
+ this.invalidateAnnIndex();
127
206
  }
128
207
 
129
208
  getFileHash(file) {
@@ -140,24 +219,331 @@ export class EmbeddingsCache {
140
219
 
141
220
  removeFileFromStore(file) {
142
221
  this.vectorStore = this.vectorStore.filter(chunk => chunk.file !== file);
222
+ this.invalidateAnnIndex();
143
223
  }
144
224
 
145
225
 
146
226
  addToStore(chunk) {
147
227
  this.vectorStore.push(chunk);
228
+ this.invalidateAnnIndex();
229
+ }
230
+
231
+ invalidateAnnIndex() {
232
+ this.annIndex = null;
233
+ this.annMeta = null;
234
+ this.annDirty = true;
235
+ this.annVectorCache = null;
236
+ }
237
+
238
+ getAnnVector(index) {
239
+ if (!this.annVectorCache || this.annVectorCache.length !== this.vectorStore.length) {
240
+ this.annVectorCache = new Array(this.vectorStore.length);
241
+ }
242
+
243
+ let cached = this.annVectorCache[index];
244
+ if (!cached) {
245
+ const vector = this.vectorStore[index]?.vector;
246
+ if (!vector) {
247
+ return null;
248
+ }
249
+ cached = toFloat32Array(vector);
250
+ this.annVectorCache[index] = cached;
251
+ }
252
+
253
+ return cached;
254
+ }
255
+
256
+ getAnnIndexPaths() {
257
+ return {
258
+ indexFile: path.join(this.config.cacheDirectory, ANN_INDEX_FILE),
259
+ metaFile: path.join(this.config.cacheDirectory, ANN_META_FILE)
260
+ };
261
+ }
262
+
263
+ async ensureAnnIndex() {
264
+ if (!this.config.annEnabled) return null;
265
+ if (this.vectorStore.length < this.config.annMinChunks) return null;
266
+ if (this.annIndex && !this.annDirty) return this.annIndex;
267
+ if (this.annLoading) return this.annLoading;
268
+
269
+ this.annLoading = (async () => {
270
+ const HierarchicalNSW = await loadHnswlib();
271
+ if (!HierarchicalNSW) return null;
272
+
273
+ const dim = this.vectorStore[0]?.vector?.length;
274
+ if (!dim) return null;
275
+
276
+ if (!this.annDirty && this.config.annIndexCache !== false) {
277
+ const loaded = await this.loadAnnIndexFromDisk(HierarchicalNSW, dim);
278
+ if (loaded) return this.annIndex;
279
+ }
280
+
281
+ return await this.buildAnnIndex(HierarchicalNSW, dim);
282
+ })();
283
+
284
+ const index = await this.annLoading;
285
+ this.annLoading = null;
286
+ return index;
287
+ }
288
+
289
+ async loadAnnIndexFromDisk(HierarchicalNSW, dim) {
290
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
291
+ const metaData = await fs.readFile(metaFile, "utf-8").catch(() => null);
292
+
293
+ if (!metaData) {
294
+ return false;
295
+ }
296
+
297
+ let meta = null;
298
+ try {
299
+ meta = JSON.parse(metaData);
300
+ } catch {
301
+ console.error("[ANN] Invalid ANN metadata, rebuilding");
302
+ return false;
303
+ }
304
+
305
+ if (meta?.version !== ANN_META_VERSION) {
306
+ console.error(`[ANN] ANN index version mismatch (${meta?.version}), rebuilding`);
307
+ return false;
308
+ }
309
+
310
+ if (meta?.embeddingModel !== this.config.embeddingModel) {
311
+ console.error(`[ANN] Embedding model changed for ANN index, rebuilding`);
312
+ return false;
313
+ }
314
+
315
+ if (meta?.dim !== dim || meta?.count !== this.vectorStore.length) {
316
+ console.error("[ANN] ANN index size mismatch, rebuilding");
317
+ return false;
318
+ }
319
+
320
+ if (meta?.metric !== this.config.annMetric ||
321
+ meta?.m !== this.config.annM ||
322
+ meta?.efConstruction !== this.config.annEfConstruction) {
323
+ console.error("[ANN] ANN index config changed, rebuilding");
324
+ return false;
325
+ }
326
+
327
+ const index = new HierarchicalNSW(meta.metric, dim);
328
+ const loaded = readHnswIndex(index, indexFile, meta.count);
329
+ if (!loaded) {
330
+ console.error("[ANN] Failed to load ANN index file, rebuilding");
331
+ return false;
332
+ }
333
+
334
+ if (typeof index.setEf === "function") {
335
+ index.setEf(this.config.annEfSearch);
336
+ }
337
+
338
+ this.annIndex = index;
339
+ this.annMeta = meta;
340
+ this.annDirty = false;
341
+ console.error(`[ANN] Loaded ANN index (${meta.count} vectors)`);
342
+ return true;
343
+ }
344
+
345
+ async buildAnnIndex(HierarchicalNSW, dim) {
346
+ const total = this.vectorStore.length;
347
+ if (total === 0) return null;
348
+
349
+ try {
350
+ const index = new HierarchicalNSW(this.config.annMetric, dim);
351
+ initHnswIndex(index, total, this.config.annM, this.config.annEfConstruction);
352
+
353
+ for (let i = 0; i < total; i++) {
354
+ const vector = this.getAnnVector(i);
355
+ if (!vector) {
356
+ throw new Error(`Missing vector for ANN index at position ${i}`);
357
+ }
358
+ index.addPoint(vector, i);
359
+ }
360
+
361
+ if (typeof index.setEf === "function") {
362
+ index.setEf(this.config.annEfSearch);
363
+ }
364
+
365
+ this.annIndex = index;
366
+ this.annMeta = {
367
+ version: ANN_META_VERSION,
368
+ embeddingModel: this.config.embeddingModel,
369
+ metric: this.config.annMetric,
370
+ dim,
371
+ count: total,
372
+ m: this.config.annM,
373
+ efConstruction: this.config.annEfConstruction,
374
+ efSearch: this.config.annEfSearch
375
+ };
376
+ this.annDirty = false;
377
+
378
+ if (this.config.annIndexCache !== false) {
379
+ try {
380
+ await fs.mkdir(this.config.cacheDirectory, { recursive: true });
381
+ const { indexFile, metaFile } = this.getAnnIndexPaths();
382
+ index.writeIndexSync(indexFile);
383
+ await fs.writeFile(metaFile, JSON.stringify(this.annMeta, null, 2));
384
+ console.error(`[ANN] Saved ANN index (${total} vectors)`);
385
+ } catch (error) {
386
+ console.error(`[ANN] Failed to save ANN index: ${error.message}`);
387
+ }
388
+ }
389
+
390
+ return index;
391
+ } catch (error) {
392
+ console.error(`[ANN] Failed to build ANN index: ${error.message}`);
393
+ this.annIndex = null;
394
+ this.annMeta = null;
395
+ this.annDirty = true;
396
+ return null;
397
+ }
398
+ }
399
+
400
+ async queryAnn(queryVector, k) {
401
+ const index = await this.ensureAnnIndex();
402
+ if (!index) return null;
403
+
404
+ const results = index.searchKnn(toFloat32Array(queryVector), k);
405
+ const labels = normalizeLabels(results);
406
+
407
+ if (labels.length === 0) return null;
408
+ const filtered = labels.filter((label) =>
409
+ Number.isInteger(label) && label >= 0 && label < this.vectorStore.length
410
+ );
411
+ return filtered.length > 0 ? filtered : null;
148
412
  }
149
413
 
150
414
  async clear() {
151
415
  if (!this.config.enableCache) return;
152
-
416
+
153
417
  try {
154
418
  await fs.rm(this.config.cacheDirectory, { recursive: true, force: true });
155
419
  this.vectorStore = [];
156
420
  this.fileHashes = new Map();
421
+ this.invalidateAnnIndex();
157
422
  console.error(`[Cache] Cache cleared successfully: ${this.config.cacheDirectory}`);
158
423
  } catch (error) {
159
424
  console.error("[Cache] Failed to clear cache:", error.message);
160
425
  throw error;
161
426
  }
162
427
  }
428
+
429
+ /**
430
+ * Adjust efSearch at runtime for speed/accuracy tradeoff.
431
+ * Higher values = more accurate but slower.
432
+ * @param {number} efSearch - New efSearch value (typically 16-512)
433
+ * @returns {object} Result with success status and current config
434
+ */
435
+ setEfSearch(efSearch) {
436
+ if (typeof efSearch !== "number" || efSearch < 1 || efSearch > 1000) {
437
+ return { success: false, error: "efSearch must be a number between 1 and 1000" };
438
+ }
439
+
440
+ this.config.annEfSearch = efSearch;
441
+
442
+ if (this.annIndex && typeof this.annIndex.setEf === "function") {
443
+ this.annIndex.setEf(efSearch);
444
+ console.error(`[ANN] efSearch updated to ${efSearch} (applied to active index)`);
445
+ return { success: true, applied: true, efSearch };
446
+ } else {
447
+ console.error(`[ANN] efSearch updated to ${efSearch} (will apply on next index build)`);
448
+ return { success: true, applied: false, efSearch };
449
+ }
450
+ }
451
+
452
+ /**
453
+ * Get current ANN index statistics for diagnostics.
454
+ * @returns {object} ANN stats including index state, config, and vector count
455
+ */
456
+ getAnnStats() {
457
+ return {
458
+ enabled: this.config.annEnabled ?? false,
459
+ indexLoaded: this.annIndex !== null,
460
+ dirty: this.annDirty,
461
+ vectorCount: this.vectorStore.length,
462
+ minChunksForAnn: this.config.annMinChunks ?? 5000,
463
+ config: this.annMeta ? {
464
+ metric: this.annMeta.metric,
465
+ dim: this.annMeta.dim,
466
+ count: this.annMeta.count,
467
+ m: this.annMeta.m,
468
+ efConstruction: this.annMeta.efConstruction,
469
+ efSearch: this.config.annEfSearch
470
+ } : null
471
+ };
472
+ }
473
+
474
+ // ========== Call Graph Methods ==========
475
+
476
+ /**
477
+ * Store call data for a file
478
+ */
479
+ setFileCallData(file, data) {
480
+ this.fileCallData.set(file, data);
481
+ this.callGraph = null; // Invalidate cached graph
482
+ }
483
+
484
+ /**
485
+ * Get call data for a file
486
+ */
487
+ getFileCallData(file) {
488
+ return this.fileCallData.get(file);
489
+ }
490
+
491
+ /**
492
+ * Remove call data for a file
493
+ */
494
+ removeFileCallData(file) {
495
+ this.fileCallData.delete(file);
496
+ this.callGraph = null; // Invalidate cached graph
497
+ }
498
+
499
+ /**
500
+ * Rebuild the call graph from file data
501
+ */
502
+ rebuildCallGraph() {
503
+ // Lazy import to avoid circular dependencies
504
+ import("./call-graph.js").then(({ buildCallGraph }) => {
505
+ this.callGraph = buildCallGraph(this.fileCallData);
506
+ if (this.config.verbose) {
507
+ console.error(`[CallGraph] Built graph: ${this.callGraph.defines.size} definitions, ${this.callGraph.calledBy.size} call targets`);
508
+ }
509
+ }).catch(err => {
510
+ console.error(`[CallGraph] Failed to build: ${err.message}`);
511
+ this.callGraph = null;
512
+ });
513
+ }
514
+
515
+ /**
516
+ * Get files related to symbols via call graph
517
+ */
518
+ async getRelatedFiles(symbols) {
519
+ if (!this.config.callGraphEnabled || symbols.length === 0) {
520
+ return new Map();
521
+ }
522
+
523
+ // Rebuild graph if needed
524
+ if (!this.callGraph && this.fileCallData.size > 0) {
525
+ const { buildCallGraph } = await import("./call-graph.js");
526
+ this.callGraph = buildCallGraph(this.fileCallData);
527
+ }
528
+
529
+ if (!this.callGraph) {
530
+ return new Map();
531
+ }
532
+
533
+ const { getRelatedFiles } = await import("./call-graph.js");
534
+ return getRelatedFiles(this.callGraph, symbols, this.config.callGraphMaxHops);
535
+ }
536
+
537
+ /**
538
+ * Get call graph statistics
539
+ */
540
+ getCallGraphStats() {
541
+ return {
542
+ enabled: this.config.callGraphEnabled ?? false,
543
+ filesWithData: this.fileCallData.size,
544
+ graphBuilt: this.callGraph !== null,
545
+ definitions: this.callGraph?.defines.size ?? 0,
546
+ callTargets: this.callGraph?.calledBy.size ?? 0
547
+ };
548
+ }
163
549
  }