@softerist/heuristic-mcp 3.0.17 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config.jsonc +23 -6
- package/features/ann-config.js +7 -14
- package/features/clear-cache.js +3 -3
- package/features/find-similar-code.js +17 -22
- package/features/hybrid-search.js +59 -67
- package/features/index-codebase.js +305 -268
- package/features/lifecycle.js +370 -176
- package/features/package-version.js +15 -26
- package/features/register.js +75 -57
- package/features/resources.js +21 -47
- package/features/set-workspace.js +31 -43
- package/index.js +912 -200
- package/lib/cache-utils.js +95 -99
- package/lib/cache.js +121 -166
- package/lib/cli.js +246 -238
- package/lib/config.js +232 -62
- package/lib/constants.js +22 -2
- package/lib/embed-query-process.js +13 -29
- package/lib/embedding-process.js +29 -19
- package/lib/embedding-worker.js +166 -149
- package/lib/ignore-patterns.js +39 -39
- package/lib/json-writer.js +7 -34
- package/lib/logging.js +52 -48
- package/lib/onnx-backend.js +4 -4
- package/lib/path-utils.js +4 -21
- package/lib/project-detector.js +3 -3
- package/lib/server-lifecycle.js +148 -35
- package/lib/settings-editor.js +25 -18
- package/lib/slice-normalize.js +6 -16
- package/lib/tokenizer.js +56 -109
- package/lib/utils.js +62 -81
- package/lib/vector-store-binary.js +7 -7
- package/lib/vector-store-sqlite.js +35 -67
- package/lib/workspace-cache-key.js +36 -0
- package/lib/workspace-env.js +55 -14
- package/package.json +86 -86
package/lib/cache.js
CHANGED
|
@@ -1,20 +1,21 @@
|
|
|
1
1
|
import fs from 'fs/promises';
|
|
2
2
|
import path from 'path';
|
|
3
|
-
import { Worker } from 'worker_threads';
|
|
4
|
-
import { StreamingJsonWriter } from './json-writer.js';
|
|
5
|
-
import { BinaryVectorStore } from './vector-store-binary.js';
|
|
6
|
-
import { SqliteVectorStore } from './vector-store-sqlite.js';
|
|
7
|
-
import {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
3
|
+
import { Worker } from 'worker_threads';
|
|
4
|
+
import { StreamingJsonWriter } from './json-writer.js';
|
|
5
|
+
import { BinaryVectorStore } from './vector-store-binary.js';
|
|
6
|
+
import { SqliteVectorStore } from './vector-store-sqlite.js';
|
|
7
|
+
import { isNonProjectDirectory } from './config.js';
|
|
8
|
+
import {
|
|
9
|
+
JSON_WORKER_THRESHOLD_BYTES,
|
|
10
|
+
ANN_DIMENSION_SAMPLE_SIZE,
|
|
11
|
+
HNSWLIB_ERROR_RESET_MS,
|
|
11
12
|
DEFAULT_READER_WAIT_TIMEOUT_MS,
|
|
12
13
|
} from './constants.js';
|
|
13
14
|
|
|
14
15
|
const CACHE_META_VERSION = 1;
|
|
15
16
|
const CACHE_META_FILE = 'meta.json';
|
|
16
17
|
|
|
17
|
-
|
|
18
|
+
|
|
18
19
|
const ANN_META_VERSION = 1;
|
|
19
20
|
const ANN_INDEX_FILE = 'ann-index.bin';
|
|
20
21
|
const ANN_META_FILE = 'ann-meta.json';
|
|
@@ -23,7 +24,7 @@ const CALL_GRAPH_FILE = 'call-graph.json';
|
|
|
23
24
|
|
|
24
25
|
const IS_TEST_ENV = process.env.VITEST === 'true' || process.env.NODE_ENV === 'test';
|
|
25
26
|
|
|
26
|
-
|
|
27
|
+
|
|
27
28
|
const yieldToLoop = () => new Promise((resolve) => setImmediate(resolve));
|
|
28
29
|
|
|
29
30
|
let hnswlibPromise = null;
|
|
@@ -36,9 +37,9 @@ async function parseJsonInWorker(filePath) {
|
|
|
36
37
|
workerData: { filePath },
|
|
37
38
|
});
|
|
38
39
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
|
|
42
43
|
const finish = (handler, value) => {
|
|
43
44
|
if (settled) return;
|
|
44
45
|
settled = true;
|
|
@@ -87,7 +88,7 @@ async function readJsonFile(
|
|
|
87
88
|
try {
|
|
88
89
|
stats = await fs.stat(filePath);
|
|
89
90
|
} catch {
|
|
90
|
-
|
|
91
|
+
|
|
91
92
|
return null;
|
|
92
93
|
}
|
|
93
94
|
|
|
@@ -109,7 +110,7 @@ async function readJsonFile(
|
|
|
109
110
|
}
|
|
110
111
|
|
|
111
112
|
async function loadHnswlib() {
|
|
112
|
-
|
|
113
|
+
|
|
113
114
|
if (hnswlibLoadError) {
|
|
114
115
|
if (hnswlibLoadError._timestamp && Date.now() - hnswlibLoadError._timestamp > HNSWLIB_ERROR_RESET_MS) {
|
|
115
116
|
hnswlibLoadError = null;
|
|
@@ -127,7 +128,7 @@ async function loadHnswlib() {
|
|
|
127
128
|
return HierarchicalNSW;
|
|
128
129
|
})
|
|
129
130
|
.catch((err) => {
|
|
130
|
-
|
|
131
|
+
|
|
131
132
|
err._timestamp = Date.now();
|
|
132
133
|
hnswlibLoadError = err;
|
|
133
134
|
console.warn(`[ANN] hnswlib-node unavailable, using linear search (${err.message})`);
|
|
@@ -159,7 +160,7 @@ function readHnswIndex(index, filePath, maxElements) {
|
|
|
159
160
|
index.readIndexSync(filePath, maxElements);
|
|
160
161
|
return true;
|
|
161
162
|
} catch {
|
|
162
|
-
|
|
163
|
+
|
|
163
164
|
}
|
|
164
165
|
try {
|
|
165
166
|
index.readIndexSync(filePath);
|
|
@@ -181,7 +182,7 @@ function ensureFloat32(vector) {
|
|
|
181
182
|
if (!vector) return null;
|
|
182
183
|
if (vector instanceof Float32Array) return vector;
|
|
183
184
|
|
|
184
|
-
|
|
185
|
+
|
|
185
186
|
let result;
|
|
186
187
|
if (ArrayBuffer.isView(vector)) {
|
|
187
188
|
result = Float32Array.from(vector);
|
|
@@ -189,7 +190,7 @@ function ensureFloat32(vector) {
|
|
|
189
190
|
result = new Float32Array(vector);
|
|
190
191
|
}
|
|
191
192
|
|
|
192
|
-
|
|
193
|
+
|
|
193
194
|
if (IS_TEST_ENV && result.length > 0) {
|
|
194
195
|
for (let i = 0; i < result.length; i++) {
|
|
195
196
|
if (!Number.isFinite(result[i])) {
|
|
@@ -264,49 +265,45 @@ export class EmbeddingsCache {
|
|
|
264
265
|
embeddingDimension: config.embeddingDimension ?? null,
|
|
265
266
|
};
|
|
266
267
|
|
|
267
|
-
|
|
268
|
+
|
|
268
269
|
this.saveQueue = Promise.resolve();
|
|
269
270
|
this._saveTimer = null;
|
|
270
271
|
this._saveRequested = false;
|
|
271
272
|
this._savePromise = null;
|
|
272
273
|
|
|
273
|
-
|
|
274
|
+
|
|
274
275
|
this.annIndex = null;
|
|
275
276
|
this.annMeta = null;
|
|
276
|
-
this.annDirty = false;
|
|
277
|
-
this.annPersistDirty = false;
|
|
277
|
+
this.annDirty = false;
|
|
278
|
+
this.annPersistDirty = false;
|
|
278
279
|
this.annLoading = null;
|
|
279
280
|
this.annVectorCache = null;
|
|
280
281
|
|
|
281
|
-
|
|
282
|
+
|
|
282
283
|
this.fileCallData = new Map();
|
|
283
284
|
this.callGraph = null;
|
|
284
285
|
this._callGraphBuild = null;
|
|
285
286
|
|
|
286
|
-
|
|
287
|
+
|
|
287
288
|
this.binaryStore = null;
|
|
288
289
|
|
|
289
|
-
|
|
290
|
+
|
|
290
291
|
this.sqliteStore = null;
|
|
291
292
|
|
|
292
|
-
|
|
293
|
+
|
|
293
294
|
this.initErrors = [];
|
|
294
295
|
|
|
295
|
-
|
|
296
|
+
|
|
296
297
|
this.activeReads = 0;
|
|
297
298
|
this._readWaiters = [];
|
|
298
|
-
this._saveInProgress = false;
|
|
299
|
+
this._saveInProgress = false;
|
|
299
300
|
|
|
300
|
-
|
|
301
|
+
|
|
301
302
|
this._clearedAfterIndex = false;
|
|
302
303
|
this._loadPromise = null;
|
|
303
304
|
}
|
|
304
305
|
|
|
305
|
-
|
|
306
|
-
* Add an initialization error with consistent structure.
|
|
307
|
-
* @param {string} stage - The stage where the error occurred (e.g., 'loadHnswlib', 'ensureAnnIndex')
|
|
308
|
-
* @param {Error|string} error - The error object or message
|
|
309
|
-
*/
|
|
306
|
+
|
|
310
307
|
addInitError(stage, error) {
|
|
311
308
|
this.initErrors.push({
|
|
312
309
|
stage,
|
|
@@ -327,7 +324,7 @@ export class EmbeddingsCache {
|
|
|
327
324
|
try {
|
|
328
325
|
this.binaryStore.close?.();
|
|
329
326
|
} catch {
|
|
330
|
-
|
|
327
|
+
|
|
331
328
|
}
|
|
332
329
|
this.binaryStore = null;
|
|
333
330
|
}
|
|
@@ -335,7 +332,7 @@ export class EmbeddingsCache {
|
|
|
335
332
|
try {
|
|
336
333
|
this.sqliteStore.close?.();
|
|
337
334
|
} catch {
|
|
338
|
-
|
|
335
|
+
|
|
339
336
|
}
|
|
340
337
|
this.sqliteStore = null;
|
|
341
338
|
}
|
|
@@ -350,7 +347,7 @@ export class EmbeddingsCache {
|
|
|
350
347
|
try {
|
|
351
348
|
this.sqliteStore.close();
|
|
352
349
|
} catch {
|
|
353
|
-
|
|
350
|
+
|
|
354
351
|
}
|
|
355
352
|
this.sqliteStore = null;
|
|
356
353
|
}
|
|
@@ -394,7 +391,7 @@ export class EmbeddingsCache {
|
|
|
394
391
|
try {
|
|
395
392
|
await this.binaryStore.close();
|
|
396
393
|
} catch {
|
|
397
|
-
|
|
394
|
+
|
|
398
395
|
}
|
|
399
396
|
this.binaryStore = null;
|
|
400
397
|
}
|
|
@@ -403,7 +400,7 @@ export class EmbeddingsCache {
|
|
|
403
400
|
try {
|
|
404
401
|
this.sqliteStore.close();
|
|
405
402
|
} catch {
|
|
406
|
-
|
|
403
|
+
|
|
407
404
|
}
|
|
408
405
|
this.sqliteStore = null;
|
|
409
406
|
}
|
|
@@ -411,10 +408,10 @@ export class EmbeddingsCache {
|
|
|
411
408
|
this._clearedAfterIndex = true;
|
|
412
409
|
}
|
|
413
410
|
|
|
414
|
-
|
|
411
|
+
|
|
415
412
|
|
|
416
413
|
startRead() {
|
|
417
|
-
|
|
414
|
+
|
|
418
415
|
if (this._saveInProgress) {
|
|
419
416
|
throw new Error('Cache save in progress, try again shortly');
|
|
420
417
|
}
|
|
@@ -462,7 +459,7 @@ export class EmbeddingsCache {
|
|
|
462
459
|
if (!resolved) {
|
|
463
460
|
resolved = true;
|
|
464
461
|
timedOut = true;
|
|
465
|
-
|
|
462
|
+
|
|
466
463
|
const idx = this._readWaiters.indexOf(waiterResolve);
|
|
467
464
|
if (idx >= 0) this._readWaiters.splice(idx, 1);
|
|
468
465
|
resolve();
|
|
@@ -471,7 +468,7 @@ export class EmbeddingsCache {
|
|
|
471
468
|
}),
|
|
472
469
|
]);
|
|
473
470
|
if (timedOut) {
|
|
474
|
-
|
|
471
|
+
|
|
475
472
|
console.warn(
|
|
476
473
|
`[Cache] Timed out waiting for ${this.activeReads} active reader(s); proceeding with save anyway. ` +
|
|
477
474
|
'This may cause data inconsistency if readers access the store during write.'
|
|
@@ -480,19 +477,16 @@ export class EmbeddingsCache {
|
|
|
480
477
|
return !timedOut;
|
|
481
478
|
}
|
|
482
479
|
|
|
483
|
-
|
|
480
|
+
|
|
484
481
|
|
|
485
|
-
|
|
486
|
-
* Resets the cache state (clears vectors, hashes, and call graph).
|
|
487
|
-
* Used for forced reindexing.
|
|
488
|
-
*/
|
|
482
|
+
|
|
489
483
|
async reset() {
|
|
490
484
|
this.vectorStore = [];
|
|
491
485
|
if (this.binaryStore) {
|
|
492
486
|
try {
|
|
493
487
|
await this.binaryStore.close();
|
|
494
488
|
} catch {
|
|
495
|
-
|
|
489
|
+
|
|
496
490
|
}
|
|
497
491
|
this.binaryStore = null;
|
|
498
492
|
}
|
|
@@ -500,7 +494,7 @@ export class EmbeddingsCache {
|
|
|
500
494
|
try {
|
|
501
495
|
this.sqliteStore.close();
|
|
502
496
|
} catch {
|
|
503
|
-
|
|
497
|
+
|
|
504
498
|
}
|
|
505
499
|
this.sqliteStore = null;
|
|
506
500
|
}
|
|
@@ -510,7 +504,7 @@ export class EmbeddingsCache {
|
|
|
510
504
|
this.initErrors = [];
|
|
511
505
|
}
|
|
512
506
|
|
|
513
|
-
|
|
507
|
+
|
|
514
508
|
|
|
515
509
|
async load({ forceVectorLoadMode } = {}) {
|
|
516
510
|
if (!this.config.enableCache) return;
|
|
@@ -543,7 +537,7 @@ export class EmbeddingsCache {
|
|
|
543
537
|
}
|
|
544
538
|
};
|
|
545
539
|
|
|
546
|
-
|
|
540
|
+
|
|
547
541
|
let cacheData = null;
|
|
548
542
|
let hashData = null;
|
|
549
543
|
let prefetched = false;
|
|
@@ -558,7 +552,7 @@ export class EmbeddingsCache {
|
|
|
558
552
|
]);
|
|
559
553
|
}
|
|
560
554
|
|
|
561
|
-
|
|
555
|
+
|
|
562
556
|
const metaData = await fs.readFile(metaFile, 'utf-8').catch(() => null);
|
|
563
557
|
if (!metaData) {
|
|
564
558
|
console.warn('[Cache] Missing cache metadata, ignoring cache');
|
|
@@ -668,7 +662,7 @@ export class EmbeddingsCache {
|
|
|
668
662
|
}
|
|
669
663
|
}
|
|
670
664
|
|
|
671
|
-
|
|
665
|
+
|
|
672
666
|
if (useSqlite) {
|
|
673
667
|
try {
|
|
674
668
|
this.sqliteStore = await SqliteVectorStore.load(this.config.cacheDirectory);
|
|
@@ -678,7 +672,7 @@ export class EmbeddingsCache {
|
|
|
678
672
|
includeVector: effectiveVectorLoadMode !== 'disk',
|
|
679
673
|
});
|
|
680
674
|
} else {
|
|
681
|
-
|
|
675
|
+
|
|
682
676
|
console.warn('[Cache] vectorStoreFormat=sqlite but vectors.sqlite is missing. Reindex to regenerate the cache.');
|
|
683
677
|
}
|
|
684
678
|
} catch (err) {
|
|
@@ -710,7 +704,7 @@ export class EmbeddingsCache {
|
|
|
710
704
|
this.vectorStore = [];
|
|
711
705
|
this.fileHashes.clear();
|
|
712
706
|
|
|
713
|
-
|
|
707
|
+
|
|
714
708
|
for (const chunk of cacheData) {
|
|
715
709
|
if (applyExtensionFilter) {
|
|
716
710
|
if (!shouldKeepFile(chunk.file)) continue;
|
|
@@ -724,7 +718,7 @@ export class EmbeddingsCache {
|
|
|
724
718
|
}
|
|
725
719
|
|
|
726
720
|
if (hasHashData) {
|
|
727
|
-
|
|
721
|
+
|
|
728
722
|
for (const [file, entry] of rawHashes) {
|
|
729
723
|
if (!applyExtensionFilter || shouldKeepFile(file)) {
|
|
730
724
|
const normalized = normalizeFileHashEntry(entry);
|
|
@@ -745,7 +739,7 @@ export class EmbeddingsCache {
|
|
|
745
739
|
console.info(`[Cache] Loaded ${this.vectorStore.length} cached embeddings`);
|
|
746
740
|
}
|
|
747
741
|
|
|
748
|
-
|
|
742
|
+
|
|
749
743
|
this.annDirty = false;
|
|
750
744
|
this.annPersistDirty = false;
|
|
751
745
|
this.annIndex = null;
|
|
@@ -757,7 +751,7 @@ export class EmbeddingsCache {
|
|
|
757
751
|
console.warn('[Cache] Hashes exist without embeddings; ignoring file-hashes.json');
|
|
758
752
|
}
|
|
759
753
|
|
|
760
|
-
|
|
754
|
+
|
|
761
755
|
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
762
756
|
try {
|
|
763
757
|
const callGraphData = await fs.readFile(callGraphFile, 'utf8');
|
|
@@ -767,7 +761,7 @@ export class EmbeddingsCache {
|
|
|
767
761
|
console.info(`[Cache] Loaded call-graph data for ${this.fileCallData.size} files`);
|
|
768
762
|
}
|
|
769
763
|
} catch {
|
|
770
|
-
|
|
764
|
+
|
|
771
765
|
}
|
|
772
766
|
} catch (error) {
|
|
773
767
|
console.warn('[Cache] Failed to load cache:', error.message);
|
|
@@ -775,7 +769,7 @@ export class EmbeddingsCache {
|
|
|
775
769
|
}
|
|
776
770
|
}
|
|
777
771
|
|
|
778
|
-
|
|
772
|
+
|
|
779
773
|
|
|
780
774
|
save() {
|
|
781
775
|
if (!this.config.enableCache) return Promise.resolve();
|
|
@@ -809,18 +803,30 @@ export class EmbeddingsCache {
|
|
|
809
803
|
return this._savePromise;
|
|
810
804
|
}
|
|
811
805
|
|
|
812
|
-
async performSave() {
|
|
813
|
-
|
|
814
|
-
this._saveInProgress = true;
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
806
|
+
async performSave() {
|
|
807
|
+
|
|
808
|
+
this._saveInProgress = true;
|
|
809
|
+
if (
|
|
810
|
+
this.config.allowSystemWorkspaceCache !== true &&
|
|
811
|
+
this.config.searchDirectory &&
|
|
812
|
+
isNonProjectDirectory(this.config.searchDirectory)
|
|
813
|
+
) {
|
|
814
|
+
const source = this.config.workspaceResolution?.source || 'unknown';
|
|
815
|
+
console.warn(
|
|
816
|
+
`[Cache] Skipping cache save for non-project workspace (${source}): ${this.config.searchDirectory}`
|
|
817
|
+
);
|
|
818
|
+
this._saveInProgress = false;
|
|
819
|
+
return;
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
|
|
823
|
+
if (this.activeReads > 0) {
|
|
818
824
|
const timeoutMs = this.config.saveReaderWaitTimeoutMs ?? DEFAULT_READER_WAIT_TIMEOUT_MS;
|
|
819
825
|
const allReadersFinished = await this.waitForReadersWithTimeout(timeoutMs);
|
|
820
826
|
if (!allReadersFinished && !this.config.forceSaveWithActiveReaders) {
|
|
821
827
|
console.warn('[Cache] Aborting save - active readers still present after timeout');
|
|
822
|
-
this._saveInProgress = false;
|
|
823
|
-
return;
|
|
828
|
+
this._saveInProgress = false;
|
|
829
|
+
return;
|
|
824
830
|
}
|
|
825
831
|
}
|
|
826
832
|
|
|
@@ -833,8 +839,8 @@ export class EmbeddingsCache {
|
|
|
833
839
|
const hashFile = path.join(this.config.cacheDirectory, 'file-hashes.json');
|
|
834
840
|
const metaFile = path.join(this.config.cacheDirectory, CACHE_META_FILE);
|
|
835
841
|
|
|
836
|
-
|
|
837
|
-
|
|
842
|
+
|
|
843
|
+
|
|
838
844
|
const snapshotStore = Array.isArray(this.vectorStore) ? [...this.vectorStore] : [];
|
|
839
845
|
const supportsBackendVectorResolve =
|
|
840
846
|
this.config.vectorStoreFormat === 'binary' || this.config.vectorStoreFormat === 'sqlite';
|
|
@@ -896,12 +902,12 @@ export class EmbeddingsCache {
|
|
|
896
902
|
this.cacheMeta.chunksStored = this.binaryStore.length;
|
|
897
903
|
}
|
|
898
904
|
} else if (this.config.vectorStoreFormat === 'sqlite') {
|
|
899
|
-
|
|
905
|
+
|
|
900
906
|
if (this.sqliteStore) {
|
|
901
907
|
try {
|
|
902
908
|
this.sqliteStore.close();
|
|
903
909
|
} catch {
|
|
904
|
-
|
|
910
|
+
|
|
905
911
|
}
|
|
906
912
|
this.sqliteStore = null;
|
|
907
913
|
}
|
|
@@ -930,7 +936,7 @@ export class EmbeddingsCache {
|
|
|
930
936
|
highWaterMark: this.config.cacheWriteHighWaterMark ?? 256 * 1024,
|
|
931
937
|
floatDigits: this.config.cacheVectorFloatDigits ?? 6,
|
|
932
938
|
flushChars: this.config.cacheVectorFlushChars ?? 256 * 1024,
|
|
933
|
-
indent: '',
|
|
939
|
+
indent: '',
|
|
934
940
|
assumeFinite: this.config.cacheVectorAssumeFinite,
|
|
935
941
|
checkFinite: this.config.cacheVectorCheckFinite,
|
|
936
942
|
noMutation: this.config.cacheVectorNoMutation ?? false,
|
|
@@ -940,7 +946,7 @@ export class EmbeddingsCache {
|
|
|
940
946
|
|
|
941
947
|
await vectorWriter.writeStart();
|
|
942
948
|
|
|
943
|
-
|
|
949
|
+
|
|
944
950
|
const yieldEvery = total >= 50_000 ? 5000 : 0;
|
|
945
951
|
|
|
946
952
|
try {
|
|
@@ -969,7 +975,7 @@ export class EmbeddingsCache {
|
|
|
969
975
|
fs.writeFile(metaFile, JSON.stringify(this.cacheMeta, null, 2)),
|
|
970
976
|
]);
|
|
971
977
|
|
|
972
|
-
|
|
978
|
+
|
|
973
979
|
const callGraphFile = path.join(this.config.cacheDirectory, CALL_GRAPH_FILE);
|
|
974
980
|
if (this.fileCallData.size > 0) {
|
|
975
981
|
await fs.writeFile(
|
|
@@ -980,8 +986,8 @@ export class EmbeddingsCache {
|
|
|
980
986
|
await fs.rm(callGraphFile, { force: true });
|
|
981
987
|
}
|
|
982
988
|
|
|
983
|
-
|
|
984
|
-
|
|
989
|
+
|
|
990
|
+
|
|
985
991
|
if (
|
|
986
992
|
this.config.annIndexCache !== false &&
|
|
987
993
|
this.annPersistDirty &&
|
|
@@ -1007,7 +1013,7 @@ export class EmbeddingsCache {
|
|
|
1007
1013
|
}
|
|
1008
1014
|
} catch (error) {
|
|
1009
1015
|
console.warn('[Cache] Failed to save cache:', error.message);
|
|
1010
|
-
|
|
1016
|
+
|
|
1011
1017
|
if (
|
|
1012
1018
|
this.config.vectorStoreFormat === 'binary' &&
|
|
1013
1019
|
this.binaryStore &&
|
|
@@ -1021,10 +1027,10 @@ export class EmbeddingsCache {
|
|
|
1021
1027
|
console.info('[Cache] Binary store recovered.');
|
|
1022
1028
|
} catch (recoverErr) {
|
|
1023
1029
|
console.warn(`[Cache] Failed to recover binary store: ${recoverErr.message}`);
|
|
1024
|
-
this.binaryStore = null;
|
|
1030
|
+
this.binaryStore = null;
|
|
1025
1031
|
}
|
|
1026
1032
|
}
|
|
1027
|
-
|
|
1033
|
+
|
|
1028
1034
|
if (
|
|
1029
1035
|
this.config.vectorStoreFormat === 'sqlite' &&
|
|
1030
1036
|
!this.sqliteStore
|
|
@@ -1042,11 +1048,11 @@ export class EmbeddingsCache {
|
|
|
1042
1048
|
}
|
|
1043
1049
|
} finally {
|
|
1044
1050
|
this.isSaving = false;
|
|
1045
|
-
this._saveInProgress = false;
|
|
1051
|
+
this._saveInProgress = false;
|
|
1046
1052
|
}
|
|
1047
1053
|
}
|
|
1048
1054
|
|
|
1049
|
-
|
|
1055
|
+
|
|
1050
1056
|
|
|
1051
1057
|
getVectorStore() {
|
|
1052
1058
|
return Array.isArray(this.vectorStore) ? this.vectorStore : [];
|
|
@@ -1067,14 +1073,14 @@ export class EmbeddingsCache {
|
|
|
1067
1073
|
try {
|
|
1068
1074
|
await previousBinaryStore.close();
|
|
1069
1075
|
} catch {
|
|
1070
|
-
|
|
1076
|
+
|
|
1071
1077
|
}
|
|
1072
1078
|
}
|
|
1073
1079
|
if (previousSqliteStore) {
|
|
1074
1080
|
try {
|
|
1075
1081
|
previousSqliteStore.close();
|
|
1076
1082
|
} catch {
|
|
1077
|
-
|
|
1083
|
+
|
|
1078
1084
|
}
|
|
1079
1085
|
}
|
|
1080
1086
|
}
|
|
@@ -1185,7 +1191,7 @@ export class EmbeddingsCache {
|
|
|
1185
1191
|
if (entry) return await this.getChunkContent(entry, chunk);
|
|
1186
1192
|
if (!store && this.binaryStore) {
|
|
1187
1193
|
const content = await this.binaryStore.getContent(chunk);
|
|
1188
|
-
return content ?? '';
|
|
1194
|
+
return content ?? '';
|
|
1189
1195
|
}
|
|
1190
1196
|
if (!store && this.sqliteStore) {
|
|
1191
1197
|
return this.sqliteStore.getContent(chunk) ?? '';
|
|
@@ -1197,12 +1203,12 @@ export class EmbeddingsCache {
|
|
|
1197
1203
|
}
|
|
1198
1204
|
if (this.binaryStore && Number.isInteger(chunk?._binaryIndex)) {
|
|
1199
1205
|
const content = await this.binaryStore.getContent(chunk._binaryIndex);
|
|
1200
|
-
return content ?? '';
|
|
1206
|
+
return content ?? '';
|
|
1201
1207
|
}
|
|
1202
1208
|
const resolved = Number.isInteger(index) ? index : chunk?._index;
|
|
1203
1209
|
if (this.binaryStore && !Array.isArray(this.vectorStore) && Number.isInteger(resolved)) {
|
|
1204
1210
|
const content = await this.binaryStore.getContent(resolved);
|
|
1205
|
-
return content ?? '';
|
|
1211
|
+
return content ?? '';
|
|
1206
1212
|
}
|
|
1207
1213
|
if (this.sqliteStore) {
|
|
1208
1214
|
const sqliteIndex = Number.isInteger(chunk?._sqliteIndex)
|
|
@@ -1221,15 +1227,10 @@ export class EmbeddingsCache {
|
|
|
1221
1227
|
this.fileHashes.delete(file);
|
|
1222
1228
|
}
|
|
1223
1229
|
|
|
1224
|
-
|
|
1225
|
-
* Remove all chunks for a given file from the vector store.
|
|
1226
|
-
* Note: This is async to support future backend-specific cleanup.
|
|
1227
|
-
* For binary/SQLite stores, actual removal happens on next full save.
|
|
1228
|
-
* @param {string} file - Absolute path of file to remove
|
|
1229
|
-
*/
|
|
1230
|
+
|
|
1230
1231
|
async removeFileFromStore(file) {
|
|
1231
1232
|
if (!Array.isArray(this.vectorStore)) return;
|
|
1232
|
-
|
|
1233
|
+
|
|
1233
1234
|
let w = 0;
|
|
1234
1235
|
for (let r = 0; r < this.vectorStore.length; r++) {
|
|
1235
1236
|
const chunk = this.vectorStore[r];
|
|
@@ -1240,10 +1241,10 @@ export class EmbeddingsCache {
|
|
|
1240
1241
|
}
|
|
1241
1242
|
this.vectorStore.length = w;
|
|
1242
1243
|
|
|
1243
|
-
|
|
1244
|
+
|
|
1244
1245
|
this.invalidateAnnIndex();
|
|
1245
1246
|
this.removeFileCallData(file);
|
|
1246
|
-
|
|
1247
|
+
|
|
1247
1248
|
this.fileHashes.delete(file);
|
|
1248
1249
|
}
|
|
1249
1250
|
|
|
@@ -1261,7 +1262,7 @@ export class EmbeddingsCache {
|
|
|
1261
1262
|
this.annVectorCache.push(chunk.vector);
|
|
1262
1263
|
}
|
|
1263
1264
|
|
|
1264
|
-
|
|
1265
|
+
|
|
1265
1266
|
if (
|
|
1266
1267
|
this.annIndex &&
|
|
1267
1268
|
!this.annDirty &&
|
|
@@ -1276,7 +1277,7 @@ export class EmbeddingsCache {
|
|
|
1276
1277
|
this.annPersistDirty = true;
|
|
1277
1278
|
return;
|
|
1278
1279
|
} catch {
|
|
1279
|
-
|
|
1280
|
+
|
|
1280
1281
|
}
|
|
1281
1282
|
}
|
|
1282
1283
|
|
|
@@ -1338,20 +1339,9 @@ export class EmbeddingsCache {
|
|
|
1338
1339
|
};
|
|
1339
1340
|
}
|
|
1340
1341
|
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
* Ensure ANN (Approximate Nearest Neighbor) index is built and ready.
|
|
1345
|
-
* Loads from disk cache if available and valid, otherwise builds a new index.
|
|
1346
|
-
*
|
|
1347
|
-
* @returns {Promise<HierarchicalNSW|null>} The HNSW index, or null if:
|
|
1348
|
-
* - ANN is disabled in config
|
|
1349
|
-
* - vectorStore is not an array
|
|
1350
|
-
* - vectorStore size is below annMinChunks threshold
|
|
1351
|
-
* - hnswlib-node is not available
|
|
1352
|
-
* - Vector dimension mismatch detected
|
|
1353
|
-
* @note This method is safe to call multiple times; concurrent calls share the same promise.
|
|
1354
|
-
*/
|
|
1342
|
+
|
|
1343
|
+
|
|
1344
|
+
|
|
1355
1345
|
async ensureAnnIndex() {
|
|
1356
1346
|
if (!this.config.annEnabled) return null;
|
|
1357
1347
|
if (!Array.isArray(this.vectorStore)) return null;
|
|
@@ -1375,8 +1365,8 @@ export class EmbeddingsCache {
|
|
|
1375
1365
|
this.sqliteStore?.dim;
|
|
1376
1366
|
if (!dim) return null;
|
|
1377
1367
|
|
|
1378
|
-
|
|
1379
|
-
|
|
1368
|
+
|
|
1369
|
+
|
|
1380
1370
|
let dimensionMismatch = false;
|
|
1381
1371
|
const sampleSize = Math.min(ANN_DIMENSION_SAMPLE_SIZE, this.vectorStore.length);
|
|
1382
1372
|
const step = Math.max(1, Math.floor(this.vectorStore.length / sampleSize));
|
|
@@ -1394,7 +1384,7 @@ export class EmbeddingsCache {
|
|
|
1394
1384
|
|
|
1395
1385
|
if (dimensionMismatch) {
|
|
1396
1386
|
this.addInitError('ensureAnnIndex', `Vector dimension inconsistency detected. Expected ${dim}. Full reindex required.`);
|
|
1397
|
-
return null;
|
|
1387
|
+
return null;
|
|
1398
1388
|
}
|
|
1399
1389
|
|
|
1400
1390
|
if (!this.annDirty && this.config.annIndexCache !== false) {
|
|
@@ -1549,16 +1539,7 @@ export class EmbeddingsCache {
|
|
|
1549
1539
|
}
|
|
1550
1540
|
}
|
|
1551
1541
|
|
|
1552
|
-
|
|
1553
|
-
* Query the ANN index for k nearest neighbors.
|
|
1554
|
-
* Falls back gracefully to empty results if ANN is unavailable.
|
|
1555
|
-
*
|
|
1556
|
-
* @param {Float32Array|number[]} queryVector - Normalized query embedding
|
|
1557
|
-
* @param {number} k - Number of neighbors to return
|
|
1558
|
-
* @returns {Promise<number[]>} Array of chunk indices sorted by similarity (may be empty)
|
|
1559
|
-
* @throws Never throws - returns empty array on all error conditions
|
|
1560
|
-
* @note Automatically invalidates corrupted index and falls back to linear search on next query
|
|
1561
|
-
*/
|
|
1542
|
+
|
|
1562
1543
|
async queryAnn(queryVector, k) {
|
|
1563
1544
|
if (!Array.isArray(this.vectorStore) || this.vectorStore.length === 0) return [];
|
|
1564
1545
|
const index = await this.ensureAnnIndex();
|
|
@@ -1566,14 +1547,14 @@ export class EmbeddingsCache {
|
|
|
1566
1547
|
|
|
1567
1548
|
const qVec = queryVector instanceof Float32Array ? queryVector : new Float32Array(queryVector);
|
|
1568
1549
|
|
|
1569
|
-
|
|
1550
|
+
|
|
1570
1551
|
let results;
|
|
1571
1552
|
try {
|
|
1572
1553
|
results = index.searchKnn(qVec, k);
|
|
1573
1554
|
} catch (err) {
|
|
1574
1555
|
console.warn(`[ANN] searchKnn failed: ${err.message}. Falling back to linear search.`);
|
|
1575
1556
|
this.addInitError('queryAnn', err);
|
|
1576
|
-
|
|
1557
|
+
|
|
1577
1558
|
this.invalidateAnnIndex();
|
|
1578
1559
|
return [];
|
|
1579
1560
|
}
|
|
@@ -1599,7 +1580,7 @@ export class EmbeddingsCache {
|
|
|
1599
1580
|
try {
|
|
1600
1581
|
await this.binaryStore.close();
|
|
1601
1582
|
} catch {
|
|
1602
|
-
|
|
1583
|
+
|
|
1603
1584
|
}
|
|
1604
1585
|
}
|
|
1605
1586
|
this.binaryStore = null;
|
|
@@ -1607,7 +1588,7 @@ export class EmbeddingsCache {
|
|
|
1607
1588
|
try {
|
|
1608
1589
|
this.sqliteStore.close();
|
|
1609
1590
|
} catch {
|
|
1610
|
-
|
|
1591
|
+
|
|
1611
1592
|
}
|
|
1612
1593
|
}
|
|
1613
1594
|
this.sqliteStore = null;
|
|
@@ -1623,12 +1604,7 @@ export class EmbeddingsCache {
|
|
|
1623
1604
|
}
|
|
1624
1605
|
}
|
|
1625
1606
|
|
|
1626
|
-
|
|
1627
|
-
* Adjust efSearch at runtime for speed/accuracy tradeoff.
|
|
1628
|
-
* Higher values = more accurate but slower.
|
|
1629
|
-
* @param {number} efSearch - New efSearch value (typically 16-512)
|
|
1630
|
-
* @returns {object} Result with success status and current config
|
|
1631
|
-
*/
|
|
1607
|
+
|
|
1632
1608
|
setEfSearch(efSearch) {
|
|
1633
1609
|
if (typeof efSearch !== 'number' || efSearch < 1 || efSearch > 1000) {
|
|
1634
1610
|
return {
|
|
@@ -1655,10 +1631,7 @@ export class EmbeddingsCache {
|
|
|
1655
1631
|
return { success: true, applied: false, efSearch };
|
|
1656
1632
|
}
|
|
1657
1633
|
|
|
1658
|
-
|
|
1659
|
-
* Get current ANN index statistics for diagnostics.
|
|
1660
|
-
* @returns {object} ANN stats including index state, config, and vector count
|
|
1661
|
-
*/
|
|
1634
|
+
|
|
1662
1635
|
getAnnStats() {
|
|
1663
1636
|
return {
|
|
1664
1637
|
enabled: this.config.annEnabled ?? false,
|
|
@@ -1679,7 +1652,7 @@ export class EmbeddingsCache {
|
|
|
1679
1652
|
};
|
|
1680
1653
|
}
|
|
1681
1654
|
|
|
1682
|
-
|
|
1655
|
+
|
|
1683
1656
|
|
|
1684
1657
|
async clearCallGraphData({ removeFile = false } = {}) {
|
|
1685
1658
|
this.fileCallData.clear();
|
|
@@ -1728,20 +1701,13 @@ export class EmbeddingsCache {
|
|
|
1728
1701
|
return this.fileCallData.size;
|
|
1729
1702
|
}
|
|
1730
1703
|
|
|
1731
|
-
|
|
1732
|
-
* Sets call data for a specific file.
|
|
1733
|
-
* @param {string} file
|
|
1734
|
-
* @param {object} data
|
|
1735
|
-
*/
|
|
1704
|
+
|
|
1736
1705
|
setFileCallData(file, data) {
|
|
1737
1706
|
this.fileCallData.set(file, data);
|
|
1738
1707
|
this.callGraph = null;
|
|
1739
1708
|
}
|
|
1740
1709
|
|
|
1741
|
-
|
|
1742
|
-
* Sets the entire file call data map.
|
|
1743
|
-
* @param {Map<string, object>|object} entries
|
|
1744
|
-
*/
|
|
1710
|
+
|
|
1745
1711
|
setFileCallDataEntries(entries) {
|
|
1746
1712
|
if (entries instanceof Map) {
|
|
1747
1713
|
this.fileCallData = entries;
|
|
@@ -1808,12 +1774,9 @@ export class EmbeddingsCache {
|
|
|
1808
1774
|
};
|
|
1809
1775
|
}
|
|
1810
1776
|
|
|
1811
|
-
|
|
1777
|
+
|
|
1812
1778
|
|
|
1813
|
-
|
|
1814
|
-
* Returns the total number of chunks in the store.
|
|
1815
|
-
* @returns {number}
|
|
1816
|
-
*/
|
|
1779
|
+
|
|
1817
1780
|
getStoreSize() {
|
|
1818
1781
|
if (Array.isArray(this.vectorStore)) return this.vectorStore.length;
|
|
1819
1782
|
if (this.binaryStore) return this.binaryStore.length;
|
|
@@ -1821,20 +1784,12 @@ export class EmbeddingsCache {
|
|
|
1821
1784
|
return 0;
|
|
1822
1785
|
}
|
|
1823
1786
|
|
|
1824
|
-
|
|
1825
|
-
* Retrieves a vector by its store index.
|
|
1826
|
-
* @param {number} index
|
|
1827
|
-
* @returns {Float32Array|null}
|
|
1828
|
-
*/
|
|
1787
|
+
|
|
1829
1788
|
getVector(index) {
|
|
1830
1789
|
return this.getChunkVector(index);
|
|
1831
1790
|
}
|
|
1832
1791
|
|
|
1833
|
-
|
|
1834
|
-
* Retrieves a chunk object by its store index.
|
|
1835
|
-
* @param {number} index
|
|
1836
|
-
* @returns {object|null}
|
|
1837
|
-
*/
|
|
1792
|
+
|
|
1838
1793
|
getChunk(index) {
|
|
1839
1794
|
if (Array.isArray(this.vectorStore) && index >= 0 && index < this.vectorStore.length) {
|
|
1840
1795
|
return this.vectorStore[index];
|