@softerist/heuristic-mcp 3.0.12 → 3.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -56
- package/config.jsonc +173 -102
- package/index.js +69 -57
- package/lib/cache.js +55 -26
- package/lib/config.js +528 -79
- package/lib/constants.js +27 -0
- package/lib/embed-query-process.js +7 -6
- package/lib/embedding-process.js +113 -27
- package/lib/embedding-worker.js +299 -180
- package/lib/project-detector.js +1 -1
- package/lib/vector-store-binary.js +64 -55
- package/lib/vector-store-sqlite.js +83 -73
- package/package.json +1 -1

package/lib/vector-store-binary.js
CHANGED

@@ -433,7 +433,18 @@ export class BinaryVectorStore {
     return map;
   }
 
-  static async write(
+  static async write(
+    cacheDir,
+    chunks,
+    {
+      contentCacheEntries,
+      vectorCacheEntries,
+      vectorLoadMode,
+      getContent,
+      getVector,
+      preRename,
+    } = {}
+  ) {
     ensureLittleEndian();
     const { vectorsPath, recordsPath, contentPath, filesPath } =
       BinaryVectorStore.getPaths(cacheDir);

@@ -446,8 +457,6 @@ export class BinaryVectorStore {
 
     const fileIds = new Map();
     const files = [];
-    let dim = null;
-
     const denseChunks = [];
     const denseSourceIndices = [];
     for (let i = 0; i < chunks.length; i += 1) {

@@ -457,43 +466,56 @@ export class BinaryVectorStore {
       denseSourceIndices.push(i);
     }
 
-      const
-
-
-
-
-
-
-
-
-
-      files.push(file);
+    const resolveVector = async (chunk, sourceIndex) => {
+      let vectorSource = chunk.vector;
+      if (
+        (vectorSource === undefined || vectorSource === null) &&
+        typeof getVector === 'function'
+      ) {
+        vectorSource = getVector(chunk, sourceIndex);
+        if (vectorSource && typeof vectorSource.then === 'function') {
+          vectorSource = await vectorSource;
+        }
       }
-
-      if (chunk.vector === undefined || chunk.vector === null) {
+      if (vectorSource === undefined || vectorSource === null) {
         throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
       }
       const vector =
-
-
-
-
-
+        vectorSource instanceof Float32Array
+          ? vectorSource
+          : ArrayBuffer.isView(vectorSource)
+            ? Float32Array.from(vectorSource)
+            : new Float32Array(vectorSource);
+      if (!vector || vector.length === 0) {
         throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
       }
-
-
-      } else if (vector.length !== dim) {
-        throw new Error('Vector dimension mismatch in binary cache write');
-      }
+      return vector;
+    };
 
+    const resolveContent = async (chunk, sourceIndex) => {
       const contentSource =
         chunk.content !== undefined && chunk.content !== null
           ? chunk.content
           : getContent
             ? await getContent(chunk, sourceIndex)
             : '';
-
+      return normalizeContent(contentSource);
+    };
+
+    const recordEntries = new Array(denseChunks.length);
+    let contentOffset = 0;
+
+    for (let i = 0; i < denseChunks.length; i += 1) {
+      const chunk = denseChunks[i];
+      const sourceIndex = denseSourceIndices[i];
+
+      const file = chunk.file;
+      if (!fileIds.has(file)) {
+        fileIds.set(file, files.length);
+        files.push(file);
+      }
+
+      const contentValue = await resolveContent(chunk, sourceIndex);
       const contentLength = Buffer.byteLength(contentValue, 'utf-8');
 
       recordEntries[i] = {

@@ -502,14 +524,14 @@ export class BinaryVectorStore {
         endLine: chunk.endLine ?? 0,
         contentOffset,
         contentLength,
-        vector,
       };
 
       contentOffset += contentLength;
     }
 
-    if (!dim) dim = 0;
     const count = denseChunks.length;
+    const dim =
+      count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
 
     await fs.writeFile(filesTmp, JSON.stringify(files));
 

@@ -555,25 +577,23 @@ export class BinaryVectorStore {
       await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
       recordPos += recordBuffer.length;
 
+      const chunk = denseChunks[i];
+      const sourceIndex = denseSourceIndices[i];
+      const vector = await resolveVector(chunk, sourceIndex);
+      if (vector.length !== dim) {
+        throw new Error('Vector dimension mismatch in binary cache write');
+      }
       const vectorBuffer = Buffer.from(
-
-
-
+        vector.buffer,
+        vector.byteOffset,
+        vector.byteLength
       );
       await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
       vectorPos += vectorBuffer.length;
 
       if (entry.contentLength > 0) {
         // Re-fetch content to avoid holding all strings in memory
-        const
-        const sourceIndex = denseSourceIndices[i];
-        const contentSource =
-          chunk.content !== undefined && chunk.content !== null
-            ? chunk.content
-            : getContent
-              ? await getContent(chunk, sourceIndex)
-              : '';
-        const val = normalizeContent(contentSource);
+        const val = await resolveContent(chunk, sourceIndex);
         const contentBuffer = Buffer.from(val, 'utf-8');
         await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
         contentPos += contentBuffer.length;

@@ -598,21 +618,10 @@ export class BinaryVectorStore {
       renameWithRetry(filesTmp, filesPath),
     ]);
 
-
-      fs.readFile(vectorsPath),
-      fs.readFile(recordsPath),
-    ]);
-    const contentReadHandle = await fs.open(contentPath, 'r');
-
-    return new BinaryVectorStore({
-      vectorsBuffer,
-      recordsBuffer,
-      contentHandle: contentReadHandle,
-      contentSize: contentOffset,
-      files,
-      dim,
-      count,
+    return BinaryVectorStore.load(cacheDir, {
       contentCacheEntries,
+      vectorCacheEntries,
+      vectorLoadMode,
     });
   }
 }
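
The net effect of the BinaryVectorStore.write changes visible above: chunks no longer need a vector or content attached up front. write() resolves both on demand through the optional getVector/getContent callbacks (synchronous or promise-returning), derives the dimension from the first dense chunk, and finishes by re-opening the cache through BinaryVectorStore.load. A minimal caller sketch under those assumptions; loadVector and readChunkText are hypothetical helpers, the cache directory is a placeholder, and the dense-chunk filtering that precedes the write loop is not shown in this diff:

import { BinaryVectorStore } from './lib/vector-store-binary.js';

// Chunk metadata only; vector and content are supplied lazily via the callbacks below.
const chunks = [
  { file: 'src/index.js', startLine: 1, endLine: 40 },
  { file: 'src/util.js', startLine: 5, endLine: 30 },
];

const store = await BinaryVectorStore.write('.cache/vectors', chunks, {
  // May return a Float32Array, another TypedArray, a plain number[], or a promise of one.
  getVector: (chunk, sourceIndex) => loadVector(chunk, sourceIndex),     // hypothetical helper
  // Called only when chunk.content is missing; result goes through normalizeContent().
  getContent: (chunk, sourceIndex) => readChunkText(chunk, sourceIndex), // hypothetical helper
});

In the hunks shown, contentCacheEntries, vectorCacheEntries, and vectorLoadMode are not consumed during the write itself; they are only forwarded to BinaryVectorStore.load when the freshly written cache is reopened and returned.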

package/lib/vector-store-sqlite.js
CHANGED

@@ -264,7 +264,7 @@ export class SqliteVectorStore {
    * @param {Array} chunks - Array of chunk objects with vector, file, startLine, endLine, content
    * @param {Object} options - { getContent, preRename }
    */
-  static async write(cacheDir, chunks, { getContent, preRename } = {}) {
+  static async write(cacheDir, chunks, { getContent, getVector, preRename } = {}) {
     if (!chunks || chunks.length === 0) {
       return null;
     }

@@ -286,10 +286,10 @@
       denseSourceIndices.push(i);
     }
 
-    const resolveContent = async (chunk, sourceIndex) => {
-      if (chunk.content !== undefined && chunk.content !== null) {
-        return chunk.content;
-      }
+    const resolveContent = async (chunk, sourceIndex) => {
+      if (chunk.content !== undefined && chunk.content !== null) {
+        return chunk.content;
+      }
       if (typeof getContent === 'function') {
         const value = getContent(chunk, sourceIndex);
         if (value && typeof value.then === 'function') {

@@ -297,46 +297,39 @@
         }
         return value;
       }
-      return null;
-    };
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        endLine: chunk.endLine ?? 0,
-        content,
-        vectorBlob: Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength),
-      };
-    }
-
-    if (!dim) dim = 0;
+      return null;
+    };
+
+    const resolveVector = async (chunk, sourceIndex) => {
+      let vectorSource = chunk.vector;
+      if (
+        (vectorSource === undefined || vectorSource === null) &&
+        typeof getVector === 'function'
+      ) {
+        vectorSource = getVector(chunk, sourceIndex);
+        if (vectorSource && typeof vectorSource.then === 'function') {
+          vectorSource = await vectorSource;
+        }
+      }
+      if (vectorSource === undefined || vectorSource === null) {
+        throw new Error(`Missing vector data for sqlite cache write at index ${sourceIndex}`);
+      }
+      const vector =
+        vectorSource instanceof Float32Array
+          ? vectorSource
+          : ArrayBuffer.isView(vectorSource)
+            ? Float32Array.from(vectorSource)
+            : Float32Array.from(vectorSource);
+      if (!vector || vector.length === 0) {
+        throw new Error(`Empty vector data for sqlite cache write at index ${sourceIndex}`);
+      }
+      return vector;
+    };
+
+    const dim =
+      denseChunks.length > 0
+        ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length
+        : 0;
 
     // Create new database
     const db = new Database(writePath);

@@ -371,34 +364,51 @@
       CREATE INDEX idx_chunks_file ON chunks(file);
     `);
 
-    // Insert metadata
-    const insertMeta = db.prepare(`INSERT INTO metadata (key, value) VALUES (?, ?)`);
-    insertMeta.run('version', String(STORE_VERSION));
-    insertMeta.run('dim', String(dim));
-    insertMeta.run('count', String(
-    insertMeta.run('createdAt', new Date().toISOString());
-
-    // Insert chunks in a transaction for speed
-    const insertChunk = db.prepare(`
-      INSERT INTO chunks (id, file, startLine, endLine, content, vector)
-      VALUES (?, ?, ?, ?, ?, ?)
-    `);
-
-
-
-
-
-
-
-
-
-
-
-      );
-
-
-
-
+    // Insert metadata
+    const insertMeta = db.prepare(`INSERT INTO metadata (key, value) VALUES (?, ?)`);
+    insertMeta.run('version', String(STORE_VERSION));
+    insertMeta.run('dim', String(dim));
+    insertMeta.run('count', String(denseChunks.length));
+    insertMeta.run('createdAt', new Date().toISOString());
+
+    // Insert chunks in a transaction for speed without pre-materializing all vectors/content.
+    const insertChunk = db.prepare(`
+      INSERT INTO chunks (id, file, startLine, endLine, content, vector)
+      VALUES (?, ?, ?, ?, ?, ?)
+    `);
+
+    db.exec('BEGIN');
+    try {
+      for (let i = 0; i < denseChunks.length; i += 1) {
+        const chunk = denseChunks[i];
+        const sourceIndex = denseSourceIndices[i];
+        const vector = await resolveVector(chunk, sourceIndex);
+        if (vector.length !== dim) {
+          throw new Error('Vector dimension mismatch in sqlite cache write');
+        }
+        let content = await resolveContent(chunk, sourceIndex);
+        if (content === undefined) content = null;
+        if (content !== null && typeof content !== 'string') {
+          content = String(content);
+        }
+        insertChunk.run(
+          i,
+          chunk.file,
+          chunk.startLine ?? 0,
+          chunk.endLine ?? 0,
+          content,
+          Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength)
+        );
+      }
+      db.exec('COMMIT');
+    } catch (error) {
+      try {
+        db.exec('ROLLBACK');
+      } catch {
+        // ignore rollback errors
+      }
+      throw error;
+    }
 
     // Optimize the database
     db.exec('ANALYZE');

package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@softerist/heuristic-mcp",
-  "version": "3.0.12",
+  "version": "3.0.14",
   "description": "An enhanced MCP server providing intelligent semantic code search with find-similar-code, recency ranking, and improved chunking. Fork of smart-coding-mcp.",
   "type": "module",
   "main": "index.js",