@softerist/heuristic-mcp 3.0.12 → 3.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -433,7 +433,18 @@ export class BinaryVectorStore {
     return map;
   }
 
-  static async write(cacheDir, chunks, { contentCacheEntries, getContent, preRename } = {}) {
+  static async write(
+    cacheDir,
+    chunks,
+    {
+      contentCacheEntries,
+      vectorCacheEntries,
+      vectorLoadMode,
+      getContent,
+      getVector,
+      preRename,
+    } = {}
+  ) {
     ensureLittleEndian();
     const { vectorsPath, recordsPath, contentPath, filesPath } =
       BinaryVectorStore.getPaths(cacheDir);
@@ -446,8 +457,6 @@ export class BinaryVectorStore {
 
     const fileIds = new Map();
     const files = [];
-    let dim = null;
-
     const denseChunks = [];
     const denseSourceIndices = [];
     for (let i = 0; i < chunks.length; i += 1) {
@@ -457,43 +466,56 @@ export class BinaryVectorStore {
       denseSourceIndices.push(i);
     }
 
-    const recordEntries = new Array(denseChunks.length);
-    let contentOffset = 0;
-
-    for (let i = 0; i < denseChunks.length; i += 1) {
-      const chunk = denseChunks[i];
-      const sourceIndex = denseSourceIndices[i];
-
-      const file = chunk.file;
-      if (!fileIds.has(file)) {
-        fileIds.set(file, files.length);
-        files.push(file);
+    const resolveVector = async (chunk, sourceIndex) => {
+      let vectorSource = chunk.vector;
+      if (
+        (vectorSource === undefined || vectorSource === null) &&
+        typeof getVector === 'function'
+      ) {
+        vectorSource = getVector(chunk, sourceIndex);
+        if (vectorSource && typeof vectorSource.then === 'function') {
+          vectorSource = await vectorSource;
+        }
       }
-
-      if (chunk.vector === undefined || chunk.vector === null) {
+      if (vectorSource === undefined || vectorSource === null) {
         throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
       }
       const vector =
-        chunk.vector instanceof Float32Array ? chunk.vector : new Float32Array(chunk.vector);
-      if (!vector) {
-        throw new Error('Missing vector data for binary cache write');
-      }
-      if (vector.length === 0) {
+        vectorSource instanceof Float32Array
+          ? vectorSource
+          : ArrayBuffer.isView(vectorSource)
+            ? Float32Array.from(vectorSource)
+            : new Float32Array(vectorSource);
+      if (!vector || vector.length === 0) {
         throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
       }
-      if (dim === null) {
-        dim = vector.length;
-      } else if (vector.length !== dim) {
-        throw new Error('Vector dimension mismatch in binary cache write');
-      }
+      return vector;
+    };
 
+    const resolveContent = async (chunk, sourceIndex) => {
       const contentSource =
         chunk.content !== undefined && chunk.content !== null
           ? chunk.content
           : getContent
            ? await getContent(chunk, sourceIndex)
            : '';
-      const contentValue = normalizeContent(contentSource);
+      return normalizeContent(contentSource);
+    };
+
+    const recordEntries = new Array(denseChunks.length);
+    let contentOffset = 0;
+
+    for (let i = 0; i < denseChunks.length; i += 1) {
+      const chunk = denseChunks[i];
+      const sourceIndex = denseSourceIndices[i];
+
+      const file = chunk.file;
+      if (!fileIds.has(file)) {
+        fileIds.set(file, files.length);
+        files.push(file);
+      }
+
+      const contentValue = await resolveContent(chunk, sourceIndex);
       const contentLength = Buffer.byteLength(contentValue, 'utf-8');
 
       recordEntries[i] = {
@@ -502,14 +524,14 @@ export class BinaryVectorStore {
         endLine: chunk.endLine ?? 0,
         contentOffset,
         contentLength,
-        vector,
       };
 
       contentOffset += contentLength;
     }
 
-    if (!dim) dim = 0;
     const count = denseChunks.length;
+    const dim =
+      count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
 
     await fs.writeFile(filesTmp, JSON.stringify(files));
 
@@ -555,25 +577,23 @@ export class BinaryVectorStore {
       await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
       recordPos += recordBuffer.length;
 
+      const chunk = denseChunks[i];
+      const sourceIndex = denseSourceIndices[i];
+      const vector = await resolveVector(chunk, sourceIndex);
+      if (vector.length !== dim) {
+        throw new Error('Vector dimension mismatch in binary cache write');
+      }
       const vectorBuffer = Buffer.from(
-        entry.vector.buffer,
-        entry.vector.byteOffset,
-        entry.vector.byteLength
+        vector.buffer,
+        vector.byteOffset,
+        vector.byteLength
       );
       await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
       vectorPos += vectorBuffer.length;
 
       if (entry.contentLength > 0) {
         // Re-fetch content to avoid holding all strings in memory
-        const chunk = denseChunks[i];
-        const sourceIndex = denseSourceIndices[i];
-        const contentSource =
-          chunk.content !== undefined && chunk.content !== null
-            ? chunk.content
-            : getContent
-              ? await getContent(chunk, sourceIndex)
-              : '';
-        const val = normalizeContent(contentSource);
+        const val = await resolveContent(chunk, sourceIndex);
         const contentBuffer = Buffer.from(val, 'utf-8');
         await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
         contentPos += contentBuffer.length;
@@ -598,21 +618,10 @@ export class BinaryVectorStore {
       renameWithRetry(filesTmp, filesPath),
     ]);
 
-    const [vectorsBuffer, recordsBuffer] = await Promise.all([
-      fs.readFile(vectorsPath),
-      fs.readFile(recordsPath),
-    ]);
-    const contentReadHandle = await fs.open(contentPath, 'r');
-
-    return new BinaryVectorStore({
-      vectorsBuffer,
-      recordsBuffer,
-      contentHandle: contentReadHandle,
-      contentSize: contentOffset,
-      files,
-      dim,
-      count,
+    return BinaryVectorStore.load(cacheDir, {
       contentCacheEntries,
+      vectorCacheEntries,
+      vectorLoadMode,
     });
   }
 }
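
The net effect in the binary store: chunks no longer need to carry a materialized `vector`. When `chunk.vector` is absent, `write` falls back to the new `getVector(chunk, sourceIndex)` callback (synchronous or promise-returning), mirroring the existing `getContent` hook, and resolves each vector inside the write loop instead of buffering all of them in `recordEntries`. A minimal caller sketch — the import path and the `embed()` helper are assumptions for illustration, not shown in this diff:

```js
// import { BinaryVectorStore } from '...'; // actual export path not shown in the diff

const chunks = [
  { file: 'src/a.js', startLine: 1, endLine: 40, content: 'function a() {}' },
  { file: 'src/b.js', startLine: 1, endLine: 25, content: 'function b() {}' },
];

const store = await BinaryVectorStore.write('.cache', chunks, {
  // Invoked only when chunk.vector is null/undefined. Per resolveVector, it may
  // return a Float32Array, any TypedArray, a plain number array, or a promise of one.
  getVector: (chunk, sourceIndex) => embed(chunk.content), // embed() is hypothetical
});
```

Note that `resolveVector` runs once on the first dense chunk to derive `dim` and again for every chunk in the write loop, so `getVector` can be called twice for the first chunk; callers producing expensive embeddings may want to memoize.
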
@@ -264,7 +264,7 @@ export class SqliteVectorStore {
    * @param {Array} chunks - Array of chunk objects with vector, file, startLine, endLine, content
    * @param {Object} options - { getContent, preRename }
    */
-  static async write(cacheDir, chunks, { getContent, preRename } = {}) {
+  static async write(cacheDir, chunks, { getContent, getVector, preRename } = {}) {
     if (!chunks || chunks.length === 0) {
       return null;
     }
@@ -286,10 +286,10 @@ export class SqliteVectorStore {
       denseSourceIndices.push(i);
     }
 
-    const resolveContent = async (chunk, sourceIndex) => {
-      if (chunk.content !== undefined && chunk.content !== null) {
-        return chunk.content;
-      }
+    const resolveContent = async (chunk, sourceIndex) => {
+      if (chunk.content !== undefined && chunk.content !== null) {
+        return chunk.content;
+      }
       if (typeof getContent === 'function') {
         const value = getContent(chunk, sourceIndex);
         if (value && typeof value.then === 'function') {
@@ -297,46 +297,39 @@ export class SqliteVectorStore {
         }
         return value;
       }
-      return null;
-    };
-
-    let dim = null;
-    const resolvedData = new Array(denseChunks.length);
-    for (let i = 0; i < denseChunks.length; i += 1) {
-      const chunk = denseChunks[i];
-      const sourceIndex = denseSourceIndices[i];
-
-      if (chunk.vector === undefined || chunk.vector === null) {
-        throw new Error(`Missing vector data for sqlite cache write at index ${sourceIndex}`);
-      }
-
-      const vector =
-        chunk.vector instanceof Float32Array ? chunk.vector : Float32Array.from(chunk.vector);
-      if (!vector || vector.length === 0) {
-        throw new Error(`Empty vector data for sqlite cache write at index ${sourceIndex}`);
-      }
-      if (dim === null) {
-        dim = vector.length;
-      } else if (vector.length !== dim) {
-        throw new Error('Vector dimension mismatch in sqlite cache write');
-      }
-
-      let content = await resolveContent(chunk, sourceIndex);
-      if (content === undefined) content = null;
-      if (content !== null && typeof content !== 'string') {
-        content = String(content);
-      }
-
-      resolvedData[i] = {
-        file: chunk.file,
-        startLine: chunk.startLine ?? 0,
-        endLine: chunk.endLine ?? 0,
-        content,
-        vectorBlob: Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength),
-      };
-    }
-
-    if (!dim) dim = 0;
+      return null;
+    };
+
+    const resolveVector = async (chunk, sourceIndex) => {
+      let vectorSource = chunk.vector;
+      if (
+        (vectorSource === undefined || vectorSource === null) &&
+        typeof getVector === 'function'
+      ) {
+        vectorSource = getVector(chunk, sourceIndex);
+        if (vectorSource && typeof vectorSource.then === 'function') {
+          vectorSource = await vectorSource;
+        }
+      }
+      if (vectorSource === undefined || vectorSource === null) {
+        throw new Error(`Missing vector data for sqlite cache write at index ${sourceIndex}`);
+      }
+      const vector =
+        vectorSource instanceof Float32Array
+          ? vectorSource
+          : ArrayBuffer.isView(vectorSource)
+            ? Float32Array.from(vectorSource)
+            : Float32Array.from(vectorSource);
+      if (!vector || vector.length === 0) {
+        throw new Error(`Empty vector data for sqlite cache write at index ${sourceIndex}`);
+      }
+      return vector;
+    };
+
+    const dim =
+      denseChunks.length > 0
+        ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length
+        : 0;
 
     // Create new database
     const db = new Database(writePath);
@@ -371,34 +364,51 @@ export class SqliteVectorStore {
       CREATE INDEX idx_chunks_file ON chunks(file);
     `);
 
-    // Insert metadata
-    const insertMeta = db.prepare(`INSERT INTO metadata (key, value) VALUES (?, ?)`);
-    insertMeta.run('version', String(STORE_VERSION));
-    insertMeta.run('dim', String(dim));
-    insertMeta.run('count', String(resolvedData.length));
-    insertMeta.run('createdAt', new Date().toISOString());
-
-    // Insert chunks in a transaction for speed
-    const insertChunk = db.prepare(`
-      INSERT INTO chunks (id, file, startLine, endLine, content, vector)
-      VALUES (?, ?, ?, ?, ?, ?)
-    `);
-
-    const insertMany = db.transaction((items) => {
-      for (let i = 0; i < items.length; i++) {
-        const item = items[i];
-        insertChunk.run(
-          i,
-          item.file,
-          item.startLine,
-          item.endLine,
-          item.content,
-          item.vectorBlob
-        );
-      }
-    });
-
-    insertMany(resolvedData);
+    // Insert metadata
+    const insertMeta = db.prepare(`INSERT INTO metadata (key, value) VALUES (?, ?)`);
+    insertMeta.run('version', String(STORE_VERSION));
+    insertMeta.run('dim', String(dim));
+    insertMeta.run('count', String(denseChunks.length));
+    insertMeta.run('createdAt', new Date().toISOString());
+
+    // Insert chunks in a transaction for speed without pre-materializing all vectors/content.
+    const insertChunk = db.prepare(`
+      INSERT INTO chunks (id, file, startLine, endLine, content, vector)
+      VALUES (?, ?, ?, ?, ?, ?)
+    `);
+
+    db.exec('BEGIN');
+    try {
+      for (let i = 0; i < denseChunks.length; i += 1) {
+        const chunk = denseChunks[i];
+        const sourceIndex = denseSourceIndices[i];
+        const vector = await resolveVector(chunk, sourceIndex);
+        if (vector.length !== dim) {
+          throw new Error('Vector dimension mismatch in sqlite cache write');
+        }
+        let content = await resolveContent(chunk, sourceIndex);
+        if (content === undefined) content = null;
+        if (content !== null && typeof content !== 'string') {
+          content = String(content);
+        }
+        insertChunk.run(
+          i,
+          chunk.file,
+          chunk.startLine ?? 0,
+          chunk.endLine ?? 0,
+          content,
+          Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength)
+        );
+      }
+      db.exec('COMMIT');
+    } catch (error) {
+      try {
+        db.exec('ROLLBACK');
+      } catch {
+        // ignore rollback errors
+      }
+      throw error;
+    }
 
     // Optimize the database
     db.exec('ANALYZE');
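
`SqliteVectorStore.write` gains the same `getVector` contract, and because the insert loop now awaits `resolveVector`/`resolveContent` per row, the synchronous `db.transaction()` wrapper (whose callback, in what appears to be better-sqlite3, cannot contain `await`) is replaced by manual `BEGIN`/`COMMIT`/`ROLLBACK`. A usage sketch under the same assumptions as the binary-store example above:

```js
// embed() is the same hypothetical async embedding helper as above; its return
// value follows the binary store's contract: TypedArray, number array, or a
// promise of either.
const result = await SqliteVectorStore.write('.cache', chunks, {
  getVector: (chunk, sourceIndex) => embed(chunk.content),
  getContent: (chunk, sourceIndex) => chunk.content ?? '', // optional, as before
});
```

As in the binary store, the first dense chunk is resolved twice: once to derive `dim`, once during insertion.
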
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@softerist/heuristic-mcp",
-  "version": "3.0.12",
+  "version": "3.0.14",
   "description": "An enhanced MCP server providing intelligent semantic code search with find-similar-code, recency ranking, and improved chunking. Fork of smart-coding-mcp.",
   "type": "module",
   "main": "index.js",