@softerist/heuristic-mcp 3.0.15 → 3.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. package/README.md +104 -104
  2. package/config.jsonc +173 -173
  3. package/features/ann-config.js +131 -0
  4. package/features/clear-cache.js +84 -0
  5. package/features/find-similar-code.js +291 -0
  6. package/features/hybrid-search.js +544 -0
  7. package/features/index-codebase.js +3268 -0
  8. package/features/lifecycle.js +1189 -0
  9. package/features/package-version.js +302 -0
  10. package/features/register.js +408 -0
  11. package/features/resources.js +156 -0
  12. package/features/set-workspace.js +265 -0
  13. package/index.js +96 -96
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +565 -565
  16. package/lib/cache.js +1870 -1870
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +1 -1
  19. package/lib/config.js +517 -517
  20. package/lib/constants.js +39 -39
  21. package/lib/embed-query-process.js +7 -7
  22. package/lib/embedding-process.js +7 -7
  23. package/lib/embedding-worker.js +299 -299
  24. package/lib/ignore-patterns.js +316 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +337 -337
  27. package/lib/logging.js +164 -164
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +193 -193
  30. package/lib/project-detector.js +84 -84
  31. package/lib/server-lifecycle.js +165 -165
  32. package/lib/settings-editor.js +754 -754
  33. package/lib/tokenizer.js +256 -256
  34. package/lib/utils.js +428 -428
  35. package/lib/vector-store-binary.js +627 -627
  36. package/lib/vector-store-sqlite.js +95 -95
  37. package/lib/workspace-env.js +28 -28
  38. package/mcp_config.json +9 -9
  39. package/package.json +86 -75
  40. package/scripts/clear-cache.js +20 -0
  41. package/scripts/download-model.js +43 -0
  42. package/scripts/mcp-launcher.js +49 -0
  43. package/scripts/postinstall.js +12 -0
  44. package/search-configs.js +36 -36
  45. package/.prettierrc +0 -7
  46. package/debug-pids.js +0 -30
  47. package/eslint.config.js +0 -36
  48. package/specs/plan.md +0 -23
  49. package/vitest.config.js +0 -39
@@ -1,627 +1,627 @@
1
- import fs from 'fs/promises';
2
- import fsSync from 'fs';
3
- import path from 'path';
4
- import os from 'os';
5
- import {
6
- BINARY_STORE_VERSION as STORE_VERSION,
7
- BINARY_VECTOR_HEADER_SIZE as VECTOR_HEADER_SIZE,
8
- BINARY_RECORD_HEADER_SIZE as RECORD_HEADER_SIZE,
9
- BINARY_CONTENT_HEADER_SIZE as CONTENT_HEADER_SIZE,
10
- BINARY_RECORD_SIZE as RECORD_SIZE,
11
- } from './constants.js';
12
-
13
- const MAGIC_VECTORS = 'HMCV';
14
- const MAGIC_RECORDS = 'HMCR';
15
- const MAGIC_CONTENT = 'HMCC';
16
-
17
- const VECTORS_FILE = 'vectors.bin';
18
- const RECORDS_FILE = 'records.bin';
19
- const CONTENT_FILE = 'content.bin';
20
- const FILES_FILE = 'files.json';
21
- const RETRYABLE_RENAME_ERRORS = new Set(['EPERM', 'EACCES', 'EBUSY']);
22
-
23
- async function renameWithRetry(source, target, { retries = 5, delayMs = 50 } = {}) {
24
- let attempt = 0;
25
- let delay = delayMs;
26
- while (true) {
27
- try {
28
- await fs.rename(source, target);
29
- return;
30
- } catch (err) {
31
- const code = err?.code;
32
- if (!RETRYABLE_RENAME_ERRORS.has(code) || attempt >= retries) {
33
- throw err;
34
- }
35
- await new Promise((resolve) => setTimeout(resolve, delay));
36
- attempt += 1;
37
- delay *= 2;
38
- }
39
- }
40
- }
41
-
42
- function writeMagic(buffer, magic) {
43
- buffer.write(magic, 0, 'ascii');
44
- }
45
-
46
- function readMagic(buffer) {
47
- return buffer.toString('ascii', 0, 4);
48
- }
49
-
50
- function ensureLittleEndian() {
51
- if (os.endianness() !== 'LE') {
52
- throw new Error('Binary vector store requires little-endian architecture');
53
- }
54
- }
55
-
56
- function getDataView(buffer) {
57
- return new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
58
- }
59
-
60
- function readHeader(buffer, magic, headerSize) {
61
- if (buffer.length < headerSize) {
62
- throw new Error('Binary store header is truncated');
63
- }
64
- const actualMagic = readMagic(buffer);
65
- if (actualMagic !== magic) {
66
- throw new Error(`Invalid binary store magic (${actualMagic})`);
67
- }
68
- const view = getDataView(buffer);
69
- const version = view.getUint32(4, true);
70
- if (version !== STORE_VERSION) {
71
- throw new Error(`Unsupported binary store version (${version})`);
72
- }
73
- return view;
74
- }
75
-
76
- function writeVectorsHeader(buffer, dim, count) {
77
- writeMagic(buffer, MAGIC_VECTORS);
78
- const view = getDataView(buffer);
79
- view.setUint32(4, STORE_VERSION, true);
80
- view.setUint32(8, dim, true);
81
- view.setUint32(12, count, true);
82
- view.setUint32(16, 0, true);
83
- }
84
-
85
- function writeRecordsHeader(buffer, count, fileCount) {
86
- writeMagic(buffer, MAGIC_RECORDS);
87
- const view = getDataView(buffer);
88
- view.setUint32(4, STORE_VERSION, true);
89
- view.setUint32(8, count, true);
90
- view.setUint32(12, fileCount, true);
91
- view.setUint32(16, 0, true);
92
- }
93
-
94
- function writeContentHeader(buffer, totalBytes) {
95
- writeMagic(buffer, MAGIC_CONTENT);
96
- const view = getDataView(buffer);
97
- view.setUint32(4, STORE_VERSION, true);
98
- const value = BigInt(totalBytes);
99
- view.setBigUint64(8, value, true);
100
- view.setUint32(16, 0, true);
101
- }
102
-
103
- function readBigUint(view, offset) {
104
- const value = view.getBigUint64(offset, true);
105
- if (value > BigInt(Number.MAX_SAFE_INTEGER)) {
106
- throw new Error('Binary store content offset exceeds safe integer range');
107
- }
108
- return Number(value);
109
- }
110
-
111
- function normalizeContent(value) {
112
- if (value === null || value === undefined) return '';
113
- if (typeof value !== 'string') return String(value);
114
- return value;
115
- }
116
-
117
- export class BinaryVectorStore {
118
- constructor({
119
- vectorsBuffer,
120
- recordsBuffer,
121
- vectorsHandle,
122
- vectorsFd,
123
- contentHandle,
124
- contentBuffer,
125
- contentSize,
126
- files,
127
- dim,
128
- count,
129
- contentCacheEntries,
130
- vectorCacheEntries,
131
- }) {
132
- this.vectorsBuffer = vectorsBuffer;
133
- this.recordsBuffer = recordsBuffer;
134
- this.vectorsHandle = vectorsHandle ?? null;
135
- this.vectorsFd = Number.isInteger(vectorsFd) ? vectorsFd : null;
136
- this.contentHandle = contentHandle ?? null;
137
- this.contentBuffer = contentBuffer ?? null;
138
- this.contentSize = Number.isFinite(contentSize)
139
- ? contentSize
140
- : contentBuffer
141
- ? Math.max(0, contentBuffer.length - CONTENT_HEADER_SIZE)
142
- : 0;
143
- this.files = files;
144
- this.dim = dim;
145
- this.count = count;
146
- this.contentCacheEntries = Number.isInteger(contentCacheEntries) ? contentCacheEntries : 256;
147
- this.contentCache = new Map();
148
- this.vectorCacheEntries = Number.isInteger(vectorCacheEntries) ? vectorCacheEntries : 0;
149
- this.vectorCache = new Map();
150
-
151
- this.vectorDataOffset = VECTOR_HEADER_SIZE;
152
- this.recordDataOffset = RECORD_HEADER_SIZE;
153
- this.contentDataOffset = CONTENT_HEADER_SIZE;
154
- }
155
-
156
- async close() {
157
- this.contentCache.clear();
158
- this.vectorCache.clear();
159
- this.vectorsBuffer = null;
160
- this.recordsBuffer = null;
161
- this.contentBuffer = null;
162
- this.files = null;
163
- if (this.vectorsHandle) {
164
- try {
165
- await this.vectorsHandle.close();
166
- } catch {
167
- // ignore close errors
168
- }
169
- }
170
- this.vectorsHandle = null;
171
- if (Number.isInteger(this.vectorsFd)) {
172
- try {
173
- fsSync.closeSync(this.vectorsFd);
174
- } catch {
175
- // ignore close errors
176
- }
177
- }
178
- this.vectorsFd = null;
179
- if (this.contentHandle) {
180
- try {
181
- await this.contentHandle.close();
182
- } catch {
183
- // ignore close errors
184
- }
185
- }
186
- this.contentHandle = null;
187
- }
188
-
189
- static getPaths(cacheDir) {
190
- return {
191
- vectorsPath: path.join(cacheDir, VECTORS_FILE),
192
- recordsPath: path.join(cacheDir, RECORDS_FILE),
193
- contentPath: path.join(cacheDir, CONTENT_FILE),
194
- filesPath: path.join(cacheDir, FILES_FILE),
195
- };
196
- }
197
-
198
- static async load(cacheDir, { contentCacheEntries, vectorCacheEntries, vectorLoadMode } = {}) {
199
- ensureLittleEndian();
200
- const { vectorsPath, recordsPath, contentPath, filesPath } =
201
- BinaryVectorStore.getPaths(cacheDir);
202
-
203
- let contentReadHandle = null;
204
- let vectorsFd = null;
205
-
206
- try {
207
- const loadVectorsFromDisk = String(vectorLoadMode).toLowerCase() === 'disk';
208
- let vectorsBuffer = null;
209
-
210
- const [recordsBuffer, filesRaw] = await Promise.all([
211
- fs.readFile(recordsPath),
212
- fs.readFile(filesPath, 'utf-8'),
213
- ]);
214
-
215
- if (loadVectorsFromDisk) {
216
- vectorsFd = fsSync.openSync(vectorsPath, 'r');
217
- const headerBuffer = Buffer.alloc(VECTOR_HEADER_SIZE);
218
- const bytesRead = fsSync.readSync(vectorsFd, headerBuffer, 0, VECTOR_HEADER_SIZE, 0);
219
- if (bytesRead < VECTOR_HEADER_SIZE) {
220
- throw new Error('Binary store vectors header is truncated');
221
- }
222
- vectorsBuffer = headerBuffer;
223
- } else {
224
- vectorsBuffer = await fs.readFile(vectorsPath);
225
- }
226
-
227
- const vectorsView = readHeader(vectorsBuffer, MAGIC_VECTORS, VECTOR_HEADER_SIZE);
228
- const dim = vectorsView.getUint32(8, true);
229
- const count = vectorsView.getUint32(12, true);
230
-
231
- const recordsView = readHeader(recordsBuffer, MAGIC_RECORDS, RECORD_HEADER_SIZE);
232
- const recordCount = recordsView.getUint32(8, true);
233
- const fileCount = recordsView.getUint32(12, true);
234
-
235
- if (recordCount !== count) {
236
- throw new Error(`Binary store count mismatch (${recordCount} != ${count})`);
237
- }
238
-
239
- contentReadHandle = await fs.open(contentPath, 'r');
240
- let totalContentBytes = 0;
241
-
242
- const headerBuffer = Buffer.alloc(CONTENT_HEADER_SIZE);
243
- const { bytesRead } = await contentReadHandle.read(headerBuffer, 0, CONTENT_HEADER_SIZE, 0);
244
- if (bytesRead < CONTENT_HEADER_SIZE) {
245
- throw new Error('Binary store content header is truncated');
246
- }
247
- const contentView = readHeader(headerBuffer, MAGIC_CONTENT, CONTENT_HEADER_SIZE);
248
- totalContentBytes = readBigUint(contentView, 8);
249
- const stats = await contentReadHandle.stat();
250
- const expectedContentSize = CONTENT_HEADER_SIZE + totalContentBytes;
251
- if (stats.size < expectedContentSize) {
252
- throw new Error('Binary store content file truncated');
253
- }
254
-
255
- const files = JSON.parse(filesRaw);
256
- if (!Array.isArray(files) || files.length !== fileCount) {
257
- throw new Error('Binary store file table is invalid');
258
- }
259
-
260
- return new BinaryVectorStore({
261
- vectorsBuffer,
262
- recordsBuffer,
263
- vectorsHandle: null,
264
- vectorsFd,
265
- contentHandle: contentReadHandle,
266
- contentSize: totalContentBytes,
267
- files,
268
- dim,
269
- count,
270
- contentCacheEntries,
271
- vectorCacheEntries,
272
- });
273
- } catch (err) {
274
- if (contentReadHandle) await contentReadHandle.close().catch(() => {});
275
- if (Number.isInteger(vectorsFd)) {
276
- try {
277
- fsSync.closeSync(vectorsFd);
278
- } catch {
279
- // ignore close errors
280
- }
281
- }
282
- throw err;
283
- }
284
- }
285
-
286
- get length() {
287
- return this.count;
288
- }
289
-
290
- getRecord(index) {
291
- if (index < 0 || index >= this.count) return null;
292
- const offset = this.recordDataOffset + index * RECORD_SIZE;
293
- const view = getDataView(this.recordsBuffer);
294
-
295
- const fileId = view.getUint32(offset, true);
296
- const startLine = view.getUint32(offset + 4, true);
297
- const endLine = view.getUint32(offset + 8, true);
298
- const contentOffset = readBigUint(view, offset + 12);
299
- const contentLength = view.getUint32(offset + 20, true);
300
-
301
- return {
302
- fileId,
303
- file: this.files[fileId],
304
- startLine,
305
- endLine,
306
- contentOffset,
307
- contentLength,
308
- };
309
- }
310
-
311
- getVector(index) {
312
- if (index < 0 || index >= this.count) return null;
313
- if (this.vectorCacheEntries > 0) {
314
- const cached = this.vectorCache.get(index);
315
- if (cached) {
316
- this.vectorCache.delete(index);
317
- this.vectorCache.set(index, cached);
318
- return cached;
319
- }
320
- }
321
-
322
- const offset = this.vectorDataOffset + index * this.dim * 4;
323
- const byteLength = this.dim * 4;
324
- let vector = null;
325
-
326
- if (this.vectorsBuffer && this.vectorsBuffer.length >= this.vectorDataOffset + byteLength) {
327
- vector = new Float32Array(
328
- this.vectorsBuffer.buffer,
329
- this.vectorsBuffer.byteOffset + offset,
330
- this.dim
331
- );
332
- } else if (Number.isInteger(this.vectorsFd)) {
333
- // Use Buffer.alloc (not allocUnsafe) for safety - prevents potential
334
- // information leak if read is partial or fails silently
335
- const buffer = Buffer.alloc(byteLength);
336
- const bytesRead = fsSync.readSync(this.vectorsFd, buffer, 0, byteLength, offset);
337
- if (bytesRead === byteLength) {
338
- vector = new Float32Array(buffer.buffer, buffer.byteOffset, this.dim);
339
- }
340
- }
341
-
342
- if (vector && this.vectorCacheEntries > 0) {
343
- this.vectorCache.set(index, vector);
344
- if (this.vectorCache.size > this.vectorCacheEntries) {
345
- const firstKey = this.vectorCache.keys().next().value;
346
- this.vectorCache.delete(firstKey);
347
- }
348
- }
349
-
350
- return vector;
351
- }
352
-
353
- async getContent(index) {
354
- if (index < 0 || index >= this.count) return null;
355
- if (this.contentCacheEntries > 0) {
356
- const cached = this.contentCache.get(index);
357
- if (cached !== undefined) {
358
- this.contentCache.delete(index);
359
- this.contentCache.set(index, cached);
360
- return cached;
361
- }
362
- }
363
-
364
- const record = this.getRecord(index);
365
- if (!record || record.contentLength === 0) return '';
366
- const contentLimit = record.contentOffset + record.contentLength;
367
- if (Number.isFinite(this.contentSize) && contentLimit > this.contentSize) {
368
- return '';
369
- }
370
-
371
- let content = '';
372
- if (this.contentBuffer) {
373
- const start = this.contentDataOffset + record.contentOffset;
374
- const end = start + record.contentLength;
375
- content = this.contentBuffer.slice(start, end).toString('utf-8');
376
- } else if (this.contentHandle) {
377
- const start = this.contentDataOffset + record.contentOffset;
378
- const length = record.contentLength;
379
- const buffer = Buffer.alloc(length);
380
- const { bytesRead } = await this.contentHandle.read(buffer, 0, length, start);
381
- content = buffer.slice(0, bytesRead).toString('utf-8');
382
- } else {
383
- return '';
384
- }
385
-
386
- if (this.contentCacheEntries > 0) {
387
- this.contentCache.set(index, content);
388
- if (this.contentCache.size > this.contentCacheEntries) {
389
- const firstKey = this.contentCache.keys().next().value;
390
- this.contentCache.delete(firstKey);
391
- }
392
- }
393
-
394
- return content;
395
- }
396
-
397
- async toChunkViews({ includeContent = false, includeVector = true } = {}) {
398
- const chunks = new Array(this.count);
399
- for (let i = 0; i < this.count; i += 1) {
400
- const record = this.getRecord(i);
401
- if (!record) continue;
402
- const chunk = {
403
- file: record.file,
404
- startLine: record.startLine,
405
- endLine: record.endLine,
406
- _index: i,
407
- _binaryIndex: i,
408
- };
409
- if (includeVector) {
410
- chunk.vector = this.getVector(i);
411
- }
412
- if (includeContent) {
413
- chunk.content = await this.getContent(i);
414
- }
415
- chunks[i] = chunk;
416
- }
417
- return chunks;
418
- }
419
-
420
- getAllFileIndices() {
421
- const map = new Map();
422
- for (let i = 0; i < this.count; i++) {
423
- const record = this.getRecord(i);
424
- if (record) {
425
- let list = map.get(record.file);
426
- if (!list) {
427
- list = [];
428
- map.set(record.file, list);
429
- }
430
- list.push(i);
431
- }
432
- }
433
- return map;
434
- }
435
-
436
- static async write(
437
- cacheDir,
438
- chunks,
439
- {
440
- contentCacheEntries,
441
- vectorCacheEntries,
442
- vectorLoadMode,
443
- getContent,
444
- getVector,
445
- preRename,
446
- } = {}
447
- ) {
448
- ensureLittleEndian();
449
- const { vectorsPath, recordsPath, contentPath, filesPath } =
450
- BinaryVectorStore.getPaths(cacheDir);
451
-
452
- const tmpSuffix = `.tmp-${process.pid}`;
453
- const vectorsTmp = `${vectorsPath}${tmpSuffix}`;
454
- const recordsTmp = `${recordsPath}${tmpSuffix}`;
455
- const contentTmp = `${contentPath}${tmpSuffix}`;
456
- const filesTmp = `${filesPath}${tmpSuffix}`;
457
-
458
- const fileIds = new Map();
459
- const files = [];
460
- const denseChunks = [];
461
- const denseSourceIndices = [];
462
- for (let i = 0; i < chunks.length; i += 1) {
463
- const chunk = chunks[i];
464
- if (!chunk) continue;
465
- denseChunks.push(chunk);
466
- denseSourceIndices.push(i);
467
- }
468
-
469
- const resolveVector = async (chunk, sourceIndex) => {
470
- let vectorSource = chunk.vector;
471
- if (
472
- (vectorSource === undefined || vectorSource === null) &&
473
- typeof getVector === 'function'
474
- ) {
475
- vectorSource = getVector(chunk, sourceIndex);
476
- if (vectorSource && typeof vectorSource.then === 'function') {
477
- vectorSource = await vectorSource;
478
- }
479
- }
480
- if (vectorSource === undefined || vectorSource === null) {
481
- throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
482
- }
483
- const vector =
484
- vectorSource instanceof Float32Array
485
- ? vectorSource
486
- : ArrayBuffer.isView(vectorSource)
487
- ? Float32Array.from(vectorSource)
488
- : new Float32Array(vectorSource);
489
- if (!vector || vector.length === 0) {
490
- throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
491
- }
492
- return vector;
493
- };
494
-
495
- const resolveContent = async (chunk, sourceIndex) => {
496
- const contentSource =
497
- chunk.content !== undefined && chunk.content !== null
498
- ? chunk.content
499
- : getContent
500
- ? await getContent(chunk, sourceIndex)
501
- : '';
502
- return normalizeContent(contentSource);
503
- };
504
-
505
- const recordEntries = new Array(denseChunks.length);
506
- let contentOffset = 0;
507
-
508
- for (let i = 0; i < denseChunks.length; i += 1) {
509
- const chunk = denseChunks[i];
510
- const sourceIndex = denseSourceIndices[i];
511
-
512
- const file = chunk.file;
513
- if (!fileIds.has(file)) {
514
- fileIds.set(file, files.length);
515
- files.push(file);
516
- }
517
-
518
- const contentValue = await resolveContent(chunk, sourceIndex);
519
- const contentLength = Buffer.byteLength(contentValue, 'utf-8');
520
-
521
- recordEntries[i] = {
522
- fileId: fileIds.get(file),
523
- startLine: chunk.startLine ?? 0,
524
- endLine: chunk.endLine ?? 0,
525
- contentOffset,
526
- contentLength,
527
- };
528
-
529
- contentOffset += contentLength;
530
- }
531
-
532
- const count = denseChunks.length;
533
- const dim =
534
- count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
535
-
536
- await fs.writeFile(filesTmp, JSON.stringify(files));
537
-
538
- let vectorsHandle = null;
539
- let recordsHandle = null;
540
- let contentHandle = null;
541
-
542
- try {
543
- vectorsHandle = await fs.open(vectorsTmp, 'w');
544
- recordsHandle = await fs.open(recordsTmp, 'w');
545
- contentHandle = await fs.open(contentTmp, 'w');
546
-
547
- const vectorsHeader = Buffer.alloc(VECTOR_HEADER_SIZE);
548
- writeVectorsHeader(vectorsHeader, dim, count);
549
- await vectorsHandle.write(vectorsHeader, 0, vectorsHeader.length, 0);
550
-
551
- const recordsHeader = Buffer.alloc(RECORD_HEADER_SIZE);
552
- writeRecordsHeader(recordsHeader, count, files.length);
553
- await recordsHandle.write(recordsHeader, 0, recordsHeader.length, 0);
554
-
555
- const contentHeader = Buffer.alloc(CONTENT_HEADER_SIZE);
556
- writeContentHeader(contentHeader, contentOffset);
557
- await contentHandle.write(contentHeader, 0, contentHeader.length, 0);
558
-
559
- let vectorPos = VECTOR_HEADER_SIZE;
560
- let recordPos = RECORD_HEADER_SIZE;
561
- let contentPos = CONTENT_HEADER_SIZE;
562
-
563
- for (let i = 0; i < count; i += 1) {
564
- const entry = recordEntries[i];
565
- if (!entry) continue;
566
-
567
- const recordBuffer = Buffer.alloc(RECORD_SIZE);
568
- const view = getDataView(recordBuffer);
569
- view.setUint32(0, entry.fileId, true);
570
- view.setUint32(4, entry.startLine, true);
571
- view.setUint32(8, entry.endLine, true);
572
- view.setBigUint64(12, BigInt(entry.contentOffset), true);
573
- view.setUint32(20, entry.contentLength, true);
574
- view.setUint32(24, 0, true);
575
- view.setUint32(28, 0, true);
576
-
577
- await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
578
- recordPos += recordBuffer.length;
579
-
580
- const chunk = denseChunks[i];
581
- const sourceIndex = denseSourceIndices[i];
582
- const vector = await resolveVector(chunk, sourceIndex);
583
- if (vector.length !== dim) {
584
- throw new Error('Vector dimension mismatch in binary cache write');
585
- }
586
- const vectorBuffer = Buffer.from(
587
- vector.buffer,
588
- vector.byteOffset,
589
- vector.byteLength
590
- );
591
- await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
592
- vectorPos += vectorBuffer.length;
593
-
594
- if (entry.contentLength > 0) {
595
- // Re-fetch content to avoid holding all strings in memory
596
- const val = await resolveContent(chunk, sourceIndex);
597
- const contentBuffer = Buffer.from(val, 'utf-8');
598
- await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
599
- contentPos += contentBuffer.length;
600
- }
601
- }
602
- } finally {
603
- const closes = [];
604
- if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
605
- if (recordsHandle) closes.push(recordsHandle.close().catch(() => {}));
606
- if (contentHandle) closes.push(contentHandle.close().catch(() => {}));
607
- await Promise.all(closes);
608
- }
609
-
610
- if (preRename) {
611
- await preRename();
612
- }
613
-
614
- await Promise.all([
615
- renameWithRetry(vectorsTmp, vectorsPath),
616
- renameWithRetry(recordsTmp, recordsPath),
617
- renameWithRetry(contentTmp, contentPath),
618
- renameWithRetry(filesTmp, filesPath),
619
- ]);
620
-
621
- return BinaryVectorStore.load(cacheDir, {
622
- contentCacheEntries,
623
- vectorCacheEntries,
624
- vectorLoadMode,
625
- });
626
- }
627
- }
1
+ import fs from 'fs/promises';
2
+ import fsSync from 'fs';
3
+ import path from 'path';
4
+ import os from 'os';
5
+ import {
6
+ BINARY_STORE_VERSION as STORE_VERSION,
7
+ BINARY_VECTOR_HEADER_SIZE as VECTOR_HEADER_SIZE,
8
+ BINARY_RECORD_HEADER_SIZE as RECORD_HEADER_SIZE,
9
+ BINARY_CONTENT_HEADER_SIZE as CONTENT_HEADER_SIZE,
10
+ BINARY_RECORD_SIZE as RECORD_SIZE,
11
+ } from './constants.js';
12
+
13
+ const MAGIC_VECTORS = 'HMCV';
14
+ const MAGIC_RECORDS = 'HMCR';
15
+ const MAGIC_CONTENT = 'HMCC';
16
+
17
+ const VECTORS_FILE = 'vectors.bin';
18
+ const RECORDS_FILE = 'records.bin';
19
+ const CONTENT_FILE = 'content.bin';
20
+ const FILES_FILE = 'files.json';
21
+ const RETRYABLE_RENAME_ERRORS = new Set(['EPERM', 'EACCES', 'EBUSY']);
22
+
23
+ async function renameWithRetry(source, target, { retries = 5, delayMs = 50 } = {}) {
24
+ let attempt = 0;
25
+ let delay = delayMs;
26
+ while (true) {
27
+ try {
28
+ await fs.rename(source, target);
29
+ return;
30
+ } catch (err) {
31
+ const code = err?.code;
32
+ if (!RETRYABLE_RENAME_ERRORS.has(code) || attempt >= retries) {
33
+ throw err;
34
+ }
35
+ await new Promise((resolve) => setTimeout(resolve, delay));
36
+ attempt += 1;
37
+ delay *= 2;
38
+ }
39
+ }
40
+ }
41
+
42
+ function writeMagic(buffer, magic) {
43
+ buffer.write(magic, 0, 'ascii');
44
+ }
45
+
46
+ function readMagic(buffer) {
47
+ return buffer.toString('ascii', 0, 4);
48
+ }
49
+
50
+ function ensureLittleEndian() {
51
+ if (os.endianness() !== 'LE') {
52
+ throw new Error('Binary vector store requires little-endian architecture');
53
+ }
54
+ }
55
+
56
+ function getDataView(buffer) {
57
+ return new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
58
+ }
59
+
60
+ function readHeader(buffer, magic, headerSize) {
61
+ if (buffer.length < headerSize) {
62
+ throw new Error('Binary store header is truncated');
63
+ }
64
+ const actualMagic = readMagic(buffer);
65
+ if (actualMagic !== magic) {
66
+ throw new Error(`Invalid binary store magic (${actualMagic})`);
67
+ }
68
+ const view = getDataView(buffer);
69
+ const version = view.getUint32(4, true);
70
+ if (version !== STORE_VERSION) {
71
+ throw new Error(`Unsupported binary store version (${version})`);
72
+ }
73
+ return view;
74
+ }
75
+
76
+ function writeVectorsHeader(buffer, dim, count) {
77
+ writeMagic(buffer, MAGIC_VECTORS);
78
+ const view = getDataView(buffer);
79
+ view.setUint32(4, STORE_VERSION, true);
80
+ view.setUint32(8, dim, true);
81
+ view.setUint32(12, count, true);
82
+ view.setUint32(16, 0, true);
83
+ }
84
+
85
+ function writeRecordsHeader(buffer, count, fileCount) {
86
+ writeMagic(buffer, MAGIC_RECORDS);
87
+ const view = getDataView(buffer);
88
+ view.setUint32(4, STORE_VERSION, true);
89
+ view.setUint32(8, count, true);
90
+ view.setUint32(12, fileCount, true);
91
+ view.setUint32(16, 0, true);
92
+ }
93
+
94
+ function writeContentHeader(buffer, totalBytes) {
95
+ writeMagic(buffer, MAGIC_CONTENT);
96
+ const view = getDataView(buffer);
97
+ view.setUint32(4, STORE_VERSION, true);
98
+ const value = BigInt(totalBytes);
99
+ view.setBigUint64(8, value, true);
100
+ view.setUint32(16, 0, true);
101
+ }
102
+
103
+ function readBigUint(view, offset) {
104
+ const value = view.getBigUint64(offset, true);
105
+ if (value > BigInt(Number.MAX_SAFE_INTEGER)) {
106
+ throw new Error('Binary store content offset exceeds safe integer range');
107
+ }
108
+ return Number(value);
109
+ }
110
+
111
+ function normalizeContent(value) {
112
+ if (value === null || value === undefined) return '';
113
+ if (typeof value !== 'string') return String(value);
114
+ return value;
115
+ }
116
+
117
+ export class BinaryVectorStore {
118
+ constructor({
119
+ vectorsBuffer,
120
+ recordsBuffer,
121
+ vectorsHandle,
122
+ vectorsFd,
123
+ contentHandle,
124
+ contentBuffer,
125
+ contentSize,
126
+ files,
127
+ dim,
128
+ count,
129
+ contentCacheEntries,
130
+ vectorCacheEntries,
131
+ }) {
132
+ this.vectorsBuffer = vectorsBuffer;
133
+ this.recordsBuffer = recordsBuffer;
134
+ this.vectorsHandle = vectorsHandle ?? null;
135
+ this.vectorsFd = Number.isInteger(vectorsFd) ? vectorsFd : null;
136
+ this.contentHandle = contentHandle ?? null;
137
+ this.contentBuffer = contentBuffer ?? null;
138
+ this.contentSize = Number.isFinite(contentSize)
139
+ ? contentSize
140
+ : contentBuffer
141
+ ? Math.max(0, contentBuffer.length - CONTENT_HEADER_SIZE)
142
+ : 0;
143
+ this.files = files;
144
+ this.dim = dim;
145
+ this.count = count;
146
+ this.contentCacheEntries = Number.isInteger(contentCacheEntries) ? contentCacheEntries : 256;
147
+ this.contentCache = new Map();
148
+ this.vectorCacheEntries = Number.isInteger(vectorCacheEntries) ? vectorCacheEntries : 0;
149
+ this.vectorCache = new Map();
150
+
151
+ this.vectorDataOffset = VECTOR_HEADER_SIZE;
152
+ this.recordDataOffset = RECORD_HEADER_SIZE;
153
+ this.contentDataOffset = CONTENT_HEADER_SIZE;
154
+ }
155
+
156
+ async close() {
157
+ this.contentCache.clear();
158
+ this.vectorCache.clear();
159
+ this.vectorsBuffer = null;
160
+ this.recordsBuffer = null;
161
+ this.contentBuffer = null;
162
+ this.files = null;
163
+ if (this.vectorsHandle) {
164
+ try {
165
+ await this.vectorsHandle.close();
166
+ } catch {
167
+ // ignore close errors
168
+ }
169
+ }
170
+ this.vectorsHandle = null;
171
+ if (Number.isInteger(this.vectorsFd)) {
172
+ try {
173
+ fsSync.closeSync(this.vectorsFd);
174
+ } catch {
175
+ // ignore close errors
176
+ }
177
+ }
178
+ this.vectorsFd = null;
179
+ if (this.contentHandle) {
180
+ try {
181
+ await this.contentHandle.close();
182
+ } catch {
183
+ // ignore close errors
184
+ }
185
+ }
186
+ this.contentHandle = null;
187
+ }
188
+
189
+ static getPaths(cacheDir) {
190
+ return {
191
+ vectorsPath: path.join(cacheDir, VECTORS_FILE),
192
+ recordsPath: path.join(cacheDir, RECORDS_FILE),
193
+ contentPath: path.join(cacheDir, CONTENT_FILE),
194
+ filesPath: path.join(cacheDir, FILES_FILE),
195
+ };
196
+ }
197
+
198
+ static async load(cacheDir, { contentCacheEntries, vectorCacheEntries, vectorLoadMode } = {}) {
199
+ ensureLittleEndian();
200
+ const { vectorsPath, recordsPath, contentPath, filesPath } =
201
+ BinaryVectorStore.getPaths(cacheDir);
202
+
203
+ let contentReadHandle = null;
204
+ let vectorsFd = null;
205
+
206
+ try {
207
+ const loadVectorsFromDisk = String(vectorLoadMode).toLowerCase() === 'disk';
208
+ let vectorsBuffer = null;
209
+
210
+ const [recordsBuffer, filesRaw] = await Promise.all([
211
+ fs.readFile(recordsPath),
212
+ fs.readFile(filesPath, 'utf-8'),
213
+ ]);
214
+
215
+ if (loadVectorsFromDisk) {
216
+ vectorsFd = fsSync.openSync(vectorsPath, 'r');
217
+ const headerBuffer = Buffer.alloc(VECTOR_HEADER_SIZE);
218
+ const bytesRead = fsSync.readSync(vectorsFd, headerBuffer, 0, VECTOR_HEADER_SIZE, 0);
219
+ if (bytesRead < VECTOR_HEADER_SIZE) {
220
+ throw new Error('Binary store vectors header is truncated');
221
+ }
222
+ vectorsBuffer = headerBuffer;
223
+ } else {
224
+ vectorsBuffer = await fs.readFile(vectorsPath);
225
+ }
226
+
227
+ const vectorsView = readHeader(vectorsBuffer, MAGIC_VECTORS, VECTOR_HEADER_SIZE);
228
+ const dim = vectorsView.getUint32(8, true);
229
+ const count = vectorsView.getUint32(12, true);
230
+
231
+ const recordsView = readHeader(recordsBuffer, MAGIC_RECORDS, RECORD_HEADER_SIZE);
232
+ const recordCount = recordsView.getUint32(8, true);
233
+ const fileCount = recordsView.getUint32(12, true);
234
+
235
+ if (recordCount !== count) {
236
+ throw new Error(`Binary store count mismatch (${recordCount} != ${count})`);
237
+ }
238
+
239
+ contentReadHandle = await fs.open(contentPath, 'r');
240
+ let totalContentBytes = 0;
241
+
242
+ const headerBuffer = Buffer.alloc(CONTENT_HEADER_SIZE);
243
+ const { bytesRead } = await contentReadHandle.read(headerBuffer, 0, CONTENT_HEADER_SIZE, 0);
244
+ if (bytesRead < CONTENT_HEADER_SIZE) {
245
+ throw new Error('Binary store content header is truncated');
246
+ }
247
+ const contentView = readHeader(headerBuffer, MAGIC_CONTENT, CONTENT_HEADER_SIZE);
248
+ totalContentBytes = readBigUint(contentView, 8);
249
+ const stats = await contentReadHandle.stat();
250
+ const expectedContentSize = CONTENT_HEADER_SIZE + totalContentBytes;
251
+ if (stats.size < expectedContentSize) {
252
+ throw new Error('Binary store content file truncated');
253
+ }
254
+
255
+ const files = JSON.parse(filesRaw);
256
+ if (!Array.isArray(files) || files.length !== fileCount) {
257
+ throw new Error('Binary store file table is invalid');
258
+ }
259
+
260
+ return new BinaryVectorStore({
261
+ vectorsBuffer,
262
+ recordsBuffer,
263
+ vectorsHandle: null,
264
+ vectorsFd,
265
+ contentHandle: contentReadHandle,
266
+ contentSize: totalContentBytes,
267
+ files,
268
+ dim,
269
+ count,
270
+ contentCacheEntries,
271
+ vectorCacheEntries,
272
+ });
273
+ } catch (err) {
274
+ if (contentReadHandle) await contentReadHandle.close().catch(() => {});
275
+ if (Number.isInteger(vectorsFd)) {
276
+ try {
277
+ fsSync.closeSync(vectorsFd);
278
+ } catch {
279
+ // ignore close errors
280
+ }
281
+ }
282
+ throw err;
283
+ }
284
+ }
285
+
286
+ get length() {
287
+ return this.count;
288
+ }
289
+
290
+ getRecord(index) {
291
+ if (index < 0 || index >= this.count) return null;
292
+ const offset = this.recordDataOffset + index * RECORD_SIZE;
293
+ const view = getDataView(this.recordsBuffer);
294
+
295
+ const fileId = view.getUint32(offset, true);
296
+ const startLine = view.getUint32(offset + 4, true);
297
+ const endLine = view.getUint32(offset + 8, true);
298
+ const contentOffset = readBigUint(view, offset + 12);
299
+ const contentLength = view.getUint32(offset + 20, true);
300
+
301
+ return {
302
+ fileId,
303
+ file: this.files[fileId],
304
+ startLine,
305
+ endLine,
306
+ contentOffset,
307
+ contentLength,
308
+ };
309
+ }
310
+
311
+ getVector(index) {
312
+ if (index < 0 || index >= this.count) return null;
313
+ if (this.vectorCacheEntries > 0) {
314
+ const cached = this.vectorCache.get(index);
315
+ if (cached) {
316
+ this.vectorCache.delete(index);
317
+ this.vectorCache.set(index, cached);
318
+ return cached;
319
+ }
320
+ }
321
+
322
+ const offset = this.vectorDataOffset + index * this.dim * 4;
323
+ const byteLength = this.dim * 4;
324
+ let vector = null;
325
+
326
+ if (this.vectorsBuffer && this.vectorsBuffer.length >= this.vectorDataOffset + byteLength) {
327
+ vector = new Float32Array(
328
+ this.vectorsBuffer.buffer,
329
+ this.vectorsBuffer.byteOffset + offset,
330
+ this.dim
331
+ );
332
+ } else if (Number.isInteger(this.vectorsFd)) {
333
+ // Use Buffer.alloc (not allocUnsafe) for safety - prevents potential
334
+ // information leak if read is partial or fails silently
335
+ const buffer = Buffer.alloc(byteLength);
336
+ const bytesRead = fsSync.readSync(this.vectorsFd, buffer, 0, byteLength, offset);
337
+ if (bytesRead === byteLength) {
338
+ vector = new Float32Array(buffer.buffer, buffer.byteOffset, this.dim);
339
+ }
340
+ }
341
+
342
+ if (vector && this.vectorCacheEntries > 0) {
343
+ this.vectorCache.set(index, vector);
344
+ if (this.vectorCache.size > this.vectorCacheEntries) {
345
+ const firstKey = this.vectorCache.keys().next().value;
346
+ this.vectorCache.delete(firstKey);
347
+ }
348
+ }
349
+
350
+ return vector;
351
+ }
352
+
353
+ async getContent(index) {
354
+ if (index < 0 || index >= this.count) return null;
355
+ if (this.contentCacheEntries > 0) {
356
+ const cached = this.contentCache.get(index);
357
+ if (cached !== undefined) {
358
+ this.contentCache.delete(index);
359
+ this.contentCache.set(index, cached);
360
+ return cached;
361
+ }
362
+ }
363
+
364
+ const record = this.getRecord(index);
365
+ if (!record || record.contentLength === 0) return '';
366
+ const contentLimit = record.contentOffset + record.contentLength;
367
+ if (Number.isFinite(this.contentSize) && contentLimit > this.contentSize) {
368
+ return '';
369
+ }
370
+
371
+ let content = '';
372
+ if (this.contentBuffer) {
373
+ const start = this.contentDataOffset + record.contentOffset;
374
+ const end = start + record.contentLength;
375
+ content = this.contentBuffer.slice(start, end).toString('utf-8');
376
+ } else if (this.contentHandle) {
377
+ const start = this.contentDataOffset + record.contentOffset;
378
+ const length = record.contentLength;
379
+ const buffer = Buffer.alloc(length);
380
+ const { bytesRead } = await this.contentHandle.read(buffer, 0, length, start);
381
+ content = buffer.slice(0, bytesRead).toString('utf-8');
382
+ } else {
383
+ return '';
384
+ }
385
+
386
+ if (this.contentCacheEntries > 0) {
387
+ this.contentCache.set(index, content);
388
+ if (this.contentCache.size > this.contentCacheEntries) {
389
+ const firstKey = this.contentCache.keys().next().value;
390
+ this.contentCache.delete(firstKey);
391
+ }
392
+ }
393
+
394
+ return content;
395
+ }
396
+
397
+ async toChunkViews({ includeContent = false, includeVector = true } = {}) {
398
+ const chunks = new Array(this.count);
399
+ for (let i = 0; i < this.count; i += 1) {
400
+ const record = this.getRecord(i);
401
+ if (!record) continue;
402
+ const chunk = {
403
+ file: record.file,
404
+ startLine: record.startLine,
405
+ endLine: record.endLine,
406
+ _index: i,
407
+ _binaryIndex: i,
408
+ };
409
+ if (includeVector) {
410
+ chunk.vector = this.getVector(i);
411
+ }
412
+ if (includeContent) {
413
+ chunk.content = await this.getContent(i);
414
+ }
415
+ chunks[i] = chunk;
416
+ }
417
+ return chunks;
418
+ }
419
+
420
+ getAllFileIndices() {
421
+ const map = new Map();
422
+ for (let i = 0; i < this.count; i++) {
423
+ const record = this.getRecord(i);
424
+ if (record) {
425
+ let list = map.get(record.file);
426
+ if (!list) {
427
+ list = [];
428
+ map.set(record.file, list);
429
+ }
430
+ list.push(i);
431
+ }
432
+ }
433
+ return map;
434
+ }
435
+
436
+ static async write(
437
+ cacheDir,
438
+ chunks,
439
+ {
440
+ contentCacheEntries,
441
+ vectorCacheEntries,
442
+ vectorLoadMode,
443
+ getContent,
444
+ getVector,
445
+ preRename,
446
+ } = {}
447
+ ) {
448
+ ensureLittleEndian();
449
+ const { vectorsPath, recordsPath, contentPath, filesPath } =
450
+ BinaryVectorStore.getPaths(cacheDir);
451
+
452
+ const tmpSuffix = `.tmp-${process.pid}`;
453
+ const vectorsTmp = `${vectorsPath}${tmpSuffix}`;
454
+ const recordsTmp = `${recordsPath}${tmpSuffix}`;
455
+ const contentTmp = `${contentPath}${tmpSuffix}`;
456
+ const filesTmp = `${filesPath}${tmpSuffix}`;
457
+
458
+ const fileIds = new Map();
459
+ const files = [];
460
+ const denseChunks = [];
461
+ const denseSourceIndices = [];
462
+ for (let i = 0; i < chunks.length; i += 1) {
463
+ const chunk = chunks[i];
464
+ if (!chunk) continue;
465
+ denseChunks.push(chunk);
466
+ denseSourceIndices.push(i);
467
+ }
468
+
469
+ const resolveVector = async (chunk, sourceIndex) => {
470
+ let vectorSource = chunk.vector;
471
+ if (
472
+ (vectorSource === undefined || vectorSource === null) &&
473
+ typeof getVector === 'function'
474
+ ) {
475
+ vectorSource = getVector(chunk, sourceIndex);
476
+ if (vectorSource && typeof vectorSource.then === 'function') {
477
+ vectorSource = await vectorSource;
478
+ }
479
+ }
480
+ if (vectorSource === undefined || vectorSource === null) {
481
+ throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
482
+ }
483
+ const vector =
484
+ vectorSource instanceof Float32Array
485
+ ? vectorSource
486
+ : ArrayBuffer.isView(vectorSource)
487
+ ? Float32Array.from(vectorSource)
488
+ : new Float32Array(vectorSource);
489
+ if (!vector || vector.length === 0) {
490
+ throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
491
+ }
492
+ return vector;
493
+ };
494
+
495
+ const resolveContent = async (chunk, sourceIndex) => {
496
+ const contentSource =
497
+ chunk.content !== undefined && chunk.content !== null
498
+ ? chunk.content
499
+ : getContent
500
+ ? await getContent(chunk, sourceIndex)
501
+ : '';
502
+ return normalizeContent(contentSource);
503
+ };
504
+
505
+ const recordEntries = new Array(denseChunks.length);
506
+ let contentOffset = 0;
507
+
508
+ for (let i = 0; i < denseChunks.length; i += 1) {
509
+ const chunk = denseChunks[i];
510
+ const sourceIndex = denseSourceIndices[i];
511
+
512
+ const file = chunk.file;
513
+ if (!fileIds.has(file)) {
514
+ fileIds.set(file, files.length);
515
+ files.push(file);
516
+ }
517
+
518
+ const contentValue = await resolveContent(chunk, sourceIndex);
519
+ const contentLength = Buffer.byteLength(contentValue, 'utf-8');
520
+
521
+ recordEntries[i] = {
522
+ fileId: fileIds.get(file),
523
+ startLine: chunk.startLine ?? 0,
524
+ endLine: chunk.endLine ?? 0,
525
+ contentOffset,
526
+ contentLength,
527
+ };
528
+
529
+ contentOffset += contentLength;
530
+ }
531
+
532
+ const count = denseChunks.length;
533
+ const dim =
534
+ count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
535
+
536
+ await fs.writeFile(filesTmp, JSON.stringify(files));
537
+
538
+ let vectorsHandle = null;
539
+ let recordsHandle = null;
540
+ let contentHandle = null;
541
+
542
+ try {
543
+ vectorsHandle = await fs.open(vectorsTmp, 'w');
544
+ recordsHandle = await fs.open(recordsTmp, 'w');
545
+ contentHandle = await fs.open(contentTmp, 'w');
546
+
547
+ const vectorsHeader = Buffer.alloc(VECTOR_HEADER_SIZE);
548
+ writeVectorsHeader(vectorsHeader, dim, count);
549
+ await vectorsHandle.write(vectorsHeader, 0, vectorsHeader.length, 0);
550
+
551
+ const recordsHeader = Buffer.alloc(RECORD_HEADER_SIZE);
552
+ writeRecordsHeader(recordsHeader, count, files.length);
553
+ await recordsHandle.write(recordsHeader, 0, recordsHeader.length, 0);
554
+
555
+ const contentHeader = Buffer.alloc(CONTENT_HEADER_SIZE);
556
+ writeContentHeader(contentHeader, contentOffset);
557
+ await contentHandle.write(contentHeader, 0, contentHeader.length, 0);
558
+
559
+ let vectorPos = VECTOR_HEADER_SIZE;
560
+ let recordPos = RECORD_HEADER_SIZE;
561
+ let contentPos = CONTENT_HEADER_SIZE;
562
+
563
+ for (let i = 0; i < count; i += 1) {
564
+ const entry = recordEntries[i];
565
+ if (!entry) continue;
566
+
567
+ const recordBuffer = Buffer.alloc(RECORD_SIZE);
568
+ const view = getDataView(recordBuffer);
569
+ view.setUint32(0, entry.fileId, true);
570
+ view.setUint32(4, entry.startLine, true);
571
+ view.setUint32(8, entry.endLine, true);
572
+ view.setBigUint64(12, BigInt(entry.contentOffset), true);
573
+ view.setUint32(20, entry.contentLength, true);
574
+ view.setUint32(24, 0, true);
575
+ view.setUint32(28, 0, true);
576
+
577
+ await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
578
+ recordPos += recordBuffer.length;
579
+
580
+ const chunk = denseChunks[i];
581
+ const sourceIndex = denseSourceIndices[i];
582
+ const vector = await resolveVector(chunk, sourceIndex);
583
+ if (vector.length !== dim) {
584
+ throw new Error('Vector dimension mismatch in binary cache write');
585
+ }
586
+ const vectorBuffer = Buffer.from(
587
+ vector.buffer,
588
+ vector.byteOffset,
589
+ vector.byteLength
590
+ );
591
+ await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
592
+ vectorPos += vectorBuffer.length;
593
+
594
+ if (entry.contentLength > 0) {
595
+ // Re-fetch content to avoid holding all strings in memory
596
+ const val = await resolveContent(chunk, sourceIndex);
597
+ const contentBuffer = Buffer.from(val, 'utf-8');
598
+ await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
599
+ contentPos += contentBuffer.length;
600
+ }
601
+ }
602
+ } finally {
603
+ const closes = [];
604
+ if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
605
+ if (recordsHandle) closes.push(recordsHandle.close().catch(() => {}));
606
+ if (contentHandle) closes.push(contentHandle.close().catch(() => {}));
607
+ await Promise.all(closes);
608
+ }
609
+
610
+ if (preRename) {
611
+ await preRename();
612
+ }
613
+
614
+ await Promise.all([
615
+ renameWithRetry(vectorsTmp, vectorsPath),
616
+ renameWithRetry(recordsTmp, recordsPath),
617
+ renameWithRetry(contentTmp, contentPath),
618
+ renameWithRetry(filesTmp, filesPath),
619
+ ]);
620
+
621
+ return BinaryVectorStore.load(cacheDir, {
622
+ contentCacheEntries,
623
+ vectorCacheEntries,
624
+ vectorLoadMode,
625
+ });
626
+ }
627
+ }