@softerist/heuristic-mcp 3.2.3 → 3.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/README.md +387 -376
  2. package/config.jsonc +800 -800
  3. package/features/ann-config.js +102 -110
  4. package/features/clear-cache.js +81 -84
  5. package/features/find-similar-code.js +265 -286
  6. package/features/hybrid-search.js +487 -536
  7. package/features/index-codebase.js +3139 -3270
  8. package/features/lifecycle.js +1011 -1063
  9. package/features/package-version.js +277 -291
  10. package/features/register.js +351 -370
  11. package/features/resources.js +115 -130
  12. package/features/set-workspace.js +214 -240
  13. package/index.js +693 -758
  14. package/lib/cache-ops.js +22 -22
  15. package/lib/cache-utils.js +465 -519
  16. package/lib/cache.js +1749 -1849
  17. package/lib/call-graph.js +396 -396
  18. package/lib/cli.js +232 -226
  19. package/lib/config.js +1483 -1495
  20. package/lib/constants.js +511 -493
  21. package/lib/embed-query-process.js +206 -212
  22. package/lib/embedding-process.js +434 -451
  23. package/lib/embedding-worker.js +862 -934
  24. package/lib/ignore-patterns.js +276 -316
  25. package/lib/json-worker.js +14 -14
  26. package/lib/json-writer.js +302 -310
  27. package/lib/logging.js +116 -127
  28. package/lib/memory-logger.js +13 -13
  29. package/lib/onnx-backend.js +188 -193
  30. package/lib/path-utils.js +18 -23
  31. package/lib/project-detector.js +82 -84
  32. package/lib/server-lifecycle.js +133 -145
  33. package/lib/settings-editor.js +738 -739
  34. package/lib/slice-normalize.js +25 -31
  35. package/lib/tokenizer.js +168 -203
  36. package/lib/utils.js +364 -409
  37. package/lib/vector-store-binary.js +973 -991
  38. package/lib/vector-store-sqlite.js +377 -414
  39. package/lib/workspace-env.js +32 -34
  40. package/mcp_config.json +9 -9
  41. package/package.json +86 -86
  42. package/scripts/clear-cache.js +20 -20
  43. package/scripts/download-model.js +43 -43
  44. package/scripts/mcp-launcher.js +49 -49
  45. package/scripts/postinstall.js +12 -12
  46. package/search-configs.js +36 -36
@@ -1,40 +1,40 @@
1
- import fs from 'fs/promises';
2
- import fsSync from 'fs';
3
- import path from 'path';
4
- import os from 'os';
5
- import crypto from 'crypto';
6
- import { crc32 } from 'zlib';
7
- import {
8
- BINARY_STORE_VERSION as STORE_VERSION,
9
- BINARY_VECTOR_HEADER_SIZE as VECTOR_HEADER_SIZE,
10
- BINARY_RECORD_HEADER_SIZE as RECORD_HEADER_SIZE,
11
- BINARY_CONTENT_HEADER_SIZE as CONTENT_HEADER_SIZE,
12
- BINARY_RECORD_SIZE as RECORD_SIZE,
13
- } from './constants.js';
14
-
15
- const MAGIC_VECTORS = 'HMCV';
16
- const MAGIC_RECORDS = 'HMCR';
17
- const MAGIC_CONTENT = 'HMCC';
18
-
19
- const VECTORS_FILE = 'vectors.bin';
20
- const RECORDS_FILE = 'records.bin';
21
- const CONTENT_FILE = 'content.bin';
22
- const FILES_FILE = 'files.json';
23
- const TELEMETRY_FILE = 'binary-store-telemetry.json';
24
- const RETRYABLE_RENAME_ERRORS = new Set(['EPERM', 'EACCES', 'EBUSY']);
25
- const BINARY_ARTIFACT_BASE_FILES = [VECTORS_FILE, RECORDS_FILE, CONTENT_FILE, FILES_FILE];
26
- const STARTUP_TMP_CLEANUP_MIN_AGE_MS = 2 * 60 * 1000;
27
- const TELEMETRY_VERSION = 1;
28
-
1
+ import fs from 'fs/promises';
2
+ import fsSync from 'fs';
3
+ import path from 'path';
4
+ import os from 'os';
5
+ import crypto from 'crypto';
6
+ import { crc32 } from 'zlib';
7
+ import {
8
+ BINARY_STORE_VERSION as STORE_VERSION,
9
+ BINARY_VECTOR_HEADER_SIZE as VECTOR_HEADER_SIZE,
10
+ BINARY_RECORD_HEADER_SIZE as RECORD_HEADER_SIZE,
11
+ BINARY_CONTENT_HEADER_SIZE as CONTENT_HEADER_SIZE,
12
+ BINARY_RECORD_SIZE as RECORD_SIZE,
13
+ } from './constants.js';
14
+
15
+ const MAGIC_VECTORS = 'HMCV';
16
+ const MAGIC_RECORDS = 'HMCR';
17
+ const MAGIC_CONTENT = 'HMCC';
18
+
19
+ const VECTORS_FILE = 'vectors.bin';
20
+ const RECORDS_FILE = 'records.bin';
21
+ const CONTENT_FILE = 'content.bin';
22
+ const FILES_FILE = 'files.json';
23
+ const TELEMETRY_FILE = 'binary-store-telemetry.json';
24
+ const RETRYABLE_RENAME_ERRORS = new Set(['EPERM', 'EACCES', 'EBUSY']);
25
+ const BINARY_ARTIFACT_BASE_FILES = [VECTORS_FILE, RECORDS_FILE, CONTENT_FILE, FILES_FILE];
26
+ const STARTUP_TMP_CLEANUP_MIN_AGE_MS = 2 * 60 * 1000;
27
+ const TELEMETRY_VERSION = 1;
28
+
29
29
  function createTelemetryTotals() {
30
30
  return {
31
- atomicReplaceAttempts: 0,
32
- atomicReplaceSuccesses: 0,
33
- atomicReplaceFailures: 0,
34
- renameRetryCount: 0,
35
- fallbackCopyCount: 0,
36
- rollbackCount: 0,
37
- rollbackRestoreFailureCount: 0,
31
+ atomicReplaceAttempts: 0,
32
+ atomicReplaceSuccesses: 0,
33
+ atomicReplaceFailures: 0,
34
+ renameRetryCount: 0,
35
+ fallbackCopyCount: 0,
36
+ rollbackCount: 0,
37
+ rollbackRestoreFailureCount: 0,
38
38
  startupCleanupRuns: 0,
39
39
  staleTempFilesRemoved: 0,
40
40
  staleTempFilesSkippedActive: 0,
@@ -43,32 +43,31 @@ function createTelemetryTotals() {
43
43
  corruptionSecondaryReadonlyBlocked: 0,
44
44
  };
45
45
  }
46
-
47
- function normalizeTelemetry(raw) {
48
- const totals = createTelemetryTotals();
49
- if (raw?.totals && typeof raw.totals === 'object') {
50
- for (const key of Object.keys(totals)) {
51
- if (Number.isFinite(raw.totals[key])) {
52
- totals[key] = raw.totals[key];
53
- }
54
- }
55
- }
56
- return {
57
- version: TELEMETRY_VERSION,
58
- totals,
59
- updatedAt: typeof raw?.updatedAt === 'string' ? raw.updatedAt : null,
60
- lastError:
61
- raw?.lastError && typeof raw.lastError === 'object'
62
- ? {
63
- at: typeof raw.lastError.at === 'string' ? raw.lastError.at : null,
64
- message:
65
- typeof raw.lastError.message === 'string' ? raw.lastError.message : null,
66
- }
67
- : null,
68
- lastAtomicReplace:
69
- raw?.lastAtomicReplace && typeof raw.lastAtomicReplace === 'object'
70
- ? { ...raw.lastAtomicReplace }
71
- : null,
46
+
47
+ function normalizeTelemetry(raw) {
48
+ const totals = createTelemetryTotals();
49
+ if (raw?.totals && typeof raw.totals === 'object') {
50
+ for (const key of Object.keys(totals)) {
51
+ if (Number.isFinite(raw.totals[key])) {
52
+ totals[key] = raw.totals[key];
53
+ }
54
+ }
55
+ }
56
+ return {
57
+ version: TELEMETRY_VERSION,
58
+ totals,
59
+ updatedAt: typeof raw?.updatedAt === 'string' ? raw.updatedAt : null,
60
+ lastError:
61
+ raw?.lastError && typeof raw.lastError === 'object'
62
+ ? {
63
+ at: typeof raw.lastError.at === 'string' ? raw.lastError.at : null,
64
+ message: typeof raw.lastError.message === 'string' ? raw.lastError.message : null,
65
+ }
66
+ : null,
67
+ lastAtomicReplace:
68
+ raw?.lastAtomicReplace && typeof raw.lastAtomicReplace === 'object'
69
+ ? { ...raw.lastAtomicReplace }
70
+ : null,
72
71
  lastStartupCleanup:
73
72
  raw?.lastStartupCleanup && typeof raw.lastStartupCleanup === 'object'
74
73
  ? { ...raw.lastStartupCleanup }
@@ -79,64 +78,61 @@ function normalizeTelemetry(raw) {
79
78
  : null,
80
79
  };
81
80
  }
82
-
83
- async function readTelemetryFile(cacheDir) {
84
- const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
85
- try {
86
- const raw = await fs.readFile(telemetryPath, 'utf-8');
87
- return normalizeTelemetry(JSON.parse(raw));
88
- } catch {
89
- return normalizeTelemetry(null);
90
- }
91
- }
92
-
93
- async function writeTelemetryFile(cacheDir, telemetry) {
94
- const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
95
- await fs.mkdir(cacheDir, { recursive: true }).catch(() => {});
96
- await fs.writeFile(telemetryPath, JSON.stringify(telemetry, null, 2));
97
- }
98
-
99
- async function updateTelemetry(cacheDir, mutate) {
100
- if (!cacheDir) return;
101
- try {
102
- const telemetry = await readTelemetryFile(cacheDir);
103
- mutate(telemetry);
104
- telemetry.updatedAt = new Date().toISOString();
105
- await writeTelemetryFile(cacheDir, telemetry);
106
- } catch {
107
-
108
- }
109
- }
110
-
111
- function isProcessRunning(pid) {
112
- if (!Number.isInteger(pid) || pid <= 0) return false;
113
- try {
114
- process.kill(pid, 0);
115
- return true;
116
- } catch (err) {
117
- return err?.code === 'EPERM';
118
- }
119
- }
120
-
121
- function parsePidFromBinaryArtifact(fileName) {
122
- const match = fileName.match(/\.(?:tmp|bak)-(\d+)(?:-|$)/);
123
- if (!match) return null;
124
- const pid = Number.parseInt(match[1], 10);
125
- return Number.isInteger(pid) ? pid : null;
126
- }
127
-
128
- function isBinaryTempArtifact(fileName) {
129
- return BINARY_ARTIFACT_BASE_FILES.some(
130
- (baseFile) =>
131
- fileName.startsWith(`${baseFile}.tmp-`) || fileName.startsWith(`${baseFile}.bak-`)
132
- );
133
- }
134
-
135
- function addToMetric(metrics, key, value = 1) {
136
- if (!metrics || !Number.isFinite(value) || value <= 0) return;
137
- metrics[key] = (metrics[key] || 0) + value;
138
- }
139
-
81
+
82
+ async function readTelemetryFile(cacheDir) {
83
+ const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
84
+ try {
85
+ const raw = await fs.readFile(telemetryPath, 'utf-8');
86
+ return normalizeTelemetry(JSON.parse(raw));
87
+ } catch {
88
+ return normalizeTelemetry(null);
89
+ }
90
+ }
91
+
92
+ async function writeTelemetryFile(cacheDir, telemetry) {
93
+ const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
94
+ await fs.mkdir(cacheDir, { recursive: true }).catch(() => {});
95
+ await fs.writeFile(telemetryPath, JSON.stringify(telemetry, null, 2));
96
+ }
97
+
98
+ async function updateTelemetry(cacheDir, mutate) {
99
+ if (!cacheDir) return;
100
+ try {
101
+ const telemetry = await readTelemetryFile(cacheDir);
102
+ mutate(telemetry);
103
+ telemetry.updatedAt = new Date().toISOString();
104
+ await writeTelemetryFile(cacheDir, telemetry);
105
+ } catch {}
106
+ }
107
+
108
+ function isProcessRunning(pid) {
109
+ if (!Number.isInteger(pid) || pid <= 0) return false;
110
+ try {
111
+ process.kill(pid, 0);
112
+ return true;
113
+ } catch (err) {
114
+ return err?.code === 'EPERM';
115
+ }
116
+ }
117
+
118
+ function parsePidFromBinaryArtifact(fileName) {
119
+ const match = fileName.match(/\.(?:tmp|bak)-(\d+)(?:-|$)/);
120
+ if (!match) return null;
121
+ const pid = Number.parseInt(match[1], 10);
122
+ return Number.isInteger(pid) ? pid : null;
123
+ }
124
+
125
+ function isBinaryTempArtifact(fileName) {
126
+ return BINARY_ARTIFACT_BASE_FILES.some(
127
+ (baseFile) => fileName.startsWith(`${baseFile}.tmp-`) || fileName.startsWith(`${baseFile}.bak-`)
128
+ );
129
+ }
130
+
131
+ function addToMetric(metrics, key, value = 1) {
132
+ if (!metrics || !Number.isFinite(value) || value <= 0) return;
133
+ metrics[key] = (metrics[key] || 0) + value;
134
+ }
135
+
140
136
  export async function readBinaryStoreTelemetry(cacheDir) {
141
137
  return readTelemetryFile(cacheDir);
142
138
  }
@@ -160,267 +156,267 @@ export async function recordBinaryStoreCorruption(
160
156
  };
161
157
  });
162
158
  }
163
-
164
- export async function cleanupStaleBinaryArtifacts(
165
- cacheDir,
166
- { minAgeMs = STARTUP_TMP_CLEANUP_MIN_AGE_MS, logger = null } = {}
167
- ) {
168
- const result = {
169
- cacheDir,
170
- scanned: 0,
171
- removed: 0,
172
- skippedActive: 0,
173
- removedFiles: [],
174
- };
175
-
176
- let entries = [];
177
- try {
178
- entries = await fs.readdir(cacheDir, { withFileTypes: true });
179
- } catch {
180
- return result;
181
- }
182
-
183
- const now = Date.now();
184
- for (const entry of entries) {
185
- const fileName = typeof entry === 'string' ? entry : entry?.name;
186
- if (!fileName) continue;
187
- const isFileEntry = typeof entry === 'string' ? true : entry?.isFile?.() === true;
188
- if (!isFileEntry) continue;
189
- if (!isBinaryTempArtifact(fileName)) continue;
190
- result.scanned += 1;
191
-
192
- const fullPath = path.join(cacheDir, fileName);
193
- const stats = await fs.stat(fullPath).catch(() => null);
194
- if (!stats) continue;
195
-
196
- const ageMs = now - stats.mtimeMs;
197
- const ownerPid = parsePidFromBinaryArtifact(fileName);
198
- if (ownerPid && isProcessRunning(ownerPid)) {
199
- result.skippedActive += 1;
200
- continue;
201
- }
202
- if (ageMs < minAgeMs) continue;
203
-
204
- await fs.rm(fullPath, { force: true }).catch(() => {});
205
- result.removed += 1;
206
- result.removedFiles.push(fileName);
207
- }
208
-
209
- await updateTelemetry(cacheDir, (telemetry) => {
210
- telemetry.totals.startupCleanupRuns += 1;
211
- telemetry.totals.staleTempFilesRemoved += result.removed;
212
- telemetry.totals.staleTempFilesSkippedActive += result.skippedActive;
213
- telemetry.lastStartupCleanup = {
214
- at: new Date().toISOString(),
215
- scanned: result.scanned,
216
- removed: result.removed,
217
- skippedActive: result.skippedActive,
218
- };
219
- });
220
-
221
- if (logger && result.removed > 0) {
222
- logger.info(
223
- `[Cache] Startup temp cleanup removed ${result.removed} stale artifact(s) from ${cacheDir}`
224
- );
225
- }
226
-
227
- return result;
228
- }
229
-
230
- function isRetryableRenameError(err) {
231
- return RETRYABLE_RENAME_ERRORS.has(err?.code);
232
- }
233
-
234
- async function renameWithRetry(
235
- source,
236
- target,
237
- { retries = 12, delayMs = 50, maxDelayMs = 1000 } = {}
238
- ) {
239
- let attempt = 0;
240
- let delay = delayMs;
241
- while (true) {
242
- try {
243
- await fs.rename(source, target);
244
- return attempt;
245
- } catch (err) {
246
- if (!isRetryableRenameError(err) || attempt >= retries) {
247
- err.renameRetryCount = attempt;
248
- throw err;
249
- }
250
- await new Promise((resolve) => setTimeout(resolve, delay));
251
- attempt += 1;
252
- delay = Math.min(delay * 2, maxDelayMs);
253
- }
254
- }
255
- }
256
-
257
- async function pathExists(filePath) {
258
- try {
259
- await fs.access(filePath);
260
- return true;
261
- } catch {
262
- return false;
263
- }
264
- }
265
-
266
- async function removeIfExists(filePath) {
267
- await fs.rm(filePath, { force: true }).catch(() => {});
268
- }
269
-
270
- async function promoteFileWithFallback(source, target, renameOptions = {}, metrics = null) {
271
- try {
272
- const retriesUsed = await renameWithRetry(source, target, renameOptions);
273
- addToMetric(metrics, 'renameRetryCount', retriesUsed);
274
- return;
275
- } catch (renameError) {
276
- const retriesUsed = Number.isFinite(renameError?.renameRetryCount)
277
- ? renameError.renameRetryCount
278
- : 0;
279
- addToMetric(metrics, 'renameRetryCount', retriesUsed);
280
- if (!isRetryableRenameError(renameError)) {
281
- throw renameError;
282
- }
283
-
284
- try {
285
- await fs.copyFile(source, target);
286
- await removeIfExists(source);
287
- addToMetric(metrics, 'fallbackCopyCount', 1);
288
- return;
289
- } catch (copyError) {
290
- const wrapped = new Error(
291
- `rename failed (${renameError.message}); fallback copy failed (${copyError.message})`
292
- );
293
- wrapped.code = copyError?.code || renameError?.code;
294
- throw wrapped;
295
- }
296
- }
297
- }
298
-
299
- async function replaceFilesAtomically(filePairs, renameOptions = {}) {
300
- const metrics = createTelemetryTotals();
301
- metrics.atomicReplaceAttempts = 1;
302
- const cacheDir = filePairs.length > 0 ? path.dirname(filePairs[0].target) : null;
303
- const backupSuffix = `.bak-${process.pid}-${Date.now()}`;
304
- const backups = [];
305
- const replacedTargets = [];
306
- let operationError = null;
307
-
308
- try {
309
- // Stage current files as backups first. If this fails, nothing is replaced.
310
- for (const pair of filePairs) {
311
- if (!(await pathExists(pair.target))) continue;
312
- const backupPath = `${pair.target}${backupSuffix}`;
313
- await removeIfExists(backupPath);
314
- await promoteFileWithFallback(pair.target, backupPath, renameOptions, metrics);
315
- backups.push({ target: pair.target, backupPath });
316
- }
317
-
318
- // Replace targets with new temp files.
319
- for (const pair of filePairs) {
320
- await promoteFileWithFallback(pair.source, pair.target, renameOptions, metrics);
321
- replacedTargets.push(pair.target);
322
- }
323
- metrics.atomicReplaceSuccesses = 1;
324
- } catch (error) {
325
- operationError = error;
326
- metrics.atomicReplaceFailures = 1;
327
- metrics.rollbackCount = 1;
328
- const rollbackErrors = [];
329
-
330
- // Remove any partially replaced files before restoring backups.
331
- for (const target of replacedTargets.reverse()) {
332
- await removeIfExists(target);
333
- }
334
-
335
- // Restore original files from backups.
336
- for (const backup of backups.reverse()) {
337
- try {
338
- await promoteFileWithFallback(backup.backupPath, backup.target, renameOptions, metrics);
339
- } catch (restoreErr) {
340
- rollbackErrors.push(
341
- `restore ${path.basename(backup.target)} failed: ${restoreErr.message}`
342
- );
343
- }
344
- }
345
- if (rollbackErrors.length > 0) {
346
- metrics.rollbackRestoreFailureCount = rollbackErrors.length;
347
- }
348
-
349
- // Clean up temp files left from this failed write attempt.
350
- await Promise.all(filePairs.map((pair) => removeIfExists(pair.source)));
351
-
352
- if (rollbackErrors.length > 0) {
353
- error.message = `${error.message}. Rollback issues: ${rollbackErrors.join('; ')}`;
354
- }
355
- throw error;
356
- } finally {
357
- // Best-effort cleanup for any backup remnants after success/rollback.
358
- await Promise.all(backups.map((backup) => removeIfExists(backup.backupPath)));
359
- await updateTelemetry(cacheDir, (telemetry) => {
360
- telemetry.totals.atomicReplaceAttempts += metrics.atomicReplaceAttempts;
361
- telemetry.totals.atomicReplaceSuccesses += metrics.atomicReplaceSuccesses;
362
- telemetry.totals.atomicReplaceFailures += metrics.atomicReplaceFailures;
363
- telemetry.totals.renameRetryCount += metrics.renameRetryCount;
364
- telemetry.totals.fallbackCopyCount += metrics.fallbackCopyCount;
365
- telemetry.totals.rollbackCount += metrics.rollbackCount;
366
- telemetry.totals.rollbackRestoreFailureCount += metrics.rollbackRestoreFailureCount;
367
- telemetry.lastAtomicReplace = {
368
- at: new Date().toISOString(),
369
- success: metrics.atomicReplaceSuccesses > 0,
370
- renameRetryCount: metrics.renameRetryCount,
371
- fallbackCopyCount: metrics.fallbackCopyCount,
372
- rollbackCount: metrics.rollbackCount,
373
- rollbackRestoreFailureCount: metrics.rollbackRestoreFailureCount,
374
- };
375
- if (operationError) {
376
- telemetry.lastError = {
377
- at: new Date().toISOString(),
378
- message: operationError.message,
379
- };
380
- }
381
- });
382
- }
383
- }
384
-
385
- /**
386
- * Custom error for binary store corruption.
387
- * Allows cache layer to distinguish corruption from other load failures.
388
- */
389
- export class BinaryStoreCorruptionError extends Error {
390
- constructor(message) {
391
- super(message);
392
- this.name = 'BinaryStoreCorruptionError';
393
- }
394
- }
395
-
396
- function writeMagic(buffer, magic) {
397
- buffer.write(magic, 0, 'ascii');
398
- }
399
-
400
- function readMagic(buffer) {
401
- return buffer.toString('ascii', 0, 4);
402
- }
403
-
404
- function ensureLittleEndian() {
405
- if (os.endianness() !== 'LE') {
406
- throw new Error('Binary vector store requires little-endian architecture');
407
- }
408
- }
409
-
410
- function getDataView(buffer) {
411
- return new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
412
- }
413
-
414
- /**
415
- * Generate a random writeId shared across all files in a single write operation.
416
- */
417
- function generateWriteId() {
418
- return crypto.randomInt(1, 0xFFFFFFFF);
419
- }
420
-
421
- /**
422
- * Compute CRC32 checksum over a buffer.
423
- */
159
+
160
+ export async function cleanupStaleBinaryArtifacts(
161
+ cacheDir,
162
+ { minAgeMs = STARTUP_TMP_CLEANUP_MIN_AGE_MS, logger = null } = {}
163
+ ) {
164
+ const result = {
165
+ cacheDir,
166
+ scanned: 0,
167
+ removed: 0,
168
+ skippedActive: 0,
169
+ removedFiles: [],
170
+ };
171
+
172
+ let entries = [];
173
+ try {
174
+ entries = await fs.readdir(cacheDir, { withFileTypes: true });
175
+ } catch {
176
+ return result;
177
+ }
178
+
179
+ const now = Date.now();
180
+ for (const entry of entries) {
181
+ const fileName = typeof entry === 'string' ? entry : entry?.name;
182
+ if (!fileName) continue;
183
+ const isFileEntry = typeof entry === 'string' ? true : entry?.isFile?.() === true;
184
+ if (!isFileEntry) continue;
185
+ if (!isBinaryTempArtifact(fileName)) continue;
186
+ result.scanned += 1;
187
+
188
+ const fullPath = path.join(cacheDir, fileName);
189
+ const stats = await fs.stat(fullPath).catch(() => null);
190
+ if (!stats) continue;
191
+
192
+ const ageMs = now - stats.mtimeMs;
193
+ const ownerPid = parsePidFromBinaryArtifact(fileName);
194
+ if (ownerPid && isProcessRunning(ownerPid)) {
195
+ result.skippedActive += 1;
196
+ continue;
197
+ }
198
+ if (ageMs < minAgeMs) continue;
199
+
200
+ await fs.rm(fullPath, { force: true }).catch(() => {});
201
+ result.removed += 1;
202
+ result.removedFiles.push(fileName);
203
+ }
204
+
205
+ await updateTelemetry(cacheDir, (telemetry) => {
206
+ telemetry.totals.startupCleanupRuns += 1;
207
+ telemetry.totals.staleTempFilesRemoved += result.removed;
208
+ telemetry.totals.staleTempFilesSkippedActive += result.skippedActive;
209
+ telemetry.lastStartupCleanup = {
210
+ at: new Date().toISOString(),
211
+ scanned: result.scanned,
212
+ removed: result.removed,
213
+ skippedActive: result.skippedActive,
214
+ };
215
+ });
216
+
217
+ if (logger && result.removed > 0) {
218
+ logger.info(
219
+ `[Cache] Startup temp cleanup removed ${result.removed} stale artifact(s) from ${cacheDir}`
220
+ );
221
+ }
222
+
223
+ return result;
224
+ }
225
+
226
+ function isRetryableRenameError(err) {
227
+ return RETRYABLE_RENAME_ERRORS.has(err?.code);
228
+ }
229
+
230
+ async function renameWithRetry(
231
+ source,
232
+ target,
233
+ { retries = 12, delayMs = 50, maxDelayMs = 1000 } = {}
234
+ ) {
235
+ let attempt = 0;
236
+ let delay = delayMs;
237
+ while (true) {
238
+ try {
239
+ await fs.rename(source, target);
240
+ return attempt;
241
+ } catch (err) {
242
+ if (!isRetryableRenameError(err) || attempt >= retries) {
243
+ err.renameRetryCount = attempt;
244
+ throw err;
245
+ }
246
+ await new Promise((resolve) => setTimeout(resolve, delay));
247
+ attempt += 1;
248
+ delay = Math.min(delay * 2, maxDelayMs);
249
+ }
250
+ }
251
+ }
252
+
253
+ async function pathExists(filePath) {
254
+ try {
255
+ await fs.access(filePath);
256
+ return true;
257
+ } catch {
258
+ return false;
259
+ }
260
+ }
261
+
262
+ async function removeIfExists(filePath) {
263
+ await fs.rm(filePath, { force: true }).catch(() => {});
264
+ }
265
+
266
+ async function promoteFileWithFallback(source, target, renameOptions = {}, metrics = null) {
267
+ try {
268
+ const retriesUsed = await renameWithRetry(source, target, renameOptions);
269
+ addToMetric(metrics, 'renameRetryCount', retriesUsed);
270
+ return;
271
+ } catch (renameError) {
272
+ const retriesUsed = Number.isFinite(renameError?.renameRetryCount)
273
+ ? renameError.renameRetryCount
274
+ : 0;
275
+ addToMetric(metrics, 'renameRetryCount', retriesUsed);
276
+ if (!isRetryableRenameError(renameError)) {
277
+ throw renameError;
278
+ }
279
+
280
+ try {
281
+ await fs.copyFile(source, target);
282
+ await removeIfExists(source);
283
+ addToMetric(metrics, 'fallbackCopyCount', 1);
284
+ return;
285
+ } catch (copyError) {
286
+ const wrapped = new Error(
287
+ `rename failed (${renameError.message}); fallback copy failed (${copyError.message})`
288
+ );
289
+ wrapped.code = copyError?.code || renameError?.code;
290
+ throw wrapped;
291
+ }
292
+ }
293
+ }
294
+
295
+ async function replaceFilesAtomically(filePairs, renameOptions = {}) {
296
+ const metrics = createTelemetryTotals();
297
+ metrics.atomicReplaceAttempts = 1;
298
+ const cacheDir = filePairs.length > 0 ? path.dirname(filePairs[0].target) : null;
299
+ const backupSuffix = `.bak-${process.pid}-${Date.now()}`;
300
+ const backups = [];
301
+ const replacedTargets = [];
302
+ let operationError = null;
303
+
304
+ try {
305
+ // Stage current files as backups first. If this fails, nothing is replaced.
306
+ for (const pair of filePairs) {
307
+ if (!(await pathExists(pair.target))) continue;
308
+ const backupPath = `${pair.target}${backupSuffix}`;
309
+ await removeIfExists(backupPath);
310
+ await promoteFileWithFallback(pair.target, backupPath, renameOptions, metrics);
311
+ backups.push({ target: pair.target, backupPath });
312
+ }
313
+
314
+ // Replace targets with new temp files.
315
+ for (const pair of filePairs) {
316
+ await promoteFileWithFallback(pair.source, pair.target, renameOptions, metrics);
317
+ replacedTargets.push(pair.target);
318
+ }
319
+ metrics.atomicReplaceSuccesses = 1;
320
+ } catch (error) {
321
+ operationError = error;
322
+ metrics.atomicReplaceFailures = 1;
323
+ metrics.rollbackCount = 1;
324
+ const rollbackErrors = [];
325
+
326
+ // Remove any partially replaced files before restoring backups.
327
+ for (const target of replacedTargets.reverse()) {
328
+ await removeIfExists(target);
329
+ }
330
+
331
+ // Restore original files from backups.
332
+ for (const backup of backups.reverse()) {
333
+ try {
334
+ await promoteFileWithFallback(backup.backupPath, backup.target, renameOptions, metrics);
335
+ } catch (restoreErr) {
336
+ rollbackErrors.push(
337
+ `restore ${path.basename(backup.target)} failed: ${restoreErr.message}`
338
+ );
339
+ }
340
+ }
341
+ if (rollbackErrors.length > 0) {
342
+ metrics.rollbackRestoreFailureCount = rollbackErrors.length;
343
+ }
344
+
345
+ // Clean up temp files left from this failed write attempt.
346
+ await Promise.all(filePairs.map((pair) => removeIfExists(pair.source)));
347
+
348
+ if (rollbackErrors.length > 0) {
349
+ error.message = `${error.message}. Rollback issues: ${rollbackErrors.join('; ')}`;
350
+ }
351
+ throw error;
352
+ } finally {
353
+ // Best-effort cleanup for any backup remnants after success/rollback.
354
+ await Promise.all(backups.map((backup) => removeIfExists(backup.backupPath)));
355
+ await updateTelemetry(cacheDir, (telemetry) => {
356
+ telemetry.totals.atomicReplaceAttempts += metrics.atomicReplaceAttempts;
357
+ telemetry.totals.atomicReplaceSuccesses += metrics.atomicReplaceSuccesses;
358
+ telemetry.totals.atomicReplaceFailures += metrics.atomicReplaceFailures;
359
+ telemetry.totals.renameRetryCount += metrics.renameRetryCount;
360
+ telemetry.totals.fallbackCopyCount += metrics.fallbackCopyCount;
361
+ telemetry.totals.rollbackCount += metrics.rollbackCount;
362
+ telemetry.totals.rollbackRestoreFailureCount += metrics.rollbackRestoreFailureCount;
363
+ telemetry.lastAtomicReplace = {
364
+ at: new Date().toISOString(),
365
+ success: metrics.atomicReplaceSuccesses > 0,
366
+ renameRetryCount: metrics.renameRetryCount,
367
+ fallbackCopyCount: metrics.fallbackCopyCount,
368
+ rollbackCount: metrics.rollbackCount,
369
+ rollbackRestoreFailureCount: metrics.rollbackRestoreFailureCount,
370
+ };
371
+ if (operationError) {
372
+ telemetry.lastError = {
373
+ at: new Date().toISOString(),
374
+ message: operationError.message,
375
+ };
376
+ }
377
+ });
378
+ }
379
+ }
380
+
381
+ /**
382
+ * Custom error for binary store corruption.
383
+ * Allows cache layer to distinguish corruption from other load failures.
384
+ */
385
+ export class BinaryStoreCorruptionError extends Error {
386
+ constructor(message) {
387
+ super(message);
388
+ this.name = 'BinaryStoreCorruptionError';
389
+ }
390
+ }
391
+
392
+ function writeMagic(buffer, magic) {
393
+ buffer.write(magic, 0, 'ascii');
394
+ }
395
+
396
+ function readMagic(buffer) {
397
+ return buffer.toString('ascii', 0, 4);
398
+ }
399
+
400
+ function ensureLittleEndian() {
401
+ if (os.endianness() !== 'LE') {
402
+ throw new Error('Binary vector store requires little-endian architecture');
403
+ }
404
+ }
405
+
406
+ function getDataView(buffer) {
407
+ return new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
408
+ }
409
+
410
+ /**
411
+ * Generate a random writeId shared across all files in a single write operation.
412
+ */
413
+ function generateWriteId() {
414
+ return crypto.randomInt(1, 0xffffffff);
415
+ }
416
+
417
+ /**
418
+ * Compute CRC32 checksum over a buffer.
419
+ */
424
420
  function computeCrc32(buffer, initial) {
425
421
  return initial !== undefined ? crc32(buffer, initial) >>> 0 : crc32(buffer) >>> 0;
426
422
  }
@@ -483,180 +479,174 @@ async function writeHeaderCrc(handle, crcValue) {
483
479
  crcView.setUint32(0, crcValue >>> 0, true);
484
480
  await handle.write(crcBuffer, 0, crcBuffer.length, 20);
485
481
  }
486
-
487
- function readHeader(buffer, magic, headerSize) {
488
- if (buffer.length < headerSize) {
489
- throw new BinaryStoreCorruptionError('Binary store header is truncated');
490
- }
491
- const actualMagic = readMagic(buffer);
492
- if (actualMagic !== magic) {
493
- throw new BinaryStoreCorruptionError(`Invalid binary store magic (${actualMagic})`);
494
- }
495
- const view = getDataView(buffer);
496
- const version = view.getUint32(4, true);
497
- if (version !== STORE_VERSION) {
498
- throw new Error(`Unsupported binary store version (${version})`);
499
- }
500
- return view;
501
- }
502
-
503
- function writeVectorsHeader(buffer, dim, count, writeId) {
504
- writeMagic(buffer, MAGIC_VECTORS);
505
- const view = getDataView(buffer);
506
- view.setUint32(4, STORE_VERSION, true);
507
- view.setUint32(8, dim, true);
508
- view.setUint32(12, count, true);
509
- view.setUint32(16, writeId, true);
510
- view.setUint32(20, 0, true); // CRC32 placeholder — filled after payload write
511
- // bytes 24-31: reserved
512
- }
513
-
514
- function writeRecordsHeader(buffer, count, fileCount, writeId) {
515
- writeMagic(buffer, MAGIC_RECORDS);
516
- const view = getDataView(buffer);
517
- view.setUint32(4, STORE_VERSION, true);
518
- view.setUint32(8, count, true);
519
- view.setUint32(12, fileCount, true);
520
- view.setUint32(16, writeId, true);
521
- view.setUint32(20, 0, true); // CRC32 placeholder
522
- // bytes 24-31: reserved
523
- }
524
-
525
- function writeContentHeader(buffer, totalBytes, writeId) {
526
- writeMagic(buffer, MAGIC_CONTENT);
527
- const view = getDataView(buffer);
528
- view.setUint32(4, STORE_VERSION, true);
529
- const value = BigInt(totalBytes);
530
- view.setBigUint64(8, value, true);
531
- view.setUint32(16, writeId, true);
532
- view.setUint32(20, 0, true); // CRC32 placeholder
533
- // bytes 24-31: reserved
534
- }
535
-
536
- function readBigUint(view, offset) {
537
- const value = view.getBigUint64(offset, true);
538
- if (value > BigInt(Number.MAX_SAFE_INTEGER)) {
539
- throw new Error('Binary store content offset exceeds safe integer range');
540
- }
541
- return Number(value);
542
- }
543
-
544
- function normalizeContent(value) {
545
- if (value === null || value === undefined) return '';
546
- if (typeof value !== 'string') return String(value);
547
- return value;
548
- }
549
-
550
- export class BinaryVectorStore {
551
- constructor({
552
- vectorsBuffer,
553
- recordsBuffer,
554
- vectorsHandle,
555
- vectorsFd,
556
- contentHandle,
557
- contentBuffer,
558
- contentSize,
559
- files,
560
- dim,
561
- count,
562
- contentCacheEntries,
563
- vectorCacheEntries,
564
- }) {
565
- this.vectorsBuffer = vectorsBuffer;
566
- this.recordsBuffer = recordsBuffer;
567
- this.vectorsHandle = vectorsHandle ?? null;
568
- this.vectorsFd = Number.isInteger(vectorsFd) ? vectorsFd : null;
569
- this.contentHandle = contentHandle ?? null;
570
- this.contentBuffer = contentBuffer ?? null;
571
- this.contentSize = Number.isFinite(contentSize)
572
- ? contentSize
573
- : contentBuffer
574
- ? Math.max(0, contentBuffer.length - CONTENT_HEADER_SIZE)
575
- : 0;
576
- this.files = files;
577
- this.dim = dim;
578
- this.count = count;
579
- this.contentCacheEntries = Number.isInteger(contentCacheEntries) ? contentCacheEntries : 256;
580
- this.contentCache = new Map();
581
- this.vectorCacheEntries = Number.isInteger(vectorCacheEntries) ? vectorCacheEntries : 0;
582
- this.vectorCache = new Map();
583
-
584
- this.vectorDataOffset = VECTOR_HEADER_SIZE;
585
- this.recordDataOffset = RECORD_HEADER_SIZE;
586
- this.contentDataOffset = CONTENT_HEADER_SIZE;
587
- }
588
-
589
- async close() {
590
- this.contentCache.clear();
591
- this.vectorCache.clear();
592
- this.vectorsBuffer = null;
593
- this.recordsBuffer = null;
594
- this.contentBuffer = null;
595
- this.files = null;
596
- if (this.vectorsHandle) {
597
- try {
598
- await this.vectorsHandle.close();
599
- } catch {
600
-
601
- }
602
- }
603
- this.vectorsHandle = null;
604
- if (Number.isInteger(this.vectorsFd)) {
605
- try {
606
- fsSync.closeSync(this.vectorsFd);
607
- } catch {
608
-
609
- }
610
- }
611
- this.vectorsFd = null;
612
- if (this.contentHandle) {
613
- try {
614
- await this.contentHandle.close();
615
- } catch {
616
-
617
- }
618
- }
619
- this.contentHandle = null;
620
- }
621
-
622
- static getPaths(cacheDir) {
623
- return {
624
- vectorsPath: path.join(cacheDir, VECTORS_FILE),
625
- recordsPath: path.join(cacheDir, RECORDS_FILE),
626
- contentPath: path.join(cacheDir, CONTENT_FILE),
627
- filesPath: path.join(cacheDir, FILES_FILE),
628
- };
629
- }
630
-
631
- static async load(cacheDir, { contentCacheEntries, vectorCacheEntries, vectorLoadMode } = {}) {
632
- ensureLittleEndian();
633
- const { vectorsPath, recordsPath, contentPath, filesPath } =
634
- BinaryVectorStore.getPaths(cacheDir);
635
-
636
- let contentReadHandle = null;
637
- let vectorsFd = null;
638
-
639
- try {
640
- const loadVectorsFromDisk = String(vectorLoadMode).toLowerCase() === 'disk';
641
- let vectorsBuffer = null;
642
-
643
- const [recordsBuffer, filesRaw] = await Promise.all([
644
- fs.readFile(recordsPath),
645
- fs.readFile(filesPath, 'utf-8'),
646
- ]);
647
-
648
- if (loadVectorsFromDisk) {
649
- vectorsFd = fsSync.openSync(vectorsPath, 'r');
650
- const headerBuffer = Buffer.alloc(VECTOR_HEADER_SIZE);
651
- const bytesRead = fsSync.readSync(vectorsFd, headerBuffer, 0, VECTOR_HEADER_SIZE, 0);
652
- if (bytesRead < VECTOR_HEADER_SIZE) {
653
- throw new Error('Binary store vectors header is truncated');
654
- }
655
- vectorsBuffer = headerBuffer;
656
- } else {
657
- vectorsBuffer = await fs.readFile(vectorsPath);
658
- }
659
-
482
+
483
+ function readHeader(buffer, magic, headerSize) {
484
+ if (buffer.length < headerSize) {
485
+ throw new BinaryStoreCorruptionError('Binary store header is truncated');
486
+ }
487
+ const actualMagic = readMagic(buffer);
488
+ if (actualMagic !== magic) {
489
+ throw new BinaryStoreCorruptionError(`Invalid binary store magic (${actualMagic})`);
490
+ }
491
+ const view = getDataView(buffer);
492
+ const version = view.getUint32(4, true);
493
+ if (version !== STORE_VERSION) {
494
+ throw new Error(`Unsupported binary store version (${version})`);
495
+ }
496
+ return view;
497
+ }
498
+
499
+ function writeVectorsHeader(buffer, dim, count, writeId) {
500
+ writeMagic(buffer, MAGIC_VECTORS);
501
+ const view = getDataView(buffer);
502
+ view.setUint32(4, STORE_VERSION, true);
503
+ view.setUint32(8, dim, true);
504
+ view.setUint32(12, count, true);
505
+ view.setUint32(16, writeId, true);
506
+ view.setUint32(20, 0, true); // CRC32 placeholder — filled after payload write
507
+ // bytes 24-31: reserved
508
+ }
509
+
510
+ function writeRecordsHeader(buffer, count, fileCount, writeId) {
511
+ writeMagic(buffer, MAGIC_RECORDS);
512
+ const view = getDataView(buffer);
513
+ view.setUint32(4, STORE_VERSION, true);
514
+ view.setUint32(8, count, true);
515
+ view.setUint32(12, fileCount, true);
516
+ view.setUint32(16, writeId, true);
517
+ view.setUint32(20, 0, true); // CRC32 placeholder
518
+ // bytes 24-31: reserved
519
+ }
520
+
521
+ function writeContentHeader(buffer, totalBytes, writeId) {
522
+ writeMagic(buffer, MAGIC_CONTENT);
523
+ const view = getDataView(buffer);
524
+ view.setUint32(4, STORE_VERSION, true);
525
+ const value = BigInt(totalBytes);
526
+ view.setBigUint64(8, value, true);
527
+ view.setUint32(16, writeId, true);
528
+ view.setUint32(20, 0, true); // CRC32 placeholder
529
+ // bytes 24-31: reserved
530
+ }
531
+
532
+ function readBigUint(view, offset) {
533
+ const value = view.getBigUint64(offset, true);
534
+ if (value > BigInt(Number.MAX_SAFE_INTEGER)) {
535
+ throw new Error('Binary store content offset exceeds safe integer range');
536
+ }
537
+ return Number(value);
538
+ }
539
+
540
+ function normalizeContent(value) {
541
+ if (value === null || value === undefined) return '';
542
+ if (typeof value !== 'string') return String(value);
543
+ return value;
544
+ }
545
+
546
+ export class BinaryVectorStore {
547
+ constructor({
548
+ vectorsBuffer,
549
+ recordsBuffer,
550
+ vectorsHandle,
551
+ vectorsFd,
552
+ contentHandle,
553
+ contentBuffer,
554
+ contentSize,
555
+ files,
556
+ dim,
557
+ count,
558
+ contentCacheEntries,
559
+ vectorCacheEntries,
560
+ }) {
561
+ this.vectorsBuffer = vectorsBuffer;
562
+ this.recordsBuffer = recordsBuffer;
563
+ this.vectorsHandle = vectorsHandle ?? null;
564
+ this.vectorsFd = Number.isInteger(vectorsFd) ? vectorsFd : null;
565
+ this.contentHandle = contentHandle ?? null;
566
+ this.contentBuffer = contentBuffer ?? null;
567
+ this.contentSize = Number.isFinite(contentSize)
568
+ ? contentSize
569
+ : contentBuffer
570
+ ? Math.max(0, contentBuffer.length - CONTENT_HEADER_SIZE)
571
+ : 0;
572
+ this.files = files;
573
+ this.dim = dim;
574
+ this.count = count;
575
+ this.contentCacheEntries = Number.isInteger(contentCacheEntries) ? contentCacheEntries : 256;
576
+ this.contentCache = new Map();
577
+ this.vectorCacheEntries = Number.isInteger(vectorCacheEntries) ? vectorCacheEntries : 0;
578
+ this.vectorCache = new Map();
579
+
580
+ this.vectorDataOffset = VECTOR_HEADER_SIZE;
581
+ this.recordDataOffset = RECORD_HEADER_SIZE;
582
+ this.contentDataOffset = CONTENT_HEADER_SIZE;
583
+ }
584
+
585
+ async close() {
586
+ this.contentCache.clear();
587
+ this.vectorCache.clear();
588
+ this.vectorsBuffer = null;
589
+ this.recordsBuffer = null;
590
+ this.contentBuffer = null;
591
+ this.files = null;
592
+ if (this.vectorsHandle) {
593
+ try {
594
+ await this.vectorsHandle.close();
595
+ } catch {}
596
+ }
597
+ this.vectorsHandle = null;
598
+ if (Number.isInteger(this.vectorsFd)) {
599
+ try {
600
+ fsSync.closeSync(this.vectorsFd);
601
+ } catch {}
602
+ }
603
+ this.vectorsFd = null;
604
+ if (this.contentHandle) {
605
+ try {
606
+ await this.contentHandle.close();
607
+ } catch {}
608
+ }
609
+ this.contentHandle = null;
610
+ }
611
+
612
+ static getPaths(cacheDir) {
613
+ return {
614
+ vectorsPath: path.join(cacheDir, VECTORS_FILE),
615
+ recordsPath: path.join(cacheDir, RECORDS_FILE),
616
+ contentPath: path.join(cacheDir, CONTENT_FILE),
617
+ filesPath: path.join(cacheDir, FILES_FILE),
618
+ };
619
+ }
620
+
621
+ static async load(cacheDir, { contentCacheEntries, vectorCacheEntries, vectorLoadMode } = {}) {
622
+ ensureLittleEndian();
623
+ const { vectorsPath, recordsPath, contentPath, filesPath } =
624
+ BinaryVectorStore.getPaths(cacheDir);
625
+
626
+ let contentReadHandle = null;
627
+ let vectorsFd = null;
628
+
629
+ try {
630
+ const loadVectorsFromDisk = String(vectorLoadMode).toLowerCase() === 'disk';
631
+ let vectorsBuffer = null;
632
+
633
+ const [recordsBuffer, filesRaw] = await Promise.all([
634
+ fs.readFile(recordsPath),
635
+ fs.readFile(filesPath, 'utf-8'),
636
+ ]);
637
+
638
+ if (loadVectorsFromDisk) {
639
+ vectorsFd = fsSync.openSync(vectorsPath, 'r');
640
+ const headerBuffer = Buffer.alloc(VECTOR_HEADER_SIZE);
641
+ const bytesRead = fsSync.readSync(vectorsFd, headerBuffer, 0, VECTOR_HEADER_SIZE, 0);
642
+ if (bytesRead < VECTOR_HEADER_SIZE) {
643
+ throw new Error('Binary store vectors header is truncated');
644
+ }
645
+ vectorsBuffer = headerBuffer;
646
+ } else {
647
+ vectorsBuffer = await fs.readFile(vectorsPath);
648
+ }
649
+
660
650
  const vectorsView = readHeader(vectorsBuffer, MAGIC_VECTORS, VECTOR_HEADER_SIZE);
661
651
  const dim = vectorsView.getUint32(8, true);
662
652
  const count = vectorsView.getUint32(12, true);
@@ -665,47 +655,49 @@ export class BinaryVectorStore {
665
655
  const vectorsPayloadBytes = count * dim * 4;
666
656
 
667
657
  const recordsView = readHeader(recordsBuffer, MAGIC_RECORDS, RECORD_HEADER_SIZE);
668
- const recordCount = recordsView.getUint32(8, true);
669
- const fileCount = recordsView.getUint32(12, true);
670
- const recordsWriteId = recordsView.getUint32(16, true);
671
- const recordsExpectedCrc = recordsView.getUint32(20, true);
672
-
673
- if (recordCount !== count) {
674
- throw new BinaryStoreCorruptionError(`Binary store count mismatch (${recordCount} != ${count})`);
675
- }
676
-
677
- // Validate writeId consistency between vectors and records
678
- if (vectorsWriteId !== recordsWriteId) {
679
- throw new BinaryStoreCorruptionError(
680
- `Binary store writeId mismatch: vectors=${vectorsWriteId}, records=${recordsWriteId}`
681
- );
682
- }
683
-
684
- contentReadHandle = await fs.open(contentPath, 'r');
685
- let totalContentBytes = 0;
686
-
687
- const headerBuffer = Buffer.alloc(CONTENT_HEADER_SIZE);
688
- const { bytesRead } = await contentReadHandle.read(headerBuffer, 0, CONTENT_HEADER_SIZE, 0);
689
- if (bytesRead < CONTENT_HEADER_SIZE) {
690
- throw new BinaryStoreCorruptionError('Binary store content header is truncated');
691
- }
692
- const contentView = readHeader(headerBuffer, MAGIC_CONTENT, CONTENT_HEADER_SIZE);
693
- totalContentBytes = readBigUint(contentView, 8);
694
- const contentWriteId = contentView.getUint32(16, true);
695
- const contentExpectedCrc = contentView.getUint32(20, true);
696
- const stats = await contentReadHandle.stat();
697
- const expectedContentSize = CONTENT_HEADER_SIZE + totalContentBytes;
698
- if (stats.size < expectedContentSize) {
699
- throw new BinaryStoreCorruptionError('Binary store content file truncated');
700
- }
701
-
702
- // Validate writeId consistency across all three files
703
- if (vectorsWriteId !== contentWriteId) {
704
- throw new BinaryStoreCorruptionError(
705
- `Binary store writeId mismatch: vectors=${vectorsWriteId}, content=${contentWriteId}`
706
- );
707
- }
708
-
658
+ const recordCount = recordsView.getUint32(8, true);
659
+ const fileCount = recordsView.getUint32(12, true);
660
+ const recordsWriteId = recordsView.getUint32(16, true);
661
+ const recordsExpectedCrc = recordsView.getUint32(20, true);
662
+
663
+ if (recordCount !== count) {
664
+ throw new BinaryStoreCorruptionError(
665
+ `Binary store count mismatch (${recordCount} != ${count})`
666
+ );
667
+ }
668
+
669
+ // Validate writeId consistency between vectors and records
670
+ if (vectorsWriteId !== recordsWriteId) {
671
+ throw new BinaryStoreCorruptionError(
672
+ `Binary store writeId mismatch: vectors=${vectorsWriteId}, records=${recordsWriteId}`
673
+ );
674
+ }
675
+
676
+ contentReadHandle = await fs.open(contentPath, 'r');
677
+ let totalContentBytes = 0;
678
+
679
+ const headerBuffer = Buffer.alloc(CONTENT_HEADER_SIZE);
680
+ const { bytesRead } = await contentReadHandle.read(headerBuffer, 0, CONTENT_HEADER_SIZE, 0);
681
+ if (bytesRead < CONTENT_HEADER_SIZE) {
682
+ throw new BinaryStoreCorruptionError('Binary store content header is truncated');
683
+ }
684
+ const contentView = readHeader(headerBuffer, MAGIC_CONTENT, CONTENT_HEADER_SIZE);
685
+ totalContentBytes = readBigUint(contentView, 8);
686
+ const contentWriteId = contentView.getUint32(16, true);
687
+ const contentExpectedCrc = contentView.getUint32(20, true);
688
+ const stats = await contentReadHandle.stat();
689
+ const expectedContentSize = CONTENT_HEADER_SIZE + totalContentBytes;
690
+ if (stats.size < expectedContentSize) {
691
+ throw new BinaryStoreCorruptionError('Binary store content file truncated');
692
+ }
693
+
694
+ // Validate writeId consistency across all three files
695
+ if (vectorsWriteId !== contentWriteId) {
696
+ throw new BinaryStoreCorruptionError(
697
+ `Binary store writeId mismatch: vectors=${vectorsWriteId}, content=${contentWriteId}`
698
+ );
699
+ }
700
+
709
701
  // Validate CRC32 for records payload
710
702
  const recordsPayload = recordsBuffer.subarray(RECORD_HEADER_SIZE);
711
703
  const recordsActualCrc = computeCrc32(recordsPayload);
@@ -766,380 +758,370 @@ export class BinaryVectorStore {
766
758
  `Binary store content CRC32 mismatch (expected ${contentExpectedCrc}, got 0)`
767
759
  );
768
760
  }
769
-
761
+
770
762
  const filesData = JSON.parse(filesRaw);
771
- // Support new format { writeId, files } and legacy raw array
772
- let files;
773
- let filesWriteId = null;
774
- if (filesData && !Array.isArray(filesData) && Array.isArray(filesData.files)) {
775
- files = filesData.files;
776
- filesWriteId = filesData.writeId ?? null;
777
- } else if (Array.isArray(filesData)) {
778
- files = filesData;
779
- } else {
780
- throw new BinaryStoreCorruptionError('Binary store file table is invalid');
781
- }
782
-
783
- if (files.length !== fileCount) {
784
- throw new BinaryStoreCorruptionError(
785
- `Binary store file table count mismatch (${files.length} != ${fileCount})`
786
- );
787
- }
788
-
789
- // Validate writeId from files.json if present
790
- if (filesWriteId !== null && filesWriteId !== vectorsWriteId) {
791
- throw new BinaryStoreCorruptionError(
792
- `Binary store writeId mismatch: vectors=${vectorsWriteId}, files.json=${filesWriteId}`
793
- );
794
- }
795
-
796
- return new BinaryVectorStore({
797
- vectorsBuffer,
798
- recordsBuffer,
799
- vectorsHandle: null,
800
- vectorsFd,
801
- contentHandle: contentReadHandle,
802
- contentSize: totalContentBytes,
803
- files,
804
- dim,
805
- count,
806
- contentCacheEntries,
807
- vectorCacheEntries,
808
- });
809
- } catch (err) {
810
- if (contentReadHandle) await contentReadHandle.close().catch(() => {});
811
- if (Number.isInteger(vectorsFd)) {
812
- try {
813
- fsSync.closeSync(vectorsFd);
814
- } catch {
815
-
816
- }
817
- }
818
- throw err;
819
- }
820
- }
821
-
822
- get length() {
823
- return this.count;
824
- }
825
-
826
- getRecord(index) {
827
- if (index < 0 || index >= this.count) return null;
828
- const offset = this.recordDataOffset + index * RECORD_SIZE;
829
- const view = getDataView(this.recordsBuffer);
830
-
831
- const fileId = view.getUint32(offset, true);
832
- const startLine = view.getUint32(offset + 4, true);
833
- const endLine = view.getUint32(offset + 8, true);
834
- const contentOffset = readBigUint(view, offset + 12);
835
- const contentLength = view.getUint32(offset + 20, true);
836
-
837
- return {
838
- fileId,
839
- file: this.files[fileId],
840
- startLine,
841
- endLine,
842
- contentOffset,
843
- contentLength,
844
- };
845
- }
846
-
847
- getVector(index) {
848
- if (index < 0 || index >= this.count) return null;
849
- if (this.vectorCacheEntries > 0) {
850
- const cached = this.vectorCache.get(index);
851
- if (cached) {
852
- this.vectorCache.delete(index);
853
- this.vectorCache.set(index, cached);
854
- return cached;
855
- }
856
- }
857
-
858
- const offset = this.vectorDataOffset + index * this.dim * 4;
859
- const byteLength = this.dim * 4;
860
- let vector = null;
861
-
862
- if (this.vectorsBuffer && this.vectorsBuffer.length >= this.vectorDataOffset + byteLength) {
863
- vector = new Float32Array(
864
- this.vectorsBuffer.buffer,
865
- this.vectorsBuffer.byteOffset + offset,
866
- this.dim
867
- );
868
- } else if (Number.isInteger(this.vectorsFd)) {
869
-
870
-
871
- const buffer = Buffer.alloc(byteLength);
872
- const bytesRead = fsSync.readSync(this.vectorsFd, buffer, 0, byteLength, offset);
873
- if (bytesRead === byteLength) {
874
- vector = new Float32Array(buffer.buffer, buffer.byteOffset, this.dim);
875
- }
876
- }
877
-
878
- if (vector && this.vectorCacheEntries > 0) {
879
- this.vectorCache.set(index, vector);
880
- if (this.vectorCache.size > this.vectorCacheEntries) {
881
- const firstKey = this.vectorCache.keys().next().value;
882
- this.vectorCache.delete(firstKey);
883
- }
884
- }
885
-
886
- return vector;
887
- }
888
-
889
- async getContent(index) {
890
- if (index < 0 || index >= this.count) return null;
891
- if (this.contentCacheEntries > 0) {
892
- const cached = this.contentCache.get(index);
893
- if (cached !== undefined) {
894
- this.contentCache.delete(index);
895
- this.contentCache.set(index, cached);
896
- return cached;
897
- }
898
- }
899
-
900
- const record = this.getRecord(index);
901
- if (!record || record.contentLength === 0) return '';
902
- const contentLimit = record.contentOffset + record.contentLength;
903
- if (Number.isFinite(this.contentSize) && contentLimit > this.contentSize) {
904
- return '';
905
- }
906
-
907
- let content = '';
908
- if (this.contentBuffer) {
909
- const start = this.contentDataOffset + record.contentOffset;
910
- const end = start + record.contentLength;
911
- content = this.contentBuffer.slice(start, end).toString('utf-8');
912
- } else if (this.contentHandle) {
913
- const start = this.contentDataOffset + record.contentOffset;
914
- const length = record.contentLength;
915
- const buffer = Buffer.alloc(length);
916
- const { bytesRead } = await this.contentHandle.read(buffer, 0, length, start);
917
- content = buffer.slice(0, bytesRead).toString('utf-8');
918
- } else {
919
- return '';
920
- }
921
-
922
- if (this.contentCacheEntries > 0) {
923
- this.contentCache.set(index, content);
924
- if (this.contentCache.size > this.contentCacheEntries) {
925
- const firstKey = this.contentCache.keys().next().value;
926
- this.contentCache.delete(firstKey);
927
- }
928
- }
929
-
930
- return content;
931
- }
932
-
933
- async toChunkViews({ includeContent = false, includeVector = true } = {}) {
934
- const chunks = new Array(this.count);
935
- for (let i = 0; i < this.count; i += 1) {
936
- const record = this.getRecord(i);
937
- if (!record) continue;
938
- const chunk = {
939
- file: record.file,
940
- startLine: record.startLine,
941
- endLine: record.endLine,
942
- _index: i,
943
- _binaryIndex: i,
944
- };
945
- if (includeVector) {
946
- chunk.vector = this.getVector(i);
947
- }
948
- if (includeContent) {
949
- chunk.content = await this.getContent(i);
950
- }
951
- chunks[i] = chunk;
952
- }
953
- return chunks;
954
- }
955
-
956
- getAllFileIndices() {
957
- const map = new Map();
958
- for (let i = 0; i < this.count; i++) {
959
- const record = this.getRecord(i);
960
- if (record) {
961
- let list = map.get(record.file);
962
- if (!list) {
963
- list = [];
964
- map.set(record.file, list);
965
- }
966
- list.push(i);
967
- }
968
- }
969
- return map;
970
- }
971
-
972
- static async write(
973
- cacheDir,
974
- chunks,
975
- {
976
- contentCacheEntries,
977
- vectorCacheEntries,
978
- vectorLoadMode,
979
- getContent,
980
- getVector,
981
- preRename,
982
- renameOptions,
983
- } = {}
984
- ) {
985
- ensureLittleEndian();
986
- const { vectorsPath, recordsPath, contentPath, filesPath } =
987
- BinaryVectorStore.getPaths(cacheDir);
988
-
989
- const tmpSuffix = `.tmp-${process.pid}`;
990
- const vectorsTmp = `${vectorsPath}${tmpSuffix}`;
991
- const recordsTmp = `${recordsPath}${tmpSuffix}`;
992
- const contentTmp = `${contentPath}${tmpSuffix}`;
993
- const filesTmp = `${filesPath}${tmpSuffix}`;
994
-
995
- const fileIds = new Map();
996
- const files = [];
997
- const denseChunks = [];
998
- const denseSourceIndices = [];
999
- for (let i = 0; i < chunks.length; i += 1) {
1000
- const chunk = chunks[i];
1001
- if (!chunk) continue;
1002
- denseChunks.push(chunk);
1003
- denseSourceIndices.push(i);
1004
- }
1005
-
1006
- const resolveVector = async (chunk, sourceIndex) => {
1007
- let vectorSource = chunk.vector;
1008
- if (
1009
- (vectorSource === undefined || vectorSource === null) &&
1010
- typeof getVector === 'function'
1011
- ) {
1012
- vectorSource = getVector(chunk, sourceIndex);
1013
- if (vectorSource && typeof vectorSource.then === 'function') {
1014
- vectorSource = await vectorSource;
1015
- }
1016
- }
1017
- if (vectorSource === undefined || vectorSource === null) {
1018
- throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
1019
- }
1020
- const vector =
1021
- vectorSource instanceof Float32Array
1022
- ? vectorSource
1023
- : ArrayBuffer.isView(vectorSource)
1024
- ? Float32Array.from(vectorSource)
1025
- : new Float32Array(vectorSource);
1026
- if (!vector || vector.length === 0) {
1027
- throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
1028
- }
1029
- return vector;
1030
- };
1031
-
1032
- const resolveContent = async (chunk, sourceIndex) => {
1033
- const contentSource =
1034
- chunk.content !== undefined && chunk.content !== null
1035
- ? chunk.content
1036
- : getContent
1037
- ? await getContent(chunk, sourceIndex)
1038
- : '';
1039
- return normalizeContent(contentSource);
1040
- };
1041
-
1042
- const recordEntries = new Array(denseChunks.length);
1043
- let contentOffset = 0;
1044
-
1045
- for (let i = 0; i < denseChunks.length; i += 1) {
1046
- const chunk = denseChunks[i];
1047
- const sourceIndex = denseSourceIndices[i];
1048
-
1049
- const file = chunk.file;
1050
- if (!fileIds.has(file)) {
1051
- fileIds.set(file, files.length);
1052
- files.push(file);
1053
- }
1054
-
1055
- const contentValue = await resolveContent(chunk, sourceIndex);
1056
- const contentLength = Buffer.byteLength(contentValue, 'utf-8');
1057
-
1058
- recordEntries[i] = {
1059
- fileId: fileIds.get(file),
1060
- startLine: chunk.startLine ?? 0,
1061
- endLine: chunk.endLine ?? 0,
1062
- contentOffset,
1063
- contentLength,
1064
- };
1065
-
1066
- contentOffset += contentLength;
1067
- }
1068
-
1069
- const count = denseChunks.length;
1070
- const dim =
1071
- count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
1072
-
1073
- const writeId = generateWriteId();
1074
-
1075
- await fs.writeFile(filesTmp, JSON.stringify({ writeId, files }));
1076
-
1077
- let vectorsHandle = null;
1078
- let recordsHandle = null;
1079
- let contentHandle = null;
1080
-
1081
- try {
1082
- vectorsHandle = await fs.open(vectorsTmp, 'w');
1083
- recordsHandle = await fs.open(recordsTmp, 'w');
1084
- contentHandle = await fs.open(contentTmp, 'w');
1085
-
1086
- const vectorsHeader = Buffer.alloc(VECTOR_HEADER_SIZE);
1087
- writeVectorsHeader(vectorsHeader, dim, count, writeId);
1088
- await vectorsHandle.write(vectorsHeader, 0, vectorsHeader.length, 0);
1089
-
1090
- const recordsHeader = Buffer.alloc(RECORD_HEADER_SIZE);
1091
- writeRecordsHeader(recordsHeader, count, files.length, writeId);
1092
- await recordsHandle.write(recordsHeader, 0, recordsHeader.length, 0);
1093
-
1094
- const contentHeader = Buffer.alloc(CONTENT_HEADER_SIZE);
1095
- writeContentHeader(contentHeader, contentOffset, writeId);
1096
- await contentHandle.write(contentHeader, 0, contentHeader.length, 0);
1097
-
1098
- // Incremental CRC32 accumulators (zero-alloc — no read-back needed)
1099
- let vectorsCrc = 0;
1100
- let recordsCrc = 0;
1101
- let contentCrc = 0;
1102
-
1103
- let vectorPos = VECTOR_HEADER_SIZE;
1104
- let recordPos = RECORD_HEADER_SIZE;
1105
- let contentPos = CONTENT_HEADER_SIZE;
1106
-
1107
- for (let i = 0; i < count; i += 1) {
1108
- const entry = recordEntries[i];
1109
- if (!entry) continue;
1110
-
1111
- const recordBuffer = Buffer.alloc(RECORD_SIZE);
1112
- const view = getDataView(recordBuffer);
1113
- view.setUint32(0, entry.fileId, true);
1114
- view.setUint32(4, entry.startLine, true);
1115
- view.setUint32(8, entry.endLine, true);
1116
- view.setBigUint64(12, BigInt(entry.contentOffset), true);
1117
- view.setUint32(20, entry.contentLength, true);
1118
- view.setUint32(24, 0, true);
1119
- view.setUint32(28, 0, true);
1120
-
763
+ // Support new format { writeId, files } and legacy raw array
764
+ let files;
765
+ let filesWriteId = null;
766
+ if (filesData && !Array.isArray(filesData) && Array.isArray(filesData.files)) {
767
+ files = filesData.files;
768
+ filesWriteId = filesData.writeId ?? null;
769
+ } else if (Array.isArray(filesData)) {
770
+ files = filesData;
771
+ } else {
772
+ throw new BinaryStoreCorruptionError('Binary store file table is invalid');
773
+ }
774
+
775
+ if (files.length !== fileCount) {
776
+ throw new BinaryStoreCorruptionError(
777
+ `Binary store file table count mismatch (${files.length} != ${fileCount})`
778
+ );
779
+ }
780
+
781
+ // Validate writeId from files.json if present
782
+ if (filesWriteId !== null && filesWriteId !== vectorsWriteId) {
783
+ throw new BinaryStoreCorruptionError(
784
+ `Binary store writeId mismatch: vectors=${vectorsWriteId}, files.json=${filesWriteId}`
785
+ );
786
+ }
787
+
788
+ return new BinaryVectorStore({
789
+ vectorsBuffer,
790
+ recordsBuffer,
791
+ vectorsHandle: null,
792
+ vectorsFd,
793
+ contentHandle: contentReadHandle,
794
+ contentSize: totalContentBytes,
795
+ files,
796
+ dim,
797
+ count,
798
+ contentCacheEntries,
799
+ vectorCacheEntries,
800
+ });
801
+ } catch (err) {
802
+ if (contentReadHandle) await contentReadHandle.close().catch(() => {});
803
+ if (Number.isInteger(vectorsFd)) {
804
+ try {
805
+ fsSync.closeSync(vectorsFd);
806
+ } catch {}
807
+ }
808
+ throw err;
809
+ }
810
+ }
811
+
812
+ get length() {
813
+ return this.count;
814
+ }
815
+
816
+ getRecord(index) {
817
+ if (index < 0 || index >= this.count) return null;
818
+ const offset = this.recordDataOffset + index * RECORD_SIZE;
819
+ const view = getDataView(this.recordsBuffer);
820
+
821
+ const fileId = view.getUint32(offset, true);
822
+ const startLine = view.getUint32(offset + 4, true);
823
+ const endLine = view.getUint32(offset + 8, true);
824
+ const contentOffset = readBigUint(view, offset + 12);
825
+ const contentLength = view.getUint32(offset + 20, true);
826
+
827
+ return {
828
+ fileId,
829
+ file: this.files[fileId],
830
+ startLine,
831
+ endLine,
832
+ contentOffset,
833
+ contentLength,
834
+ };
835
+ }
836
+
837
+ getVector(index) {
838
+ if (index < 0 || index >= this.count) return null;
839
+ if (this.vectorCacheEntries > 0) {
840
+ const cached = this.vectorCache.get(index);
841
+ if (cached) {
842
+ this.vectorCache.delete(index);
843
+ this.vectorCache.set(index, cached);
844
+ return cached;
845
+ }
846
+ }
847
+
848
+ const offset = this.vectorDataOffset + index * this.dim * 4;
849
+ const byteLength = this.dim * 4;
850
+ let vector = null;
851
+
852
+ if (this.vectorsBuffer && this.vectorsBuffer.length >= this.vectorDataOffset + byteLength) {
853
+ vector = new Float32Array(
854
+ this.vectorsBuffer.buffer,
855
+ this.vectorsBuffer.byteOffset + offset,
856
+ this.dim
857
+ );
858
+ } else if (Number.isInteger(this.vectorsFd)) {
859
+ const buffer = Buffer.alloc(byteLength);
860
+ const bytesRead = fsSync.readSync(this.vectorsFd, buffer, 0, byteLength, offset);
861
+ if (bytesRead === byteLength) {
862
+ vector = new Float32Array(buffer.buffer, buffer.byteOffset, this.dim);
863
+ }
864
+ }
865
+
866
+ if (vector && this.vectorCacheEntries > 0) {
867
+ this.vectorCache.set(index, vector);
868
+ if (this.vectorCache.size > this.vectorCacheEntries) {
869
+ const firstKey = this.vectorCache.keys().next().value;
870
+ this.vectorCache.delete(firstKey);
871
+ }
872
+ }
873
+
874
+ return vector;
875
+ }
876
+
877
+ async getContent(index) {
878
+ if (index < 0 || index >= this.count) return null;
879
+ if (this.contentCacheEntries > 0) {
880
+ const cached = this.contentCache.get(index);
881
+ if (cached !== undefined) {
882
+ this.contentCache.delete(index);
883
+ this.contentCache.set(index, cached);
884
+ return cached;
885
+ }
886
+ }
887
+
888
+ const record = this.getRecord(index);
889
+ if (!record || record.contentLength === 0) return '';
890
+ const contentLimit = record.contentOffset + record.contentLength;
891
+ if (Number.isFinite(this.contentSize) && contentLimit > this.contentSize) {
892
+ return '';
893
+ }
894
+
895
+ let content = '';
896
+ if (this.contentBuffer) {
897
+ const start = this.contentDataOffset + record.contentOffset;
898
+ const end = start + record.contentLength;
899
+ content = this.contentBuffer.slice(start, end).toString('utf-8');
900
+ } else if (this.contentHandle) {
901
+ const start = this.contentDataOffset + record.contentOffset;
902
+ const length = record.contentLength;
903
+ const buffer = Buffer.alloc(length);
904
+ const { bytesRead } = await this.contentHandle.read(buffer, 0, length, start);
905
+ content = buffer.slice(0, bytesRead).toString('utf-8');
906
+ } else {
907
+ return '';
908
+ }
909
+
910
+ if (this.contentCacheEntries > 0) {
911
+ this.contentCache.set(index, content);
912
+ if (this.contentCache.size > this.contentCacheEntries) {
913
+ const firstKey = this.contentCache.keys().next().value;
914
+ this.contentCache.delete(firstKey);
915
+ }
916
+ }
917
+
918
+ return content;
919
+ }
920
+
921
+ async toChunkViews({ includeContent = false, includeVector = true } = {}) {
922
+ const chunks = new Array(this.count);
923
+ for (let i = 0; i < this.count; i += 1) {
924
+ const record = this.getRecord(i);
925
+ if (!record) continue;
926
+ const chunk = {
927
+ file: record.file,
928
+ startLine: record.startLine,
929
+ endLine: record.endLine,
930
+ _index: i,
931
+ _binaryIndex: i,
932
+ };
933
+ if (includeVector) {
934
+ chunk.vector = this.getVector(i);
935
+ }
936
+ if (includeContent) {
937
+ chunk.content = await this.getContent(i);
938
+ }
939
+ chunks[i] = chunk;
940
+ }
941
+ return chunks;
942
+ }
943
+
944
+ getAllFileIndices() {
945
+ const map = new Map();
946
+ for (let i = 0; i < this.count; i++) {
947
+ const record = this.getRecord(i);
948
+ if (record) {
949
+ let list = map.get(record.file);
950
+ if (!list) {
951
+ list = [];
952
+ map.set(record.file, list);
953
+ }
954
+ list.push(i);
955
+ }
956
+ }
957
+ return map;
958
+ }
959
+
960
+ static async write(
961
+ cacheDir,
962
+ chunks,
963
+ {
964
+ contentCacheEntries,
965
+ vectorCacheEntries,
966
+ vectorLoadMode,
967
+ getContent,
968
+ getVector,
969
+ preRename,
970
+ renameOptions,
971
+ } = {}
972
+ ) {
973
+ ensureLittleEndian();
974
+ const { vectorsPath, recordsPath, contentPath, filesPath } =
975
+ BinaryVectorStore.getPaths(cacheDir);
976
+
977
+ const tmpSuffix = `.tmp-${process.pid}`;
978
+ const vectorsTmp = `${vectorsPath}${tmpSuffix}`;
979
+ const recordsTmp = `${recordsPath}${tmpSuffix}`;
980
+ const contentTmp = `${contentPath}${tmpSuffix}`;
981
+ const filesTmp = `${filesPath}${tmpSuffix}`;
982
+
983
+ const fileIds = new Map();
984
+ const files = [];
985
+ const denseChunks = [];
986
+ const denseSourceIndices = [];
987
+ for (let i = 0; i < chunks.length; i += 1) {
988
+ const chunk = chunks[i];
989
+ if (!chunk) continue;
990
+ denseChunks.push(chunk);
991
+ denseSourceIndices.push(i);
992
+ }
993
+
994
+ const resolveVector = async (chunk, sourceIndex) => {
995
+ let vectorSource = chunk.vector;
996
+ if (
997
+ (vectorSource === undefined || vectorSource === null) &&
998
+ typeof getVector === 'function'
999
+ ) {
1000
+ vectorSource = getVector(chunk, sourceIndex);
1001
+ if (vectorSource && typeof vectorSource.then === 'function') {
1002
+ vectorSource = await vectorSource;
1003
+ }
1004
+ }
1005
+ if (vectorSource === undefined || vectorSource === null) {
1006
+ throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
1007
+ }
1008
+ const vector =
1009
+ vectorSource instanceof Float32Array
1010
+ ? vectorSource
1011
+ : ArrayBuffer.isView(vectorSource)
1012
+ ? Float32Array.from(vectorSource)
1013
+ : new Float32Array(vectorSource);
1014
+ if (!vector || vector.length === 0) {
1015
+ throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
1016
+ }
1017
+ return vector;
1018
+ };
1019
+
1020
+ const resolveContent = async (chunk, sourceIndex) => {
1021
+ const contentSource =
1022
+ chunk.content !== undefined && chunk.content !== null
1023
+ ? chunk.content
1024
+ : getContent
1025
+ ? await getContent(chunk, sourceIndex)
1026
+ : '';
1027
+ return normalizeContent(contentSource);
1028
+ };
1029
+
1030
+ const recordEntries = new Array(denseChunks.length);
1031
+ let contentOffset = 0;
1032
+
1033
+ for (let i = 0; i < denseChunks.length; i += 1) {
1034
+ const chunk = denseChunks[i];
1035
+ const sourceIndex = denseSourceIndices[i];
1036
+
1037
+ const file = chunk.file;
1038
+ if (!fileIds.has(file)) {
1039
+ fileIds.set(file, files.length);
1040
+ files.push(file);
1041
+ }
1042
+
1043
+ const contentValue = await resolveContent(chunk, sourceIndex);
1044
+ const contentLength = Buffer.byteLength(contentValue, 'utf-8');
1045
+
1046
+ recordEntries[i] = {
1047
+ fileId: fileIds.get(file),
1048
+ startLine: chunk.startLine ?? 0,
1049
+ endLine: chunk.endLine ?? 0,
1050
+ contentOffset,
1051
+ contentLength,
1052
+ };
1053
+
1054
+ contentOffset += contentLength;
1055
+ }
1056
+
1057
+ const count = denseChunks.length;
1058
+ const dim = count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
1059
+
1060
+ const writeId = generateWriteId();
1061
+
1062
+ await fs.writeFile(filesTmp, JSON.stringify({ writeId, files }));
1063
+
1064
+ let vectorsHandle = null;
1065
+ let recordsHandle = null;
1066
+ let contentHandle = null;
1067
+
1068
+ try {
1069
+ vectorsHandle = await fs.open(vectorsTmp, 'w');
1070
+ recordsHandle = await fs.open(recordsTmp, 'w');
1071
+ contentHandle = await fs.open(contentTmp, 'w');
1072
+
1073
+ const vectorsHeader = Buffer.alloc(VECTOR_HEADER_SIZE);
1074
+ writeVectorsHeader(vectorsHeader, dim, count, writeId);
1075
+ await vectorsHandle.write(vectorsHeader, 0, vectorsHeader.length, 0);
1076
+
1077
+ const recordsHeader = Buffer.alloc(RECORD_HEADER_SIZE);
1078
+ writeRecordsHeader(recordsHeader, count, files.length, writeId);
1079
+ await recordsHandle.write(recordsHeader, 0, recordsHeader.length, 0);
1080
+
1081
+ const contentHeader = Buffer.alloc(CONTENT_HEADER_SIZE);
1082
+ writeContentHeader(contentHeader, contentOffset, writeId);
1083
+ await contentHandle.write(contentHeader, 0, contentHeader.length, 0);
1084
+
1085
+ // Incremental CRC32 accumulators (zero-alloc — no read-back needed)
1086
+ let vectorsCrc = 0;
1087
+ let recordsCrc = 0;
1088
+ let contentCrc = 0;
1089
+
1090
+ let vectorPos = VECTOR_HEADER_SIZE;
1091
+ let recordPos = RECORD_HEADER_SIZE;
1092
+ let contentPos = CONTENT_HEADER_SIZE;
1093
+
1094
+ for (let i = 0; i < count; i += 1) {
1095
+ const entry = recordEntries[i];
1096
+ if (!entry) continue;
1097
+
1098
+ const recordBuffer = Buffer.alloc(RECORD_SIZE);
1099
+ const view = getDataView(recordBuffer);
1100
+ view.setUint32(0, entry.fileId, true);
1101
+ view.setUint32(4, entry.startLine, true);
1102
+ view.setUint32(8, entry.endLine, true);
1103
+ view.setBigUint64(12, BigInt(entry.contentOffset), true);
1104
+ view.setUint32(20, entry.contentLength, true);
1105
+ view.setUint32(24, 0, true);
1106
+ view.setUint32(28, 0, true);
1107
+
1121
1108
  await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
1122
1109
  recordPos += recordBuffer.length;
1123
1110
  recordsCrc = updateCrc32(recordsCrc, recordBuffer);
1124
-
1125
- const chunk = denseChunks[i];
1126
- const sourceIndex = denseSourceIndices[i];
1127
- const vector = await resolveVector(chunk, sourceIndex);
1128
- if (vector.length !== dim) {
1129
- throw new Error('Vector dimension mismatch in binary cache write');
1130
- }
1131
- const vectorBuffer = Buffer.from(
1132
- vector.buffer,
1133
- vector.byteOffset,
1134
- vector.byteLength
1135
- );
1111
+
1112
+ const chunk = denseChunks[i];
1113
+ const sourceIndex = denseSourceIndices[i];
1114
+ const vector = await resolveVector(chunk, sourceIndex);
1115
+ if (vector.length !== dim) {
1116
+ throw new Error('Vector dimension mismatch in binary cache write');
1117
+ }
1118
+ const vectorBuffer = Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);
1136
1119
  await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
1137
1120
  vectorPos += vectorBuffer.length;
1138
1121
  vectorsCrc = updateCrc32(vectorsCrc, vectorBuffer);
1139
-
1140
- if (entry.contentLength > 0) {
1141
-
1142
- const val = await resolveContent(chunk, sourceIndex);
1122
+
1123
+ if (entry.contentLength > 0) {
1124
+ const val = await resolveContent(chunk, sourceIndex);
1143
1125
  const contentBuffer = Buffer.from(val, 'utf-8');
1144
1126
  await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
1145
1127
  contentPos += contentBuffer.length;
@@ -1152,32 +1134,32 @@ export class BinaryVectorStore {
1152
1134
  await writeHeaderCrc(recordsHandle, recordsCrc);
1153
1135
  }
1154
1136
  await writeHeaderCrc(contentHandle, contentCrc);
1155
- } finally {
1156
- const closes = [];
1157
- if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
1158
- if (recordsHandle) closes.push(recordsHandle.close().catch(() => {}));
1159
- if (contentHandle) closes.push(contentHandle.close().catch(() => {}));
1160
- await Promise.all(closes);
1161
- }
1162
-
1163
- if (preRename) {
1164
- await preRename();
1165
- }
1166
-
1167
- await replaceFilesAtomically(
1168
- [
1169
- { source: vectorsTmp, target: vectorsPath },
1170
- { source: recordsTmp, target: recordsPath },
1171
- { source: contentTmp, target: contentPath },
1172
- { source: filesTmp, target: filesPath },
1173
- ],
1174
- renameOptions
1175
- );
1176
-
1177
- return BinaryVectorStore.load(cacheDir, {
1178
- contentCacheEntries,
1179
- vectorCacheEntries,
1180
- vectorLoadMode,
1181
- });
1182
- }
1183
- }
1137
+ } finally {
1138
+ const closes = [];
1139
+ if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
1140
+ if (recordsHandle) closes.push(recordsHandle.close().catch(() => {}));
1141
+ if (contentHandle) closes.push(contentHandle.close().catch(() => {}));
1142
+ await Promise.all(closes);
1143
+ }
1144
+
1145
+ if (preRename) {
1146
+ await preRename();
1147
+ }
1148
+
1149
+ await replaceFilesAtomically(
1150
+ [
1151
+ { source: vectorsTmp, target: vectorsPath },
1152
+ { source: recordsTmp, target: recordsPath },
1153
+ { source: contentTmp, target: contentPath },
1154
+ { source: filesTmp, target: filesPath },
1155
+ ],
1156
+ renameOptions
1157
+ );
1158
+
1159
+ return BinaryVectorStore.load(cacheDir, {
1160
+ contentCacheEntries,
1161
+ vectorCacheEntries,
1162
+ vectorLoadMode,
1163
+ });
1164
+ }
1165
+ }