@softerist/heuristic-mcp 3.2.1 → 3.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@ import fs from 'fs/promises';
2
2
  import fsSync from 'fs';
3
3
  import path from 'path';
4
4
  import os from 'os';
5
+ import crypto from 'crypto';
6
+ import { crc32 } from 'zlib';
5
7
  import {
6
8
  BINARY_STORE_VERSION as STORE_VERSION,
7
9
  BINARY_VECTOR_HEADER_SIZE as VECTOR_HEADER_SIZE,
@@ -18,24 +20,376 @@ const VECTORS_FILE = 'vectors.bin';
18
20
  const RECORDS_FILE = 'records.bin';
19
21
  const CONTENT_FILE = 'content.bin';
20
22
  const FILES_FILE = 'files.json';
23
+ const TELEMETRY_FILE = 'binary-store-telemetry.json';
21
24
  const RETRYABLE_RENAME_ERRORS = new Set(['EPERM', 'EACCES', 'EBUSY']);
25
+ const BINARY_ARTIFACT_BASE_FILES = [VECTORS_FILE, RECORDS_FILE, CONTENT_FILE, FILES_FILE];
26
+ const STARTUP_TMP_CLEANUP_MIN_AGE_MS = 2 * 60 * 1000;
27
+ const TELEMETRY_VERSION = 1;
28
+
29
/**
 * Build a zeroed counter set for binary-store telemetry.
 * The keys listed here define the full counter schema accepted by
 * normalizeTelemetry.
 */
function createTelemetryTotals() {
  const counterNames = [
    'atomicReplaceAttempts',
    'atomicReplaceSuccesses',
    'atomicReplaceFailures',
    'renameRetryCount',
    'fallbackCopyCount',
    'rollbackCount',
    'rollbackRestoreFailureCount',
    'startupCleanupRuns',
    'staleTempFilesRemoved',
    'staleTempFilesSkippedActive',
    'corruptionDetected',
    'corruptionAutoCleared',
    'corruptionSecondaryReadonlyBlocked',
  ];
  const totals = {};
  for (const name of counterNames) {
    totals[name] = 0;
  }
  return totals;
}
46
+
47
/**
 * Coerce raw (possibly missing or malformed) telemetry JSON into the
 * canonical shape. Unknown total keys are dropped, invalid values fall
 * back to safe defaults, so callers can mutate the result blindly.
 */
function normalizeTelemetry(raw) {
  const totals = createTelemetryTotals();
  const rawTotals = raw?.totals;
  if (rawTotals && typeof rawTotals === 'object') {
    for (const key of Object.keys(totals)) {
      const candidate = rawTotals[key];
      if (Number.isFinite(candidate)) {
        totals[key] = candidate;
      }
    }
  }

  const stringOrNull = (value) => (typeof value === 'string' ? value : null);
  const shallowCopyOrNull = (value) =>
    value && typeof value === 'object' ? { ...value } : null;

  let lastError = null;
  if (raw?.lastError && typeof raw.lastError === 'object') {
    lastError = {
      at: stringOrNull(raw.lastError.at),
      message: stringOrNull(raw.lastError.message),
    };
  }

  return {
    version: TELEMETRY_VERSION,
    totals,
    updatedAt: stringOrNull(raw?.updatedAt),
    lastError,
    lastAtomicReplace: shallowCopyOrNull(raw?.lastAtomicReplace),
    lastStartupCleanup: shallowCopyOrNull(raw?.lastStartupCleanup),
    lastCorruption: shallowCopyOrNull(raw?.lastCorruption),
  };
}
82
+
83
/**
 * Load and normalize the telemetry file from cacheDir.
 * A missing or unparsable file simply yields fresh, zeroed telemetry.
 */
async function readTelemetryFile(cacheDir) {
  const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
  let parsed = null;
  try {
    parsed = JSON.parse(await fs.readFile(telemetryPath, 'utf-8'));
  } catch {
    // Absent/corrupt telemetry is not an error — start from defaults.
    parsed = null;
  }
  return normalizeTelemetry(parsed);
}
92
+
93
/**
 * Persist a telemetry object as pretty-printed JSON in cacheDir,
 * creating the directory if needed. Directory-creation failures are
 * swallowed; the write itself may still reject.
 */
async function writeTelemetryFile(cacheDir, telemetry) {
  await fs.mkdir(cacheDir, { recursive: true }).catch(() => {});
  const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
  await fs.writeFile(telemetryPath, JSON.stringify(telemetry, null, 2));
}
98
+
99
/**
 * Read-modify-write helper for telemetry. `mutate` receives the
 * normalized telemetry object and edits it in place; `updatedAt` is
 * stamped afterwards. Every failure is swallowed: telemetry is strictly
 * best-effort and must never break a store operation.
 */
async function updateTelemetry(cacheDir, mutate) {
  if (!cacheDir) return;
  const applyUpdate = async () => {
    const telemetry = await readTelemetryFile(cacheDir);
    mutate(telemetry);
    telemetry.updatedAt = new Date().toISOString();
    await writeTelemetryFile(cacheDir, telemetry);
  };
  // Intentionally ignore all telemetry I/O errors.
  await applyUpdate().catch(() => {});
}
110
+
111
/**
 * Probe whether `pid` refers to a live process via signal 0.
 * EPERM means the process exists but belongs to another user, so it
 * still counts as running; any other failure means it is gone.
 */
function isProcessRunning(pid) {
  if (!Number.isInteger(pid) || pid <= 0) return false;
  try {
    process.kill(pid, 0);
  } catch (err) {
    return err?.code === 'EPERM';
  }
  return true;
}
120
+
121
/**
 * Extract the owner PID embedded in a temp/backup artifact name,
 * e.g. "vectors.bin.tmp-1234-1699..." → 1234.
 * Returns null when no PID marker is present.
 */
function parsePidFromBinaryArtifact(fileName) {
  const pidMatch = /\.(?:tmp|bak)-(\d+)(?:-|$)/.exec(fileName);
  if (!pidMatch) return null;
  const ownerPid = Number.parseInt(pidMatch[1], 10);
  return Number.isInteger(ownerPid) ? ownerPid : null;
}
127
+
128
/**
 * True when fileName looks like a temp (.tmp-*) or backup (.bak-*)
 * artifact derived from one of the known binary store base files.
 */
function isBinaryTempArtifact(fileName) {
  for (const baseFile of BINARY_ARTIFACT_BASE_FILES) {
    if (fileName.startsWith(`${baseFile}.tmp-`)) return true;
    if (fileName.startsWith(`${baseFile}.bak-`)) return true;
  }
  return false;
}
134
+
135
/**
 * Add a positive, finite delta to metrics[key], treating a missing key
 * as 0. No-ops when metrics is null/undefined or the delta is
 * non-positive/non-finite.
 */
function addToMetric(metrics, key, value = 1) {
  if (!metrics) return;
  if (!Number.isFinite(value) || value <= 0) return;
  metrics[key] = (metrics[key] || 0) + value;
}
139
+
140
/**
 * Public accessor: read the normalized binary-store telemetry for
 * cacheDir (zeroed defaults when no telemetry file exists).
 */
export async function readBinaryStoreTelemetry(cacheDir) {
  const telemetry = await readTelemetryFile(cacheDir);
  return telemetry;
}
143
+
144
/**
 * Record a corruption event in the store telemetry (best-effort).
 *
 * @param {string} cacheDir - Store directory holding the telemetry file.
 * @param {object} [info]
 * @param {string|null} [info.message] - Human-readable corruption detail.
 * @param {string|null} [info.context] - Where the corruption was observed.
 * @param {string} [info.action] - 'detected', 'auto-cleared' or
 *   'secondary-readonly-blocked'; any other value only updates
 *   lastCorruption without bumping a counter.
 */
export async function recordBinaryStoreCorruption(
  cacheDir,
  { message = null, context = null, action = 'detected' } = {}
) {
  await updateTelemetry(cacheDir, (telemetry) => {
    const totals = telemetry?.totals;
    if (!totals || typeof totals !== 'object') return;
    switch (action) {
      case 'detected':
        totals.corruptionDetected += 1;
        break;
      case 'auto-cleared':
        totals.corruptionAutoCleared += 1;
        break;
      case 'secondary-readonly-blocked':
        totals.corruptionSecondaryReadonlyBlocked += 1;
        break;
      default:
        break;
    }
    telemetry.lastCorruption = {
      at: new Date().toISOString(),
      action,
      context: typeof context === 'string' ? context : null,
      message: typeof message === 'string' ? message : null,
    };
  });
}
163
+
164
/**
 * Remove stale temp/backup artifacts (*.tmp-*, *.bak-*) left behind in
 * cacheDir by crashed or killed writers.
 *
 * An artifact is removed only when (a) its embedded owner PID — if any —
 * is no longer running, and (b) it is at least minAgeMs old. The outcome
 * is folded into the store telemetry; all I/O is best-effort.
 *
 * Fix vs previous version: `removed`/`removedFiles` are now incremented
 * only when `fs.rm` actually succeeds. Previously a failed deletion
 * (e.g. EPERM on a locked file) was swallowed by `.catch(() => {})` yet
 * still counted, inflating the telemetry.
 *
 * @param {string} cacheDir - Directory containing the binary store files.
 * @param {object} [options]
 * @param {number} [options.minAgeMs] - Minimum artifact age before removal.
 * @param {{info?: Function}|null} [options.logger] - Optional logger.
 * @returns {Promise<{cacheDir: string, scanned: number, removed: number,
 *   skippedActive: number, removedFiles: string[]}>}
 */
export async function cleanupStaleBinaryArtifacts(
  cacheDir,
  { minAgeMs = STARTUP_TMP_CLEANUP_MIN_AGE_MS, logger = null } = {}
) {
  const result = {
    cacheDir,
    scanned: 0,
    removed: 0,
    skippedActive: 0,
    removedFiles: [],
  };

  let entries = [];
  try {
    entries = await fs.readdir(cacheDir, { withFileTypes: true });
  } catch {
    // Missing or unreadable cache directory: nothing to clean.
    return result;
  }

  const now = Date.now();
  for (const entry of entries) {
    const fileName = typeof entry === 'string' ? entry : entry?.name;
    if (!fileName) continue;
    const isFileEntry = typeof entry === 'string' ? true : entry?.isFile?.() === true;
    if (!isFileEntry) continue;
    if (!isBinaryTempArtifact(fileName)) continue;
    result.scanned += 1;

    const fullPath = path.join(cacheDir, fileName);
    const stats = await fs.stat(fullPath).catch(() => null);
    if (!stats) continue;

    // Never delete artifacts whose owning process is still alive —
    // it may be mid-write.
    const ownerPid = parsePidFromBinaryArtifact(fileName);
    if (ownerPid && isProcessRunning(ownerPid)) {
      result.skippedActive += 1;
      continue;
    }

    const ageMs = now - stats.mtimeMs;
    if (ageMs < minAgeMs) continue;

    // Count only artifacts that were actually deleted.
    try {
      await fs.rm(fullPath, { force: true });
      result.removed += 1;
      result.removedFiles.push(fileName);
    } catch {
      // Best-effort: leave it for a later cleanup pass.
    }
  }

  await updateTelemetry(cacheDir, (telemetry) => {
    telemetry.totals.startupCleanupRuns += 1;
    telemetry.totals.staleTempFilesRemoved += result.removed;
    telemetry.totals.staleTempFilesSkippedActive += result.skippedActive;
    telemetry.lastStartupCleanup = {
      at: new Date().toISOString(),
      scanned: result.scanned,
      removed: result.removed,
      skippedActive: result.skippedActive,
    };
  });

  if (logger && result.removed > 0) {
    logger.info(
      `[Cache] Startup temp cleanup removed ${result.removed} stale artifact(s) from ${cacheDir}`
    );
  }

  return result;
}
22
229
 
23
/**
 * True when a rename failure is transient (typically Windows file
 * locking: EPERM/EACCES/EBUSY) and therefore worth retrying.
 */
function isRetryableRenameError(err) {
  return RETRYABLE_RENAME_ERRORS.has(err?.code);
}

/**
 * fs.rename with capped exponential backoff for transient errors.
 * Resolves with the number of retries that were needed. On final failure
 * the error is annotated with `renameRetryCount` before being rethrown.
 */
async function renameWithRetry(
  source,
  target,
  { retries = 12, delayMs = 50, maxDelayMs = 1000 } = {}
) {
  let currentDelay = delayMs;
  for (let attempt = 0; ; attempt += 1) {
    try {
      await fs.rename(source, target);
      return attempt;
    } catch (err) {
      if (!isRetryableRenameError(err) || attempt >= retries) {
        err.renameRetryCount = attempt;
        throw err;
      }
    }
    await new Promise((resolve) => setTimeout(resolve, currentDelay));
    currentDelay = Math.min(currentDelay * 2, maxDelayMs);
  }
}
256
+
257
/**
 * True when filePath is currently accessible. This is a point-in-time
 * check (TOCTOU applies) — callers treat the answer as advisory only.
 */
async function pathExists(filePath) {
  return fs.access(filePath).then(
    () => true,
    () => false
  );
}
265
+
266
/**
 * Delete filePath if present, ignoring every failure (ENOENT is already
 * tolerated via force:true). Used for best-effort cleanup of temp and
 * backup files.
 */
async function removeIfExists(filePath) {
  try {
    await fs.rm(filePath, { force: true });
  } catch {
    // Best-effort cleanup — deliberately ignored.
  }
}
269
+
270
/**
 * Move `source` over `target`, preferring an atomic rename (with retry)
 * and degrading to copy+delete when the rename keeps failing with a
 * retryable (lock-style) error. Retry and fallback counts are folded
 * into `metrics` when provided.
 *
 * @throws The original rename error for non-retryable failures, or a
 *   combined error when the fallback copy also fails.
 */
async function promoteFileWithFallback(source, target, renameOptions = {}, metrics = null) {
  let renameError = null;
  try {
    const retriesUsed = await renameWithRetry(source, target, renameOptions);
    addToMetric(metrics, 'renameRetryCount', retriesUsed);
    return;
  } catch (err) {
    renameError = err;
  }

  const retriesUsed = Number.isFinite(renameError?.renameRetryCount)
    ? renameError.renameRetryCount
    : 0;
  addToMetric(metrics, 'renameRetryCount', retriesUsed);
  if (!isRetryableRenameError(renameError)) {
    throw renameError;
  }

  // Rename keeps failing with a lock-style error: copy+delete instead.
  try {
    await fs.copyFile(source, target);
    await removeIfExists(source);
    addToMetric(metrics, 'fallbackCopyCount', 1);
  } catch (copyError) {
    const wrapped = new Error(
      `rename failed (${renameError.message}); fallback copy failed (${copyError.message})`
    );
    wrapped.code = copyError?.code || renameError?.code;
    throw wrapped;
  }
}
298
+
299
/**
 * Promote a set of temp files over their live targets as one
 * all-or-nothing operation.
 *
 * Strategy: (1) move every existing target aside to a `.bak-<pid>-<ts>`
 * file, (2) move every temp source into place. On any failure the
 * partial replacements are deleted, the backups restored, and the
 * original error rethrown (annotated with any rollback problems).
 * Outcome counters are always folded into the store telemetry.
 *
 * @param {{source: string, target: string}[]} filePairs
 * @param {object} [renameOptions] - Passed through to renameWithRetry.
 */
async function replaceFilesAtomically(filePairs, renameOptions = {}) {
  const metrics = createTelemetryTotals();
  metrics.atomicReplaceAttempts = 1;
  const cacheDir = filePairs.length > 0 ? path.dirname(filePairs[0].target) : null;
  const backupSuffix = `.bak-${process.pid}-${Date.now()}`;
  const stagedBackups = [];
  const promotedTargets = [];
  let operationError = null;

  try {
    // Phase 1: stage every existing target as a backup. A failure here
    // leaves all live targets untouched.
    for (const { target } of filePairs) {
      if (!(await pathExists(target))) continue;
      const backupPath = `${target}${backupSuffix}`;
      await removeIfExists(backupPath);
      await promoteFileWithFallback(target, backupPath, renameOptions, metrics);
      stagedBackups.push({ target, backupPath });
    }

    // Phase 2: promote the temp files into place.
    for (const { source, target } of filePairs) {
      await promoteFileWithFallback(source, target, renameOptions, metrics);
      promotedTargets.push(target);
    }
    metrics.atomicReplaceSuccesses = 1;
  } catch (error) {
    operationError = error;
    metrics.atomicReplaceFailures = 1;
    metrics.rollbackCount = 1;
    const rollbackErrors = [];

    // Drop partially promoted targets before restoring the originals.
    for (const target of promotedTargets.reverse()) {
      await removeIfExists(target);
    }

    // Put the originals back from their backups.
    for (const backup of stagedBackups.reverse()) {
      try {
        await promoteFileWithFallback(backup.backupPath, backup.target, renameOptions, metrics);
      } catch (restoreErr) {
        rollbackErrors.push(
          `restore ${path.basename(backup.target)} failed: ${restoreErr.message}`
        );
      }
    }
    if (rollbackErrors.length > 0) {
      metrics.rollbackRestoreFailureCount = rollbackErrors.length;
    }

    // Remove temp files left over from this failed write attempt.
    await Promise.all(filePairs.map((pair) => removeIfExists(pair.source)));

    if (rollbackErrors.length > 0) {
      error.message = `${error.message}. Rollback issues: ${rollbackErrors.join('; ')}`;
    }
    throw error;
  } finally {
    // Backups are remnants after both success and rollback — clear them,
    // then record the outcome in telemetry.
    await Promise.all(stagedBackups.map((backup) => removeIfExists(backup.backupPath)));
    await updateTelemetry(cacheDir, (telemetry) => {
      telemetry.totals.atomicReplaceAttempts += metrics.atomicReplaceAttempts;
      telemetry.totals.atomicReplaceSuccesses += metrics.atomicReplaceSuccesses;
      telemetry.totals.atomicReplaceFailures += metrics.atomicReplaceFailures;
      telemetry.totals.renameRetryCount += metrics.renameRetryCount;
      telemetry.totals.fallbackCopyCount += metrics.fallbackCopyCount;
      telemetry.totals.rollbackCount += metrics.rollbackCount;
      telemetry.totals.rollbackRestoreFailureCount += metrics.rollbackRestoreFailureCount;
      telemetry.lastAtomicReplace = {
        at: new Date().toISOString(),
        success: metrics.atomicReplaceSuccesses > 0,
        renameRetryCount: metrics.renameRetryCount,
        fallbackCopyCount: metrics.fallbackCopyCount,
        rollbackCount: metrics.rollbackCount,
        rollbackRestoreFailureCount: metrics.rollbackRestoreFailureCount,
      };
      if (operationError) {
        telemetry.lastError = {
          at: new Date().toISOString(),
          message: operationError.message,
        };
      }
    });
  }
}
384
+
385
/**
 * Error raised when the binary store fails integrity validation
 * (bad magic, truncation, CRC32 or writeId mismatch). The cache layer
 * keys off this type to distinguish corruption from other load failures.
 */
export class BinaryStoreCorruptionError extends Error {
  /**
   * @param {string} message - Description of the detected corruption.
   * @param {{cause?: unknown}} [options] - Standard Error options; pass
   *   `cause` to preserve the underlying error for diagnostics.
   *   (Optional and backward compatible with the single-arg form.)
   */
  constructor(message, options = undefined) {
    super(message, options);
    this.name = 'BinaryStoreCorruptionError';
  }
}
41
395
 
@@ -57,13 +411,86 @@ function getDataView(buffer) {
57
411
  return new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
58
412
  }
59
413
 
414
/**
 * Generate a cryptographically random, non-zero writeId shared by every
 * file of a single write operation. Range is [1, 0xFFFFFFFE]; zero is
 * never produced so headers can use 0 as "no writeId".
 */
function generateWriteId() {
  const MIN_WRITE_ID = 1;
  const MAX_WRITE_ID_EXCLUSIVE = 0xFFFFFFFF;
  return crypto.randomInt(MIN_WRITE_ID, MAX_WRITE_ID_EXCLUSIVE);
}
420
+
421
+ /**
422
+ * Compute CRC32 checksum over a buffer.
423
+ */
424
+ function computeCrc32(buffer, initial) {
425
+ return initial !== undefined ? crc32(buffer, initial) >>> 0 : crc32(buffer) >>> 0;
426
+ }
427
+
428
+ function updateCrc32(checksum, buffer) {
429
+ return crc32(buffer, checksum >>> 0) >>> 0;
430
+ }
431
+
432
/**
 * Stream a region of an open FileHandle through CRC32 in 1 MiB chunks.
 *
 * @param {import('fs/promises').FileHandle} handle - Open readable handle.
 * @param {number} startOffset - Byte offset where the payload begins.
 * @param {number} totalBytes - Exact payload length; 0 or invalid → 0.
 * @returns {Promise<number>} Unsigned 32-bit CRC of the region.
 * @throws {BinaryStoreCorruptionError} When a read comes up short
 *   (file shorter than its header claims).
 */
async function computeHandleCrc32(handle, startOffset, totalBytes) {
  if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
  const scratch = Buffer.allocUnsafe(Math.min(1024 * 1024, totalBytes));
  let checksum = 0;
  let offset = startOffset;
  let bytesLeft = totalBytes;

  while (bytesLeft > 0) {
    const want = Math.min(scratch.length, bytesLeft);
    const { bytesRead } = await handle.read(scratch, 0, want, offset);
    if (bytesRead !== want) {
      throw new BinaryStoreCorruptionError(
        'Binary store content file truncated during CRC validation'
      );
    }
    checksum = updateCrc32(checksum, scratch.subarray(0, bytesRead));
    bytesLeft -= bytesRead;
    offset += bytesRead;
  }

  return checksum >>> 0;
}
455
+
456
/**
 * Synchronous CRC32 over a region of a raw fd, read in 1 MiB chunks.
 * Mirrors computeHandleCrc32 for the sync read path (vectors file).
 *
 * @throws {BinaryStoreCorruptionError} When a read comes up short.
 */
function computeFdCrc32Sync(fd, startOffset, totalBytes) {
  if (!Number.isFinite(totalBytes) || totalBytes <= 0) return 0;
  const scratch = Buffer.allocUnsafe(Math.min(1024 * 1024, totalBytes));
  let checksum = 0;
  let offset = startOffset;
  let bytesLeft = totalBytes;

  while (bytesLeft > 0) {
    const want = Math.min(scratch.length, bytesLeft);
    const bytesRead = fsSync.readSync(fd, scratch, 0, want, offset);
    if (bytesRead !== want) {
      throw new BinaryStoreCorruptionError(
        'Binary store vectors file truncated during CRC validation'
      );
    }
    checksum = updateCrc32(checksum, scratch.subarray(0, bytesRead));
    bytesLeft -= bytesRead;
    offset += bytesRead;
  }

  return checksum >>> 0;
}
479
+
480
/**
 * Patch the little-endian CRC32 value into the fixed header slot
 * (byte offset 20) of an already-open store file.
 */
async function writeHeaderCrc(handle, crcValue) {
  const crcBuffer = Buffer.alloc(4);
  crcBuffer.writeUInt32LE(crcValue >>> 0, 0);
  await handle.write(crcBuffer, 0, crcBuffer.length, 20);
}
486
+
60
487
  function readHeader(buffer, magic, headerSize) {
61
488
  if (buffer.length < headerSize) {
62
- throw new Error('Binary store header is truncated');
489
+ throw new BinaryStoreCorruptionError('Binary store header is truncated');
63
490
  }
64
491
  const actualMagic = readMagic(buffer);
65
492
  if (actualMagic !== magic) {
66
- throw new Error(`Invalid binary store magic (${actualMagic})`);
493
+ throw new BinaryStoreCorruptionError(`Invalid binary store magic (${actualMagic})`);
67
494
  }
68
495
  const view = getDataView(buffer);
69
496
  const version = view.getUint32(4, true);
@@ -73,31 +500,37 @@ function readHeader(buffer, magic, headerSize) {
73
500
  return view;
74
501
  }
75
502
 
76
/**
 * Fill the vectors.bin header: magic, store version, vector dim, vector
 * count, the operation writeId, and a zero CRC32 slot (offset 20) that
 * is patched after the payload is written. Bytes 24-31 stay reserved.
 */
function writeVectorsHeader(buffer, dim, count, writeId) {
  writeMagic(buffer, MAGIC_VECTORS);
  const headerView = getDataView(buffer);
  let offset = 4;
  for (const field of [STORE_VERSION, dim, count, writeId, 0]) {
    headerView.setUint32(offset, field, true);
    offset += 4;
  }
  // Last field (offset 20) is the CRC32 placeholder; bytes 24-31 reserved.
}
84
513
 
85
/**
 * Fill the records.bin header: magic, store version, record count, file
 * count, the operation writeId, and a zero CRC32 slot (offset 20) that
 * is patched after the payload is written. Bytes 24-31 stay reserved.
 */
function writeRecordsHeader(buffer, count, fileCount, writeId) {
  writeMagic(buffer, MAGIC_RECORDS);
  const headerView = getDataView(buffer);
  let offset = 4;
  for (const field of [STORE_VERSION, count, fileCount, writeId, 0]) {
    headerView.setUint32(offset, field, true);
    offset += 4;
  }
  // Last field (offset 20) is the CRC32 placeholder; bytes 24-31 reserved.
}
93
524
 
94
/**
 * Fill the content.bin header: magic, store version, 64-bit total
 * payload byte count, the operation writeId, and a zero CRC32 slot
 * (offset 20) patched after the payload is written. Bytes 24-31 reserved.
 */
function writeContentHeader(buffer, totalBytes, writeId) {
  writeMagic(buffer, MAGIC_CONTENT);
  const headerView = getDataView(buffer);
  headerView.setUint32(4, STORE_VERSION, true);
  headerView.setBigUint64(8, BigInt(totalBytes), true);
  headerView.setUint32(16, writeId, true);
  headerView.setUint32(20, 0, true); // CRC32 placeholder — patched later
  // bytes 24-31: reserved
}
102
535
 
103
536
  function readBigUint(view, offset) {
@@ -224,16 +657,28 @@ export class BinaryVectorStore {
224
657
  vectorsBuffer = await fs.readFile(vectorsPath);
225
658
  }
226
659
 
227
- const vectorsView = readHeader(vectorsBuffer, MAGIC_VECTORS, VECTOR_HEADER_SIZE);
228
- const dim = vectorsView.getUint32(8, true);
229
- const count = vectorsView.getUint32(12, true);
230
-
231
- const recordsView = readHeader(recordsBuffer, MAGIC_RECORDS, RECORD_HEADER_SIZE);
660
+ const vectorsView = readHeader(vectorsBuffer, MAGIC_VECTORS, VECTOR_HEADER_SIZE);
661
+ const dim = vectorsView.getUint32(8, true);
662
+ const count = vectorsView.getUint32(12, true);
663
+ const vectorsWriteId = vectorsView.getUint32(16, true);
664
+ const vectorsExpectedCrc = vectorsView.getUint32(20, true);
665
+ const vectorsPayloadBytes = count * dim * 4;
666
+
667
+ const recordsView = readHeader(recordsBuffer, MAGIC_RECORDS, RECORD_HEADER_SIZE);
232
668
  const recordCount = recordsView.getUint32(8, true);
233
669
  const fileCount = recordsView.getUint32(12, true);
670
+ const recordsWriteId = recordsView.getUint32(16, true);
671
+ const recordsExpectedCrc = recordsView.getUint32(20, true);
234
672
 
235
673
  if (recordCount !== count) {
236
- throw new Error(`Binary store count mismatch (${recordCount} != ${count})`);
674
+ throw new BinaryStoreCorruptionError(`Binary store count mismatch (${recordCount} != ${count})`);
675
+ }
676
+
677
+ // Validate writeId consistency between vectors and records
678
+ if (vectorsWriteId !== recordsWriteId) {
679
+ throw new BinaryStoreCorruptionError(
680
+ `Binary store writeId mismatch: vectors=${vectorsWriteId}, records=${recordsWriteId}`
681
+ );
237
682
  }
238
683
 
239
684
  contentReadHandle = await fs.open(contentPath, 'r');
@@ -242,19 +687,110 @@ export class BinaryVectorStore {
242
687
  const headerBuffer = Buffer.alloc(CONTENT_HEADER_SIZE);
243
688
  const { bytesRead } = await contentReadHandle.read(headerBuffer, 0, CONTENT_HEADER_SIZE, 0);
244
689
  if (bytesRead < CONTENT_HEADER_SIZE) {
245
- throw new Error('Binary store content header is truncated');
690
+ throw new BinaryStoreCorruptionError('Binary store content header is truncated');
246
691
  }
247
692
  const contentView = readHeader(headerBuffer, MAGIC_CONTENT, CONTENT_HEADER_SIZE);
248
693
  totalContentBytes = readBigUint(contentView, 8);
694
+ const contentWriteId = contentView.getUint32(16, true);
695
+ const contentExpectedCrc = contentView.getUint32(20, true);
249
696
  const stats = await contentReadHandle.stat();
250
697
  const expectedContentSize = CONTENT_HEADER_SIZE + totalContentBytes;
251
698
  if (stats.size < expectedContentSize) {
252
- throw new Error('Binary store content file truncated');
699
+ throw new BinaryStoreCorruptionError('Binary store content file truncated');
253
700
  }
254
701
 
255
- const files = JSON.parse(filesRaw);
256
- if (!Array.isArray(files) || files.length !== fileCount) {
257
- throw new Error('Binary store file table is invalid');
702
+ // Validate writeId consistency across all three files
703
+ if (vectorsWriteId !== contentWriteId) {
704
+ throw new BinaryStoreCorruptionError(
705
+ `Binary store writeId mismatch: vectors=${vectorsWriteId}, content=${contentWriteId}`
706
+ );
707
+ }
708
+
709
+ // Validate CRC32 for records payload
710
+ const recordsPayload = recordsBuffer.subarray(RECORD_HEADER_SIZE);
711
+ const recordsActualCrc = computeCrc32(recordsPayload);
712
+ if (recordsActualCrc !== recordsExpectedCrc) {
713
+ throw new BinaryStoreCorruptionError(
714
+ `Binary store records CRC32 mismatch (expected ${recordsExpectedCrc}, got ${recordsActualCrc})`
715
+ );
716
+ }
717
+
718
+ // Validate CRC32 for vectors payload (only when fully loaded into memory)
719
+ if (!loadVectorsFromDisk) {
720
+ const expectedVectorsSize = VECTOR_HEADER_SIZE + vectorsPayloadBytes;
721
+ if (vectorsBuffer.length < expectedVectorsSize) {
722
+ throw new BinaryStoreCorruptionError('Binary store vectors file truncated');
723
+ }
724
+ const vectorsPayload = vectorsBuffer.subarray(VECTOR_HEADER_SIZE);
725
+ const vectorsActualCrc = computeCrc32(vectorsPayload);
726
+ if (vectorsActualCrc !== vectorsExpectedCrc) {
727
+ throw new BinaryStoreCorruptionError(
728
+ `Binary store vectors CRC32 mismatch (expected ${vectorsExpectedCrc}, got ${vectorsActualCrc})`
729
+ );
730
+ }
731
+ } else if (vectorsPayloadBytes > 0) {
732
+ const vectorsStats = fsSync.fstatSync(vectorsFd);
733
+ const expectedVectorsSize = VECTOR_HEADER_SIZE + vectorsPayloadBytes;
734
+ if (vectorsStats.size < expectedVectorsSize) {
735
+ throw new BinaryStoreCorruptionError('Binary store vectors file truncated');
736
+ }
737
+ const vectorsActualCrc = computeFdCrc32Sync(
738
+ vectorsFd,
739
+ VECTOR_HEADER_SIZE,
740
+ vectorsPayloadBytes
741
+ );
742
+ if (vectorsActualCrc !== vectorsExpectedCrc) {
743
+ throw new BinaryStoreCorruptionError(
744
+ `Binary store vectors CRC32 mismatch (expected ${vectorsExpectedCrc}, got ${vectorsActualCrc})`
745
+ );
746
+ }
747
+ } else if (vectorsExpectedCrc !== 0) {
748
+ throw new BinaryStoreCorruptionError(
749
+ `Binary store vectors CRC32 mismatch (expected ${vectorsExpectedCrc}, got 0)`
750
+ );
751
+ }
752
+
753
+ if (totalContentBytes > 0) {
754
+ const contentActualCrc = await computeHandleCrc32(
755
+ contentReadHandle,
756
+ CONTENT_HEADER_SIZE,
757
+ totalContentBytes
758
+ );
759
+ if (contentActualCrc !== contentExpectedCrc) {
760
+ throw new BinaryStoreCorruptionError(
761
+ `Binary store content CRC32 mismatch (expected ${contentExpectedCrc}, got ${contentActualCrc})`
762
+ );
763
+ }
764
+ } else if (contentExpectedCrc !== 0) {
765
+ throw new BinaryStoreCorruptionError(
766
+ `Binary store content CRC32 mismatch (expected ${contentExpectedCrc}, got 0)`
767
+ );
768
+ }
769
+
770
+ const filesData = JSON.parse(filesRaw);
771
+ // Support new format { writeId, files } and legacy raw array
772
+ let files;
773
+ let filesWriteId = null;
774
+ if (filesData && !Array.isArray(filesData) && Array.isArray(filesData.files)) {
775
+ files = filesData.files;
776
+ filesWriteId = filesData.writeId ?? null;
777
+ } else if (Array.isArray(filesData)) {
778
+ files = filesData;
779
+ } else {
780
+ throw new BinaryStoreCorruptionError('Binary store file table is invalid');
781
+ }
782
+
783
+ if (files.length !== fileCount) {
784
+ throw new BinaryStoreCorruptionError(
785
+ `Binary store file table count mismatch (${files.length} != ${fileCount})`
786
+ );
787
+ }
788
+
789
+ // Validate writeId from files.json if present
790
+ if (filesWriteId !== null && filesWriteId !== vectorsWriteId) {
791
+ throw new BinaryStoreCorruptionError(
792
+ `Binary store writeId mismatch: vectors=${vectorsWriteId}, files.json=${filesWriteId}`
793
+ );
258
794
  }
259
795
 
260
796
  return new BinaryVectorStore({
@@ -443,6 +979,7 @@ export class BinaryVectorStore {
443
979
  getContent,
444
980
  getVector,
445
981
  preRename,
982
+ renameOptions,
446
983
  } = {}
447
984
  ) {
448
985
  ensureLittleEndian();
@@ -533,7 +1070,9 @@ export class BinaryVectorStore {
533
1070
  const dim =
534
1071
  count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
535
1072
 
536
- await fs.writeFile(filesTmp, JSON.stringify(files));
1073
+ const writeId = generateWriteId();
1074
+
1075
+ await fs.writeFile(filesTmp, JSON.stringify({ writeId, files }));
537
1076
 
538
1077
  let vectorsHandle = null;
539
1078
  let recordsHandle = null;
@@ -545,17 +1084,22 @@ export class BinaryVectorStore {
545
1084
  contentHandle = await fs.open(contentTmp, 'w');
546
1085
 
547
1086
  const vectorsHeader = Buffer.alloc(VECTOR_HEADER_SIZE);
548
- writeVectorsHeader(vectorsHeader, dim, count);
1087
+ writeVectorsHeader(vectorsHeader, dim, count, writeId);
549
1088
  await vectorsHandle.write(vectorsHeader, 0, vectorsHeader.length, 0);
550
1089
 
551
1090
  const recordsHeader = Buffer.alloc(RECORD_HEADER_SIZE);
552
- writeRecordsHeader(recordsHeader, count, files.length);
1091
+ writeRecordsHeader(recordsHeader, count, files.length, writeId);
553
1092
  await recordsHandle.write(recordsHeader, 0, recordsHeader.length, 0);
554
1093
 
555
1094
  const contentHeader = Buffer.alloc(CONTENT_HEADER_SIZE);
556
- writeContentHeader(contentHeader, contentOffset);
1095
+ writeContentHeader(contentHeader, contentOffset, writeId);
557
1096
  await contentHandle.write(contentHeader, 0, contentHeader.length, 0);
558
1097
 
1098
+ // Incremental CRC32 accumulators (zero-alloc — no read-back needed)
1099
+ let vectorsCrc = 0;
1100
+ let recordsCrc = 0;
1101
+ let contentCrc = 0;
1102
+
559
1103
  let vectorPos = VECTOR_HEADER_SIZE;
560
1104
  let recordPos = RECORD_HEADER_SIZE;
561
1105
  let contentPos = CONTENT_HEADER_SIZE;
@@ -574,8 +1118,9 @@ export class BinaryVectorStore {
574
1118
  view.setUint32(24, 0, true);
575
1119
  view.setUint32(28, 0, true);
576
1120
 
577
- await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
578
- recordPos += recordBuffer.length;
1121
+ await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
1122
+ recordPos += recordBuffer.length;
1123
+ recordsCrc = updateCrc32(recordsCrc, recordBuffer);
579
1124
 
580
1125
  const chunk = denseChunks[i];
581
1126
  const sourceIndex = denseSourceIndices[i];
@@ -588,17 +1133,25 @@ export class BinaryVectorStore {
588
1133
  vector.byteOffset,
589
1134
  vector.byteLength
590
1135
  );
591
- await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
592
- vectorPos += vectorBuffer.length;
1136
+ await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
1137
+ vectorPos += vectorBuffer.length;
1138
+ vectorsCrc = updateCrc32(vectorsCrc, vectorBuffer);
593
1139
 
594
1140
  if (entry.contentLength > 0) {
595
1141
 
596
1142
  const val = await resolveContent(chunk, sourceIndex);
597
- const contentBuffer = Buffer.from(val, 'utf-8');
598
- await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
599
- contentPos += contentBuffer.length;
600
- }
601
- }
1143
+ const contentBuffer = Buffer.from(val, 'utf-8');
1144
+ await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
1145
+ contentPos += contentBuffer.length;
1146
+ contentCrc = updateCrc32(contentCrc, contentBuffer);
1147
+ }
1148
+ }
1149
+
1150
+ if (count > 0) {
1151
+ await writeHeaderCrc(vectorsHandle, vectorsCrc);
1152
+ await writeHeaderCrc(recordsHandle, recordsCrc);
1153
+ }
1154
+ await writeHeaderCrc(contentHandle, contentCrc);
602
1155
  } finally {
603
1156
  const closes = [];
604
1157
  if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
@@ -611,12 +1164,15 @@ export class BinaryVectorStore {
611
1164
  await preRename();
612
1165
  }
613
1166
 
614
- await Promise.all([
615
- renameWithRetry(vectorsTmp, vectorsPath),
616
- renameWithRetry(recordsTmp, recordsPath),
617
- renameWithRetry(contentTmp, contentPath),
618
- renameWithRetry(filesTmp, filesPath),
619
- ]);
1167
+ await replaceFilesAtomically(
1168
+ [
1169
+ { source: vectorsTmp, target: vectorsPath },
1170
+ { source: recordsTmp, target: recordsPath },
1171
+ { source: contentTmp, target: contentPath },
1172
+ { source: filesTmp, target: filesPath },
1173
+ ],
1174
+ renameOptions
1175
+ );
620
1176
 
621
1177
  return BinaryVectorStore.load(cacheDir, {
622
1178
  contentCacheEntries,