@softerist/heuristic-mcp 3.2.3 → 3.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +387 -376
- package/config.jsonc +800 -800
- package/features/ann-config.js +102 -110
- package/features/clear-cache.js +81 -84
- package/features/find-similar-code.js +265 -286
- package/features/hybrid-search.js +487 -536
- package/features/index-codebase.js +3139 -3270
- package/features/lifecycle.js +1011 -1063
- package/features/package-version.js +277 -291
- package/features/register.js +351 -370
- package/features/resources.js +115 -130
- package/features/set-workspace.js +214 -240
- package/index.js +693 -758
- package/lib/cache-ops.js +22 -22
- package/lib/cache-utils.js +465 -519
- package/lib/cache.js +1749 -1849
- package/lib/call-graph.js +396 -396
- package/lib/cli.js +232 -226
- package/lib/config.js +1483 -1495
- package/lib/constants.js +511 -493
- package/lib/embed-query-process.js +206 -212
- package/lib/embedding-process.js +434 -451
- package/lib/embedding-worker.js +862 -934
- package/lib/ignore-patterns.js +276 -316
- package/lib/json-worker.js +14 -14
- package/lib/json-writer.js +302 -310
- package/lib/logging.js +116 -127
- package/lib/memory-logger.js +13 -13
- package/lib/onnx-backend.js +188 -193
- package/lib/path-utils.js +18 -23
- package/lib/project-detector.js +82 -84
- package/lib/server-lifecycle.js +133 -145
- package/lib/settings-editor.js +738 -739
- package/lib/slice-normalize.js +25 -31
- package/lib/tokenizer.js +168 -203
- package/lib/utils.js +364 -409
- package/lib/vector-store-binary.js +973 -991
- package/lib/vector-store-sqlite.js +377 -414
- package/lib/workspace-env.js +32 -34
- package/mcp_config.json +9 -9
- package/package.json +86 -86
- package/scripts/clear-cache.js +20 -20
- package/scripts/download-model.js +43 -43
- package/scripts/mcp-launcher.js +49 -49
- package/scripts/postinstall.js +12 -12
- package/search-configs.js +36 -36
|
@@ -1,40 +1,40 @@
|
|
|
1
|
-
import fs from 'fs/promises';
|
|
2
|
-
import fsSync from 'fs';
|
|
3
|
-
import path from 'path';
|
|
4
|
-
import os from 'os';
|
|
5
|
-
import crypto from 'crypto';
|
|
6
|
-
import { crc32 } from 'zlib';
|
|
7
|
-
import {
|
|
8
|
-
BINARY_STORE_VERSION as STORE_VERSION,
|
|
9
|
-
BINARY_VECTOR_HEADER_SIZE as VECTOR_HEADER_SIZE,
|
|
10
|
-
BINARY_RECORD_HEADER_SIZE as RECORD_HEADER_SIZE,
|
|
11
|
-
BINARY_CONTENT_HEADER_SIZE as CONTENT_HEADER_SIZE,
|
|
12
|
-
BINARY_RECORD_SIZE as RECORD_SIZE,
|
|
13
|
-
} from './constants.js';
|
|
14
|
-
|
|
15
|
-
const MAGIC_VECTORS = 'HMCV';
|
|
16
|
-
const MAGIC_RECORDS = 'HMCR';
|
|
17
|
-
const MAGIC_CONTENT = 'HMCC';
|
|
18
|
-
|
|
19
|
-
const VECTORS_FILE = 'vectors.bin';
|
|
20
|
-
const RECORDS_FILE = 'records.bin';
|
|
21
|
-
const CONTENT_FILE = 'content.bin';
|
|
22
|
-
const FILES_FILE = 'files.json';
|
|
23
|
-
const TELEMETRY_FILE = 'binary-store-telemetry.json';
|
|
24
|
-
const RETRYABLE_RENAME_ERRORS = new Set(['EPERM', 'EACCES', 'EBUSY']);
|
|
25
|
-
const BINARY_ARTIFACT_BASE_FILES = [VECTORS_FILE, RECORDS_FILE, CONTENT_FILE, FILES_FILE];
|
|
26
|
-
const STARTUP_TMP_CLEANUP_MIN_AGE_MS = 2 * 60 * 1000;
|
|
27
|
-
const TELEMETRY_VERSION = 1;
|
|
28
|
-
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import fsSync from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import os from 'os';
|
|
5
|
+
import crypto from 'crypto';
|
|
6
|
+
import { crc32 } from 'zlib';
|
|
7
|
+
import {
|
|
8
|
+
BINARY_STORE_VERSION as STORE_VERSION,
|
|
9
|
+
BINARY_VECTOR_HEADER_SIZE as VECTOR_HEADER_SIZE,
|
|
10
|
+
BINARY_RECORD_HEADER_SIZE as RECORD_HEADER_SIZE,
|
|
11
|
+
BINARY_CONTENT_HEADER_SIZE as CONTENT_HEADER_SIZE,
|
|
12
|
+
BINARY_RECORD_SIZE as RECORD_SIZE,
|
|
13
|
+
} from './constants.js';
|
|
14
|
+
|
|
15
|
+
const MAGIC_VECTORS = 'HMCV';
|
|
16
|
+
const MAGIC_RECORDS = 'HMCR';
|
|
17
|
+
const MAGIC_CONTENT = 'HMCC';
|
|
18
|
+
|
|
19
|
+
const VECTORS_FILE = 'vectors.bin';
|
|
20
|
+
const RECORDS_FILE = 'records.bin';
|
|
21
|
+
const CONTENT_FILE = 'content.bin';
|
|
22
|
+
const FILES_FILE = 'files.json';
|
|
23
|
+
const TELEMETRY_FILE = 'binary-store-telemetry.json';
|
|
24
|
+
const RETRYABLE_RENAME_ERRORS = new Set(['EPERM', 'EACCES', 'EBUSY']);
|
|
25
|
+
const BINARY_ARTIFACT_BASE_FILES = [VECTORS_FILE, RECORDS_FILE, CONTENT_FILE, FILES_FILE];
|
|
26
|
+
const STARTUP_TMP_CLEANUP_MIN_AGE_MS = 2 * 60 * 1000;
|
|
27
|
+
const TELEMETRY_VERSION = 1;
|
|
28
|
+
|
|
29
29
|
function createTelemetryTotals() {
|
|
30
30
|
return {
|
|
31
|
-
atomicReplaceAttempts: 0,
|
|
32
|
-
atomicReplaceSuccesses: 0,
|
|
33
|
-
atomicReplaceFailures: 0,
|
|
34
|
-
renameRetryCount: 0,
|
|
35
|
-
fallbackCopyCount: 0,
|
|
36
|
-
rollbackCount: 0,
|
|
37
|
-
rollbackRestoreFailureCount: 0,
|
|
31
|
+
atomicReplaceAttempts: 0,
|
|
32
|
+
atomicReplaceSuccesses: 0,
|
|
33
|
+
atomicReplaceFailures: 0,
|
|
34
|
+
renameRetryCount: 0,
|
|
35
|
+
fallbackCopyCount: 0,
|
|
36
|
+
rollbackCount: 0,
|
|
37
|
+
rollbackRestoreFailureCount: 0,
|
|
38
38
|
startupCleanupRuns: 0,
|
|
39
39
|
staleTempFilesRemoved: 0,
|
|
40
40
|
staleTempFilesSkippedActive: 0,
|
|
@@ -43,32 +43,31 @@ function createTelemetryTotals() {
|
|
|
43
43
|
corruptionSecondaryReadonlyBlocked: 0,
|
|
44
44
|
};
|
|
45
45
|
}
|
|
46
|
-
|
|
47
|
-
function normalizeTelemetry(raw) {
|
|
48
|
-
const totals = createTelemetryTotals();
|
|
49
|
-
if (raw?.totals && typeof raw.totals === 'object') {
|
|
50
|
-
for (const key of Object.keys(totals)) {
|
|
51
|
-
if (Number.isFinite(raw.totals[key])) {
|
|
52
|
-
totals[key] = raw.totals[key];
|
|
53
|
-
}
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
return {
|
|
57
|
-
version: TELEMETRY_VERSION,
|
|
58
|
-
totals,
|
|
59
|
-
updatedAt: typeof raw?.updatedAt === 'string' ? raw.updatedAt : null,
|
|
60
|
-
lastError:
|
|
61
|
-
raw?.lastError && typeof raw.lastError === 'object'
|
|
62
|
-
? {
|
|
63
|
-
at: typeof raw.lastError.at === 'string' ? raw.lastError.at : null,
|
|
64
|
-
message:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
: null,
|
|
46
|
+
|
|
47
|
+
function normalizeTelemetry(raw) {
|
|
48
|
+
const totals = createTelemetryTotals();
|
|
49
|
+
if (raw?.totals && typeof raw.totals === 'object') {
|
|
50
|
+
for (const key of Object.keys(totals)) {
|
|
51
|
+
if (Number.isFinite(raw.totals[key])) {
|
|
52
|
+
totals[key] = raw.totals[key];
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
return {
|
|
57
|
+
version: TELEMETRY_VERSION,
|
|
58
|
+
totals,
|
|
59
|
+
updatedAt: typeof raw?.updatedAt === 'string' ? raw.updatedAt : null,
|
|
60
|
+
lastError:
|
|
61
|
+
raw?.lastError && typeof raw.lastError === 'object'
|
|
62
|
+
? {
|
|
63
|
+
at: typeof raw.lastError.at === 'string' ? raw.lastError.at : null,
|
|
64
|
+
message: typeof raw.lastError.message === 'string' ? raw.lastError.message : null,
|
|
65
|
+
}
|
|
66
|
+
: null,
|
|
67
|
+
lastAtomicReplace:
|
|
68
|
+
raw?.lastAtomicReplace && typeof raw.lastAtomicReplace === 'object'
|
|
69
|
+
? { ...raw.lastAtomicReplace }
|
|
70
|
+
: null,
|
|
72
71
|
lastStartupCleanup:
|
|
73
72
|
raw?.lastStartupCleanup && typeof raw.lastStartupCleanup === 'object'
|
|
74
73
|
? { ...raw.lastStartupCleanup }
|
|
@@ -79,64 +78,61 @@ function normalizeTelemetry(raw) {
|
|
|
79
78
|
: null,
|
|
80
79
|
};
|
|
81
80
|
}
|
|
82
|
-
|
|
83
|
-
async function readTelemetryFile(cacheDir) {
|
|
84
|
-
const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
|
|
85
|
-
try {
|
|
86
|
-
const raw = await fs.readFile(telemetryPath, 'utf-8');
|
|
87
|
-
return normalizeTelemetry(JSON.parse(raw));
|
|
88
|
-
} catch {
|
|
89
|
-
return normalizeTelemetry(null);
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
async function writeTelemetryFile(cacheDir, telemetry) {
|
|
94
|
-
const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
|
|
95
|
-
await fs.mkdir(cacheDir, { recursive: true }).catch(() => {});
|
|
96
|
-
await fs.writeFile(telemetryPath, JSON.stringify(telemetry, null, 2));
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
async function updateTelemetry(cacheDir, mutate) {
|
|
100
|
-
if (!cacheDir) return;
|
|
101
|
-
try {
|
|
102
|
-
const telemetry = await readTelemetryFile(cacheDir);
|
|
103
|
-
mutate(telemetry);
|
|
104
|
-
telemetry.updatedAt = new Date().toISOString();
|
|
105
|
-
await writeTelemetryFile(cacheDir, telemetry);
|
|
106
|
-
} catch {
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
return
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
const
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
metrics[key] = (metrics[key] || 0) + value;
|
|
138
|
-
}
|
|
139
|
-
|
|
81
|
+
|
|
82
|
+
async function readTelemetryFile(cacheDir) {
|
|
83
|
+
const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
|
|
84
|
+
try {
|
|
85
|
+
const raw = await fs.readFile(telemetryPath, 'utf-8');
|
|
86
|
+
return normalizeTelemetry(JSON.parse(raw));
|
|
87
|
+
} catch {
|
|
88
|
+
return normalizeTelemetry(null);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
async function writeTelemetryFile(cacheDir, telemetry) {
|
|
93
|
+
const telemetryPath = path.join(cacheDir, TELEMETRY_FILE);
|
|
94
|
+
await fs.mkdir(cacheDir, { recursive: true }).catch(() => {});
|
|
95
|
+
await fs.writeFile(telemetryPath, JSON.stringify(telemetry, null, 2));
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
async function updateTelemetry(cacheDir, mutate) {
|
|
99
|
+
if (!cacheDir) return;
|
|
100
|
+
try {
|
|
101
|
+
const telemetry = await readTelemetryFile(cacheDir);
|
|
102
|
+
mutate(telemetry);
|
|
103
|
+
telemetry.updatedAt = new Date().toISOString();
|
|
104
|
+
await writeTelemetryFile(cacheDir, telemetry);
|
|
105
|
+
} catch {}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
function isProcessRunning(pid) {
|
|
109
|
+
if (!Number.isInteger(pid) || pid <= 0) return false;
|
|
110
|
+
try {
|
|
111
|
+
process.kill(pid, 0);
|
|
112
|
+
return true;
|
|
113
|
+
} catch (err) {
|
|
114
|
+
return err?.code === 'EPERM';
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
function parsePidFromBinaryArtifact(fileName) {
|
|
119
|
+
const match = fileName.match(/\.(?:tmp|bak)-(\d+)(?:-|$)/);
|
|
120
|
+
if (!match) return null;
|
|
121
|
+
const pid = Number.parseInt(match[1], 10);
|
|
122
|
+
return Number.isInteger(pid) ? pid : null;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
function isBinaryTempArtifact(fileName) {
|
|
126
|
+
return BINARY_ARTIFACT_BASE_FILES.some(
|
|
127
|
+
(baseFile) => fileName.startsWith(`${baseFile}.tmp-`) || fileName.startsWith(`${baseFile}.bak-`)
|
|
128
|
+
);
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
function addToMetric(metrics, key, value = 1) {
|
|
132
|
+
if (!metrics || !Number.isFinite(value) || value <= 0) return;
|
|
133
|
+
metrics[key] = (metrics[key] || 0) + value;
|
|
134
|
+
}
|
|
135
|
+
|
|
140
136
|
export async function readBinaryStoreTelemetry(cacheDir) {
|
|
141
137
|
return readTelemetryFile(cacheDir);
|
|
142
138
|
}
|
|
@@ -160,267 +156,267 @@ export async function recordBinaryStoreCorruption(
|
|
|
160
156
|
};
|
|
161
157
|
});
|
|
162
158
|
}
|
|
163
|
-
|
|
164
|
-
export async function cleanupStaleBinaryArtifacts(
|
|
165
|
-
cacheDir,
|
|
166
|
-
{ minAgeMs = STARTUP_TMP_CLEANUP_MIN_AGE_MS, logger = null } = {}
|
|
167
|
-
) {
|
|
168
|
-
const result = {
|
|
169
|
-
cacheDir,
|
|
170
|
-
scanned: 0,
|
|
171
|
-
removed: 0,
|
|
172
|
-
skippedActive: 0,
|
|
173
|
-
removedFiles: [],
|
|
174
|
-
};
|
|
175
|
-
|
|
176
|
-
let entries = [];
|
|
177
|
-
try {
|
|
178
|
-
entries = await fs.readdir(cacheDir, { withFileTypes: true });
|
|
179
|
-
} catch {
|
|
180
|
-
return result;
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
const now = Date.now();
|
|
184
|
-
for (const entry of entries) {
|
|
185
|
-
const fileName = typeof entry === 'string' ? entry : entry?.name;
|
|
186
|
-
if (!fileName) continue;
|
|
187
|
-
const isFileEntry = typeof entry === 'string' ? true : entry?.isFile?.() === true;
|
|
188
|
-
if (!isFileEntry) continue;
|
|
189
|
-
if (!isBinaryTempArtifact(fileName)) continue;
|
|
190
|
-
result.scanned += 1;
|
|
191
|
-
|
|
192
|
-
const fullPath = path.join(cacheDir, fileName);
|
|
193
|
-
const stats = await fs.stat(fullPath).catch(() => null);
|
|
194
|
-
if (!stats) continue;
|
|
195
|
-
|
|
196
|
-
const ageMs = now - stats.mtimeMs;
|
|
197
|
-
const ownerPid = parsePidFromBinaryArtifact(fileName);
|
|
198
|
-
if (ownerPid && isProcessRunning(ownerPid)) {
|
|
199
|
-
result.skippedActive += 1;
|
|
200
|
-
continue;
|
|
201
|
-
}
|
|
202
|
-
if (ageMs < minAgeMs) continue;
|
|
203
|
-
|
|
204
|
-
await fs.rm(fullPath, { force: true }).catch(() => {});
|
|
205
|
-
result.removed += 1;
|
|
206
|
-
result.removedFiles.push(fileName);
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
await updateTelemetry(cacheDir, (telemetry) => {
|
|
210
|
-
telemetry.totals.startupCleanupRuns += 1;
|
|
211
|
-
telemetry.totals.staleTempFilesRemoved += result.removed;
|
|
212
|
-
telemetry.totals.staleTempFilesSkippedActive += result.skippedActive;
|
|
213
|
-
telemetry.lastStartupCleanup = {
|
|
214
|
-
at: new Date().toISOString(),
|
|
215
|
-
scanned: result.scanned,
|
|
216
|
-
removed: result.removed,
|
|
217
|
-
skippedActive: result.skippedActive,
|
|
218
|
-
};
|
|
219
|
-
});
|
|
220
|
-
|
|
221
|
-
if (logger && result.removed > 0) {
|
|
222
|
-
logger.info(
|
|
223
|
-
`[Cache] Startup temp cleanup removed ${result.removed} stale artifact(s) from ${cacheDir}`
|
|
224
|
-
);
|
|
225
|
-
}
|
|
226
|
-
|
|
227
|
-
return result;
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
function isRetryableRenameError(err) {
|
|
231
|
-
return RETRYABLE_RENAME_ERRORS.has(err?.code);
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
async function renameWithRetry(
|
|
235
|
-
source,
|
|
236
|
-
target,
|
|
237
|
-
{ retries = 12, delayMs = 50, maxDelayMs = 1000 } = {}
|
|
238
|
-
) {
|
|
239
|
-
let attempt = 0;
|
|
240
|
-
let delay = delayMs;
|
|
241
|
-
while (true) {
|
|
242
|
-
try {
|
|
243
|
-
await fs.rename(source, target);
|
|
244
|
-
return attempt;
|
|
245
|
-
} catch (err) {
|
|
246
|
-
if (!isRetryableRenameError(err) || attempt >= retries) {
|
|
247
|
-
err.renameRetryCount = attempt;
|
|
248
|
-
throw err;
|
|
249
|
-
}
|
|
250
|
-
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
251
|
-
attempt += 1;
|
|
252
|
-
delay = Math.min(delay * 2, maxDelayMs);
|
|
253
|
-
}
|
|
254
|
-
}
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
async function pathExists(filePath) {
|
|
258
|
-
try {
|
|
259
|
-
await fs.access(filePath);
|
|
260
|
-
return true;
|
|
261
|
-
} catch {
|
|
262
|
-
return false;
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
async function removeIfExists(filePath) {
|
|
267
|
-
await fs.rm(filePath, { force: true }).catch(() => {});
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
async function promoteFileWithFallback(source, target, renameOptions = {}, metrics = null) {
|
|
271
|
-
try {
|
|
272
|
-
const retriesUsed = await renameWithRetry(source, target, renameOptions);
|
|
273
|
-
addToMetric(metrics, 'renameRetryCount', retriesUsed);
|
|
274
|
-
return;
|
|
275
|
-
} catch (renameError) {
|
|
276
|
-
const retriesUsed = Number.isFinite(renameError?.renameRetryCount)
|
|
277
|
-
? renameError.renameRetryCount
|
|
278
|
-
: 0;
|
|
279
|
-
addToMetric(metrics, 'renameRetryCount', retriesUsed);
|
|
280
|
-
if (!isRetryableRenameError(renameError)) {
|
|
281
|
-
throw renameError;
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
try {
|
|
285
|
-
await fs.copyFile(source, target);
|
|
286
|
-
await removeIfExists(source);
|
|
287
|
-
addToMetric(metrics, 'fallbackCopyCount', 1);
|
|
288
|
-
return;
|
|
289
|
-
} catch (copyError) {
|
|
290
|
-
const wrapped = new Error(
|
|
291
|
-
`rename failed (${renameError.message}); fallback copy failed (${copyError.message})`
|
|
292
|
-
);
|
|
293
|
-
wrapped.code = copyError?.code || renameError?.code;
|
|
294
|
-
throw wrapped;
|
|
295
|
-
}
|
|
296
|
-
}
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
async function replaceFilesAtomically(filePairs, renameOptions = {}) {
|
|
300
|
-
const metrics = createTelemetryTotals();
|
|
301
|
-
metrics.atomicReplaceAttempts = 1;
|
|
302
|
-
const cacheDir = filePairs.length > 0 ? path.dirname(filePairs[0].target) : null;
|
|
303
|
-
const backupSuffix = `.bak-${process.pid}-${Date.now()}`;
|
|
304
|
-
const backups = [];
|
|
305
|
-
const replacedTargets = [];
|
|
306
|
-
let operationError = null;
|
|
307
|
-
|
|
308
|
-
try {
|
|
309
|
-
// Stage current files as backups first. If this fails, nothing is replaced.
|
|
310
|
-
for (const pair of filePairs) {
|
|
311
|
-
if (!(await pathExists(pair.target))) continue;
|
|
312
|
-
const backupPath = `${pair.target}${backupSuffix}`;
|
|
313
|
-
await removeIfExists(backupPath);
|
|
314
|
-
await promoteFileWithFallback(pair.target, backupPath, renameOptions, metrics);
|
|
315
|
-
backups.push({ target: pair.target, backupPath });
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
// Replace targets with new temp files.
|
|
319
|
-
for (const pair of filePairs) {
|
|
320
|
-
await promoteFileWithFallback(pair.source, pair.target, renameOptions, metrics);
|
|
321
|
-
replacedTargets.push(pair.target);
|
|
322
|
-
}
|
|
323
|
-
metrics.atomicReplaceSuccesses = 1;
|
|
324
|
-
} catch (error) {
|
|
325
|
-
operationError = error;
|
|
326
|
-
metrics.atomicReplaceFailures = 1;
|
|
327
|
-
metrics.rollbackCount = 1;
|
|
328
|
-
const rollbackErrors = [];
|
|
329
|
-
|
|
330
|
-
// Remove any partially replaced files before restoring backups.
|
|
331
|
-
for (const target of replacedTargets.reverse()) {
|
|
332
|
-
await removeIfExists(target);
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
// Restore original files from backups.
|
|
336
|
-
for (const backup of backups.reverse()) {
|
|
337
|
-
try {
|
|
338
|
-
await promoteFileWithFallback(backup.backupPath, backup.target, renameOptions, metrics);
|
|
339
|
-
} catch (restoreErr) {
|
|
340
|
-
rollbackErrors.push(
|
|
341
|
-
`restore ${path.basename(backup.target)} failed: ${restoreErr.message}`
|
|
342
|
-
);
|
|
343
|
-
}
|
|
344
|
-
}
|
|
345
|
-
if (rollbackErrors.length > 0) {
|
|
346
|
-
metrics.rollbackRestoreFailureCount = rollbackErrors.length;
|
|
347
|
-
}
|
|
348
|
-
|
|
349
|
-
// Clean up temp files left from this failed write attempt.
|
|
350
|
-
await Promise.all(filePairs.map((pair) => removeIfExists(pair.source)));
|
|
351
|
-
|
|
352
|
-
if (rollbackErrors.length > 0) {
|
|
353
|
-
error.message = `${error.message}. Rollback issues: ${rollbackErrors.join('; ')}`;
|
|
354
|
-
}
|
|
355
|
-
throw error;
|
|
356
|
-
} finally {
|
|
357
|
-
// Best-effort cleanup for any backup remnants after success/rollback.
|
|
358
|
-
await Promise.all(backups.map((backup) => removeIfExists(backup.backupPath)));
|
|
359
|
-
await updateTelemetry(cacheDir, (telemetry) => {
|
|
360
|
-
telemetry.totals.atomicReplaceAttempts += metrics.atomicReplaceAttempts;
|
|
361
|
-
telemetry.totals.atomicReplaceSuccesses += metrics.atomicReplaceSuccesses;
|
|
362
|
-
telemetry.totals.atomicReplaceFailures += metrics.atomicReplaceFailures;
|
|
363
|
-
telemetry.totals.renameRetryCount += metrics.renameRetryCount;
|
|
364
|
-
telemetry.totals.fallbackCopyCount += metrics.fallbackCopyCount;
|
|
365
|
-
telemetry.totals.rollbackCount += metrics.rollbackCount;
|
|
366
|
-
telemetry.totals.rollbackRestoreFailureCount += metrics.rollbackRestoreFailureCount;
|
|
367
|
-
telemetry.lastAtomicReplace = {
|
|
368
|
-
at: new Date().toISOString(),
|
|
369
|
-
success: metrics.atomicReplaceSuccesses > 0,
|
|
370
|
-
renameRetryCount: metrics.renameRetryCount,
|
|
371
|
-
fallbackCopyCount: metrics.fallbackCopyCount,
|
|
372
|
-
rollbackCount: metrics.rollbackCount,
|
|
373
|
-
rollbackRestoreFailureCount: metrics.rollbackRestoreFailureCount,
|
|
374
|
-
};
|
|
375
|
-
if (operationError) {
|
|
376
|
-
telemetry.lastError = {
|
|
377
|
-
at: new Date().toISOString(),
|
|
378
|
-
message: operationError.message,
|
|
379
|
-
};
|
|
380
|
-
}
|
|
381
|
-
});
|
|
382
|
-
}
|
|
383
|
-
}
|
|
384
|
-
|
|
385
|
-
/**
|
|
386
|
-
* Custom error for binary store corruption.
|
|
387
|
-
* Allows cache layer to distinguish corruption from other load failures.
|
|
388
|
-
*/
|
|
389
|
-
export class BinaryStoreCorruptionError extends Error {
|
|
390
|
-
constructor(message) {
|
|
391
|
-
super(message);
|
|
392
|
-
this.name = 'BinaryStoreCorruptionError';
|
|
393
|
-
}
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
function writeMagic(buffer, magic) {
|
|
397
|
-
buffer.write(magic, 0, 'ascii');
|
|
398
|
-
}
|
|
399
|
-
|
|
400
|
-
function readMagic(buffer) {
|
|
401
|
-
return buffer.toString('ascii', 0, 4);
|
|
402
|
-
}
|
|
403
|
-
|
|
404
|
-
function ensureLittleEndian() {
|
|
405
|
-
if (os.endianness() !== 'LE') {
|
|
406
|
-
throw new Error('Binary vector store requires little-endian architecture');
|
|
407
|
-
}
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
function getDataView(buffer) {
|
|
411
|
-
return new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
|
|
412
|
-
}
|
|
413
|
-
|
|
414
|
-
/**
|
|
415
|
-
* Generate a random writeId shared across all files in a single write operation.
|
|
416
|
-
*/
|
|
417
|
-
function generateWriteId() {
|
|
418
|
-
return crypto.randomInt(1,
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
/**
|
|
422
|
-
* Compute CRC32 checksum over a buffer.
|
|
423
|
-
*/
|
|
159
|
+
|
|
160
|
+
export async function cleanupStaleBinaryArtifacts(
|
|
161
|
+
cacheDir,
|
|
162
|
+
{ minAgeMs = STARTUP_TMP_CLEANUP_MIN_AGE_MS, logger = null } = {}
|
|
163
|
+
) {
|
|
164
|
+
const result = {
|
|
165
|
+
cacheDir,
|
|
166
|
+
scanned: 0,
|
|
167
|
+
removed: 0,
|
|
168
|
+
skippedActive: 0,
|
|
169
|
+
removedFiles: [],
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
let entries = [];
|
|
173
|
+
try {
|
|
174
|
+
entries = await fs.readdir(cacheDir, { withFileTypes: true });
|
|
175
|
+
} catch {
|
|
176
|
+
return result;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
const now = Date.now();
|
|
180
|
+
for (const entry of entries) {
|
|
181
|
+
const fileName = typeof entry === 'string' ? entry : entry?.name;
|
|
182
|
+
if (!fileName) continue;
|
|
183
|
+
const isFileEntry = typeof entry === 'string' ? true : entry?.isFile?.() === true;
|
|
184
|
+
if (!isFileEntry) continue;
|
|
185
|
+
if (!isBinaryTempArtifact(fileName)) continue;
|
|
186
|
+
result.scanned += 1;
|
|
187
|
+
|
|
188
|
+
const fullPath = path.join(cacheDir, fileName);
|
|
189
|
+
const stats = await fs.stat(fullPath).catch(() => null);
|
|
190
|
+
if (!stats) continue;
|
|
191
|
+
|
|
192
|
+
const ageMs = now - stats.mtimeMs;
|
|
193
|
+
const ownerPid = parsePidFromBinaryArtifact(fileName);
|
|
194
|
+
if (ownerPid && isProcessRunning(ownerPid)) {
|
|
195
|
+
result.skippedActive += 1;
|
|
196
|
+
continue;
|
|
197
|
+
}
|
|
198
|
+
if (ageMs < minAgeMs) continue;
|
|
199
|
+
|
|
200
|
+
await fs.rm(fullPath, { force: true }).catch(() => {});
|
|
201
|
+
result.removed += 1;
|
|
202
|
+
result.removedFiles.push(fileName);
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
await updateTelemetry(cacheDir, (telemetry) => {
|
|
206
|
+
telemetry.totals.startupCleanupRuns += 1;
|
|
207
|
+
telemetry.totals.staleTempFilesRemoved += result.removed;
|
|
208
|
+
telemetry.totals.staleTempFilesSkippedActive += result.skippedActive;
|
|
209
|
+
telemetry.lastStartupCleanup = {
|
|
210
|
+
at: new Date().toISOString(),
|
|
211
|
+
scanned: result.scanned,
|
|
212
|
+
removed: result.removed,
|
|
213
|
+
skippedActive: result.skippedActive,
|
|
214
|
+
};
|
|
215
|
+
});
|
|
216
|
+
|
|
217
|
+
if (logger && result.removed > 0) {
|
|
218
|
+
logger.info(
|
|
219
|
+
`[Cache] Startup temp cleanup removed ${result.removed} stale artifact(s) from ${cacheDir}`
|
|
220
|
+
);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
return result;
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
function isRetryableRenameError(err) {
|
|
227
|
+
return RETRYABLE_RENAME_ERRORS.has(err?.code);
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
async function renameWithRetry(
|
|
231
|
+
source,
|
|
232
|
+
target,
|
|
233
|
+
{ retries = 12, delayMs = 50, maxDelayMs = 1000 } = {}
|
|
234
|
+
) {
|
|
235
|
+
let attempt = 0;
|
|
236
|
+
let delay = delayMs;
|
|
237
|
+
while (true) {
|
|
238
|
+
try {
|
|
239
|
+
await fs.rename(source, target);
|
|
240
|
+
return attempt;
|
|
241
|
+
} catch (err) {
|
|
242
|
+
if (!isRetryableRenameError(err) || attempt >= retries) {
|
|
243
|
+
err.renameRetryCount = attempt;
|
|
244
|
+
throw err;
|
|
245
|
+
}
|
|
246
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
247
|
+
attempt += 1;
|
|
248
|
+
delay = Math.min(delay * 2, maxDelayMs);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
async function pathExists(filePath) {
|
|
254
|
+
try {
|
|
255
|
+
await fs.access(filePath);
|
|
256
|
+
return true;
|
|
257
|
+
} catch {
|
|
258
|
+
return false;
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
async function removeIfExists(filePath) {
|
|
263
|
+
await fs.rm(filePath, { force: true }).catch(() => {});
|
|
264
|
+
}
|
|
265
|
+
|
|
266
|
+
async function promoteFileWithFallback(source, target, renameOptions = {}, metrics = null) {
|
|
267
|
+
try {
|
|
268
|
+
const retriesUsed = await renameWithRetry(source, target, renameOptions);
|
|
269
|
+
addToMetric(metrics, 'renameRetryCount', retriesUsed);
|
|
270
|
+
return;
|
|
271
|
+
} catch (renameError) {
|
|
272
|
+
const retriesUsed = Number.isFinite(renameError?.renameRetryCount)
|
|
273
|
+
? renameError.renameRetryCount
|
|
274
|
+
: 0;
|
|
275
|
+
addToMetric(metrics, 'renameRetryCount', retriesUsed);
|
|
276
|
+
if (!isRetryableRenameError(renameError)) {
|
|
277
|
+
throw renameError;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
try {
|
|
281
|
+
await fs.copyFile(source, target);
|
|
282
|
+
await removeIfExists(source);
|
|
283
|
+
addToMetric(metrics, 'fallbackCopyCount', 1);
|
|
284
|
+
return;
|
|
285
|
+
} catch (copyError) {
|
|
286
|
+
const wrapped = new Error(
|
|
287
|
+
`rename failed (${renameError.message}); fallback copy failed (${copyError.message})`
|
|
288
|
+
);
|
|
289
|
+
wrapped.code = copyError?.code || renameError?.code;
|
|
290
|
+
throw wrapped;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
async function replaceFilesAtomically(filePairs, renameOptions = {}) {
|
|
296
|
+
const metrics = createTelemetryTotals();
|
|
297
|
+
metrics.atomicReplaceAttempts = 1;
|
|
298
|
+
const cacheDir = filePairs.length > 0 ? path.dirname(filePairs[0].target) : null;
|
|
299
|
+
const backupSuffix = `.bak-${process.pid}-${Date.now()}`;
|
|
300
|
+
const backups = [];
|
|
301
|
+
const replacedTargets = [];
|
|
302
|
+
let operationError = null;
|
|
303
|
+
|
|
304
|
+
try {
|
|
305
|
+
// Stage current files as backups first. If this fails, nothing is replaced.
|
|
306
|
+
for (const pair of filePairs) {
|
|
307
|
+
if (!(await pathExists(pair.target))) continue;
|
|
308
|
+
const backupPath = `${pair.target}${backupSuffix}`;
|
|
309
|
+
await removeIfExists(backupPath);
|
|
310
|
+
await promoteFileWithFallback(pair.target, backupPath, renameOptions, metrics);
|
|
311
|
+
backups.push({ target: pair.target, backupPath });
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
// Replace targets with new temp files.
|
|
315
|
+
for (const pair of filePairs) {
|
|
316
|
+
await promoteFileWithFallback(pair.source, pair.target, renameOptions, metrics);
|
|
317
|
+
replacedTargets.push(pair.target);
|
|
318
|
+
}
|
|
319
|
+
metrics.atomicReplaceSuccesses = 1;
|
|
320
|
+
} catch (error) {
|
|
321
|
+
operationError = error;
|
|
322
|
+
metrics.atomicReplaceFailures = 1;
|
|
323
|
+
metrics.rollbackCount = 1;
|
|
324
|
+
const rollbackErrors = [];
|
|
325
|
+
|
|
326
|
+
// Remove any partially replaced files before restoring backups.
|
|
327
|
+
for (const target of replacedTargets.reverse()) {
|
|
328
|
+
await removeIfExists(target);
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// Restore original files from backups.
|
|
332
|
+
for (const backup of backups.reverse()) {
|
|
333
|
+
try {
|
|
334
|
+
await promoteFileWithFallback(backup.backupPath, backup.target, renameOptions, metrics);
|
|
335
|
+
} catch (restoreErr) {
|
|
336
|
+
rollbackErrors.push(
|
|
337
|
+
`restore ${path.basename(backup.target)} failed: ${restoreErr.message}`
|
|
338
|
+
);
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
if (rollbackErrors.length > 0) {
|
|
342
|
+
metrics.rollbackRestoreFailureCount = rollbackErrors.length;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
// Clean up temp files left from this failed write attempt.
|
|
346
|
+
await Promise.all(filePairs.map((pair) => removeIfExists(pair.source)));
|
|
347
|
+
|
|
348
|
+
if (rollbackErrors.length > 0) {
|
|
349
|
+
error.message = `${error.message}. Rollback issues: ${rollbackErrors.join('; ')}`;
|
|
350
|
+
}
|
|
351
|
+
throw error;
|
|
352
|
+
} finally {
|
|
353
|
+
// Best-effort cleanup for any backup remnants after success/rollback.
|
|
354
|
+
await Promise.all(backups.map((backup) => removeIfExists(backup.backupPath)));
|
|
355
|
+
await updateTelemetry(cacheDir, (telemetry) => {
|
|
356
|
+
telemetry.totals.atomicReplaceAttempts += metrics.atomicReplaceAttempts;
|
|
357
|
+
telemetry.totals.atomicReplaceSuccesses += metrics.atomicReplaceSuccesses;
|
|
358
|
+
telemetry.totals.atomicReplaceFailures += metrics.atomicReplaceFailures;
|
|
359
|
+
telemetry.totals.renameRetryCount += metrics.renameRetryCount;
|
|
360
|
+
telemetry.totals.fallbackCopyCount += metrics.fallbackCopyCount;
|
|
361
|
+
telemetry.totals.rollbackCount += metrics.rollbackCount;
|
|
362
|
+
telemetry.totals.rollbackRestoreFailureCount += metrics.rollbackRestoreFailureCount;
|
|
363
|
+
telemetry.lastAtomicReplace = {
|
|
364
|
+
at: new Date().toISOString(),
|
|
365
|
+
success: metrics.atomicReplaceSuccesses > 0,
|
|
366
|
+
renameRetryCount: metrics.renameRetryCount,
|
|
367
|
+
fallbackCopyCount: metrics.fallbackCopyCount,
|
|
368
|
+
rollbackCount: metrics.rollbackCount,
|
|
369
|
+
rollbackRestoreFailureCount: metrics.rollbackRestoreFailureCount,
|
|
370
|
+
};
|
|
371
|
+
if (operationError) {
|
|
372
|
+
telemetry.lastError = {
|
|
373
|
+
at: new Date().toISOString(),
|
|
374
|
+
message: operationError.message,
|
|
375
|
+
};
|
|
376
|
+
}
|
|
377
|
+
});
|
|
378
|
+
}
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
/**
|
|
382
|
+
* Custom error for binary store corruption.
|
|
383
|
+
* Allows cache layer to distinguish corruption from other load failures.
|
|
384
|
+
*/
|
|
385
|
+
export class BinaryStoreCorruptionError extends Error {
|
|
386
|
+
constructor(message) {
|
|
387
|
+
super(message);
|
|
388
|
+
this.name = 'BinaryStoreCorruptionError';
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
function writeMagic(buffer, magic) {
|
|
393
|
+
buffer.write(magic, 0, 'ascii');
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
function readMagic(buffer) {
|
|
397
|
+
return buffer.toString('ascii', 0, 4);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
function ensureLittleEndian() {
|
|
401
|
+
if (os.endianness() !== 'LE') {
|
|
402
|
+
throw new Error('Binary vector store requires little-endian architecture');
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
|
|
406
|
+
function getDataView(buffer) {
|
|
407
|
+
return new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
|
|
408
|
+
}
|
|
409
|
+
|
|
410
|
+
/**
|
|
411
|
+
* Generate a random writeId shared across all files in a single write operation.
|
|
412
|
+
*/
|
|
413
|
+
function generateWriteId() {
|
|
414
|
+
return crypto.randomInt(1, 0xffffffff);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
/**
|
|
418
|
+
* Compute CRC32 checksum over a buffer.
|
|
419
|
+
*/
|
|
424
420
|
function computeCrc32(buffer, initial) {
|
|
425
421
|
return initial !== undefined ? crc32(buffer, initial) >>> 0 : crc32(buffer) >>> 0;
|
|
426
422
|
}
|
|
@@ -483,180 +479,174 @@ async function writeHeaderCrc(handle, crcValue) {
|
|
|
483
479
|
crcView.setUint32(0, crcValue >>> 0, true);
|
|
484
480
|
await handle.write(crcBuffer, 0, crcBuffer.length, 20);
|
|
485
481
|
}
|
|
486
|
-
|
|
487
|
-
function readHeader(buffer, magic, headerSize) {
|
|
488
|
-
if (buffer.length < headerSize) {
|
|
489
|
-
throw new BinaryStoreCorruptionError('Binary store header is truncated');
|
|
490
|
-
}
|
|
491
|
-
const actualMagic = readMagic(buffer);
|
|
492
|
-
if (actualMagic !== magic) {
|
|
493
|
-
throw new BinaryStoreCorruptionError(`Invalid binary store magic (${actualMagic})`);
|
|
494
|
-
}
|
|
495
|
-
const view = getDataView(buffer);
|
|
496
|
-
const version = view.getUint32(4, true);
|
|
497
|
-
if (version !== STORE_VERSION) {
|
|
498
|
-
throw new Error(`Unsupported binary store version (${version})`);
|
|
499
|
-
}
|
|
500
|
-
return view;
|
|
501
|
-
}
|
|
502
|
-
|
|
503
|
-
function writeVectorsHeader(buffer, dim, count, writeId) {
|
|
504
|
-
writeMagic(buffer, MAGIC_VECTORS);
|
|
505
|
-
const view = getDataView(buffer);
|
|
506
|
-
view.setUint32(4, STORE_VERSION, true);
|
|
507
|
-
view.setUint32(8, dim, true);
|
|
508
|
-
view.setUint32(12, count, true);
|
|
509
|
-
view.setUint32(16, writeId, true);
|
|
510
|
-
view.setUint32(20, 0, true); // CRC32 placeholder — filled after payload write
|
|
511
|
-
// bytes 24-31: reserved
|
|
512
|
-
}
|
|
513
|
-
|
|
514
|
-
function writeRecordsHeader(buffer, count, fileCount, writeId) {
|
|
515
|
-
writeMagic(buffer, MAGIC_RECORDS);
|
|
516
|
-
const view = getDataView(buffer);
|
|
517
|
-
view.setUint32(4, STORE_VERSION, true);
|
|
518
|
-
view.setUint32(8, count, true);
|
|
519
|
-
view.setUint32(12, fileCount, true);
|
|
520
|
-
view.setUint32(16, writeId, true);
|
|
521
|
-
view.setUint32(20, 0, true); // CRC32 placeholder
|
|
522
|
-
// bytes 24-31: reserved
|
|
523
|
-
}
|
|
524
|
-
|
|
525
|
-
function writeContentHeader(buffer, totalBytes, writeId) {
|
|
526
|
-
writeMagic(buffer, MAGIC_CONTENT);
|
|
527
|
-
const view = getDataView(buffer);
|
|
528
|
-
view.setUint32(4, STORE_VERSION, true);
|
|
529
|
-
const value = BigInt(totalBytes);
|
|
530
|
-
view.setBigUint64(8, value, true);
|
|
531
|
-
view.setUint32(16, writeId, true);
|
|
532
|
-
view.setUint32(20, 0, true); // CRC32 placeholder
|
|
533
|
-
// bytes 24-31: reserved
|
|
534
|
-
}
|
|
535
|
-
|
|
536
|
-
function readBigUint(view, offset) {
|
|
537
|
-
const value = view.getBigUint64(offset, true);
|
|
538
|
-
if (value > BigInt(Number.MAX_SAFE_INTEGER)) {
|
|
539
|
-
throw new Error('Binary store content offset exceeds safe integer range');
|
|
540
|
-
}
|
|
541
|
-
return Number(value);
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
function normalizeContent(value) {
|
|
545
|
-
if (value === null || value === undefined) return '';
|
|
546
|
-
if (typeof value !== 'string') return String(value);
|
|
547
|
-
return value;
|
|
548
|
-
}
|
|
549
|
-
|
|
550
|
-
export class BinaryVectorStore {
|
|
551
|
-
constructor({
|
|
552
|
-
vectorsBuffer,
|
|
553
|
-
recordsBuffer,
|
|
554
|
-
vectorsHandle,
|
|
555
|
-
vectorsFd,
|
|
556
|
-
contentHandle,
|
|
557
|
-
contentBuffer,
|
|
558
|
-
contentSize,
|
|
559
|
-
files,
|
|
560
|
-
dim,
|
|
561
|
-
count,
|
|
562
|
-
contentCacheEntries,
|
|
563
|
-
vectorCacheEntries,
|
|
564
|
-
}) {
|
|
565
|
-
this.vectorsBuffer = vectorsBuffer;
|
|
566
|
-
this.recordsBuffer = recordsBuffer;
|
|
567
|
-
this.vectorsHandle = vectorsHandle ?? null;
|
|
568
|
-
this.vectorsFd = Number.isInteger(vectorsFd) ? vectorsFd : null;
|
|
569
|
-
this.contentHandle = contentHandle ?? null;
|
|
570
|
-
this.contentBuffer = contentBuffer ?? null;
|
|
571
|
-
this.contentSize = Number.isFinite(contentSize)
|
|
572
|
-
? contentSize
|
|
573
|
-
: contentBuffer
|
|
574
|
-
? Math.max(0, contentBuffer.length - CONTENT_HEADER_SIZE)
|
|
575
|
-
: 0;
|
|
576
|
-
this.files = files;
|
|
577
|
-
this.dim = dim;
|
|
578
|
-
this.count = count;
|
|
579
|
-
this.contentCacheEntries = Number.isInteger(contentCacheEntries) ? contentCacheEntries : 256;
|
|
580
|
-
this.contentCache = new Map();
|
|
581
|
-
this.vectorCacheEntries = Number.isInteger(vectorCacheEntries) ? vectorCacheEntries : 0;
|
|
582
|
-
this.vectorCache = new Map();
|
|
583
|
-
|
|
584
|
-
this.vectorDataOffset = VECTOR_HEADER_SIZE;
|
|
585
|
-
this.recordDataOffset = RECORD_HEADER_SIZE;
|
|
586
|
-
this.contentDataOffset = CONTENT_HEADER_SIZE;
|
|
587
|
-
}
|
|
588
|
-
|
|
589
|
-
async close() {
|
|
590
|
-
this.contentCache.clear();
|
|
591
|
-
this.vectorCache.clear();
|
|
592
|
-
this.vectorsBuffer = null;
|
|
593
|
-
this.recordsBuffer = null;
|
|
594
|
-
this.contentBuffer = null;
|
|
595
|
-
this.files = null;
|
|
596
|
-
if (this.vectorsHandle) {
|
|
597
|
-
try {
|
|
598
|
-
await this.vectorsHandle.close();
|
|
599
|
-
} catch {
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
}
|
|
655
|
-
vectorsBuffer = headerBuffer;
|
|
656
|
-
} else {
|
|
657
|
-
vectorsBuffer = await fs.readFile(vectorsPath);
|
|
658
|
-
}
|
|
659
|
-
|
|
482
|
+
|
|
483
|
+
function readHeader(buffer, magic, headerSize) {
|
|
484
|
+
if (buffer.length < headerSize) {
|
|
485
|
+
throw new BinaryStoreCorruptionError('Binary store header is truncated');
|
|
486
|
+
}
|
|
487
|
+
const actualMagic = readMagic(buffer);
|
|
488
|
+
if (actualMagic !== magic) {
|
|
489
|
+
throw new BinaryStoreCorruptionError(`Invalid binary store magic (${actualMagic})`);
|
|
490
|
+
}
|
|
491
|
+
const view = getDataView(buffer);
|
|
492
|
+
const version = view.getUint32(4, true);
|
|
493
|
+
if (version !== STORE_VERSION) {
|
|
494
|
+
throw new Error(`Unsupported binary store version (${version})`);
|
|
495
|
+
}
|
|
496
|
+
return view;
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
function writeVectorsHeader(buffer, dim, count, writeId) {
|
|
500
|
+
writeMagic(buffer, MAGIC_VECTORS);
|
|
501
|
+
const view = getDataView(buffer);
|
|
502
|
+
view.setUint32(4, STORE_VERSION, true);
|
|
503
|
+
view.setUint32(8, dim, true);
|
|
504
|
+
view.setUint32(12, count, true);
|
|
505
|
+
view.setUint32(16, writeId, true);
|
|
506
|
+
view.setUint32(20, 0, true); // CRC32 placeholder — filled after payload write
|
|
507
|
+
// bytes 24-31: reserved
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
function writeRecordsHeader(buffer, count, fileCount, writeId) {
|
|
511
|
+
writeMagic(buffer, MAGIC_RECORDS);
|
|
512
|
+
const view = getDataView(buffer);
|
|
513
|
+
view.setUint32(4, STORE_VERSION, true);
|
|
514
|
+
view.setUint32(8, count, true);
|
|
515
|
+
view.setUint32(12, fileCount, true);
|
|
516
|
+
view.setUint32(16, writeId, true);
|
|
517
|
+
view.setUint32(20, 0, true); // CRC32 placeholder
|
|
518
|
+
// bytes 24-31: reserved
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
function writeContentHeader(buffer, totalBytes, writeId) {
|
|
522
|
+
writeMagic(buffer, MAGIC_CONTENT);
|
|
523
|
+
const view = getDataView(buffer);
|
|
524
|
+
view.setUint32(4, STORE_VERSION, true);
|
|
525
|
+
const value = BigInt(totalBytes);
|
|
526
|
+
view.setBigUint64(8, value, true);
|
|
527
|
+
view.setUint32(16, writeId, true);
|
|
528
|
+
view.setUint32(20, 0, true); // CRC32 placeholder
|
|
529
|
+
// bytes 24-31: reserved
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
function readBigUint(view, offset) {
|
|
533
|
+
const value = view.getBigUint64(offset, true);
|
|
534
|
+
if (value > BigInt(Number.MAX_SAFE_INTEGER)) {
|
|
535
|
+
throw new Error('Binary store content offset exceeds safe integer range');
|
|
536
|
+
}
|
|
537
|
+
return Number(value);
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
function normalizeContent(value) {
|
|
541
|
+
if (value === null || value === undefined) return '';
|
|
542
|
+
if (typeof value !== 'string') return String(value);
|
|
543
|
+
return value;
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
export class BinaryVectorStore {
|
|
547
|
+
constructor({
|
|
548
|
+
vectorsBuffer,
|
|
549
|
+
recordsBuffer,
|
|
550
|
+
vectorsHandle,
|
|
551
|
+
vectorsFd,
|
|
552
|
+
contentHandle,
|
|
553
|
+
contentBuffer,
|
|
554
|
+
contentSize,
|
|
555
|
+
files,
|
|
556
|
+
dim,
|
|
557
|
+
count,
|
|
558
|
+
contentCacheEntries,
|
|
559
|
+
vectorCacheEntries,
|
|
560
|
+
}) {
|
|
561
|
+
this.vectorsBuffer = vectorsBuffer;
|
|
562
|
+
this.recordsBuffer = recordsBuffer;
|
|
563
|
+
this.vectorsHandle = vectorsHandle ?? null;
|
|
564
|
+
this.vectorsFd = Number.isInteger(vectorsFd) ? vectorsFd : null;
|
|
565
|
+
this.contentHandle = contentHandle ?? null;
|
|
566
|
+
this.contentBuffer = contentBuffer ?? null;
|
|
567
|
+
this.contentSize = Number.isFinite(contentSize)
|
|
568
|
+
? contentSize
|
|
569
|
+
: contentBuffer
|
|
570
|
+
? Math.max(0, contentBuffer.length - CONTENT_HEADER_SIZE)
|
|
571
|
+
: 0;
|
|
572
|
+
this.files = files;
|
|
573
|
+
this.dim = dim;
|
|
574
|
+
this.count = count;
|
|
575
|
+
this.contentCacheEntries = Number.isInteger(contentCacheEntries) ? contentCacheEntries : 256;
|
|
576
|
+
this.contentCache = new Map();
|
|
577
|
+
this.vectorCacheEntries = Number.isInteger(vectorCacheEntries) ? vectorCacheEntries : 0;
|
|
578
|
+
this.vectorCache = new Map();
|
|
579
|
+
|
|
580
|
+
this.vectorDataOffset = VECTOR_HEADER_SIZE;
|
|
581
|
+
this.recordDataOffset = RECORD_HEADER_SIZE;
|
|
582
|
+
this.contentDataOffset = CONTENT_HEADER_SIZE;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
async close() {
|
|
586
|
+
this.contentCache.clear();
|
|
587
|
+
this.vectorCache.clear();
|
|
588
|
+
this.vectorsBuffer = null;
|
|
589
|
+
this.recordsBuffer = null;
|
|
590
|
+
this.contentBuffer = null;
|
|
591
|
+
this.files = null;
|
|
592
|
+
if (this.vectorsHandle) {
|
|
593
|
+
try {
|
|
594
|
+
await this.vectorsHandle.close();
|
|
595
|
+
} catch {}
|
|
596
|
+
}
|
|
597
|
+
this.vectorsHandle = null;
|
|
598
|
+
if (Number.isInteger(this.vectorsFd)) {
|
|
599
|
+
try {
|
|
600
|
+
fsSync.closeSync(this.vectorsFd);
|
|
601
|
+
} catch {}
|
|
602
|
+
}
|
|
603
|
+
this.vectorsFd = null;
|
|
604
|
+
if (this.contentHandle) {
|
|
605
|
+
try {
|
|
606
|
+
await this.contentHandle.close();
|
|
607
|
+
} catch {}
|
|
608
|
+
}
|
|
609
|
+
this.contentHandle = null;
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
static getPaths(cacheDir) {
|
|
613
|
+
return {
|
|
614
|
+
vectorsPath: path.join(cacheDir, VECTORS_FILE),
|
|
615
|
+
recordsPath: path.join(cacheDir, RECORDS_FILE),
|
|
616
|
+
contentPath: path.join(cacheDir, CONTENT_FILE),
|
|
617
|
+
filesPath: path.join(cacheDir, FILES_FILE),
|
|
618
|
+
};
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
static async load(cacheDir, { contentCacheEntries, vectorCacheEntries, vectorLoadMode } = {}) {
|
|
622
|
+
ensureLittleEndian();
|
|
623
|
+
const { vectorsPath, recordsPath, contentPath, filesPath } =
|
|
624
|
+
BinaryVectorStore.getPaths(cacheDir);
|
|
625
|
+
|
|
626
|
+
let contentReadHandle = null;
|
|
627
|
+
let vectorsFd = null;
|
|
628
|
+
|
|
629
|
+
try {
|
|
630
|
+
const loadVectorsFromDisk = String(vectorLoadMode).toLowerCase() === 'disk';
|
|
631
|
+
let vectorsBuffer = null;
|
|
632
|
+
|
|
633
|
+
const [recordsBuffer, filesRaw] = await Promise.all([
|
|
634
|
+
fs.readFile(recordsPath),
|
|
635
|
+
fs.readFile(filesPath, 'utf-8'),
|
|
636
|
+
]);
|
|
637
|
+
|
|
638
|
+
if (loadVectorsFromDisk) {
|
|
639
|
+
vectorsFd = fsSync.openSync(vectorsPath, 'r');
|
|
640
|
+
const headerBuffer = Buffer.alloc(VECTOR_HEADER_SIZE);
|
|
641
|
+
const bytesRead = fsSync.readSync(vectorsFd, headerBuffer, 0, VECTOR_HEADER_SIZE, 0);
|
|
642
|
+
if (bytesRead < VECTOR_HEADER_SIZE) {
|
|
643
|
+
throw new Error('Binary store vectors header is truncated');
|
|
644
|
+
}
|
|
645
|
+
vectorsBuffer = headerBuffer;
|
|
646
|
+
} else {
|
|
647
|
+
vectorsBuffer = await fs.readFile(vectorsPath);
|
|
648
|
+
}
|
|
649
|
+
|
|
660
650
|
const vectorsView = readHeader(vectorsBuffer, MAGIC_VECTORS, VECTOR_HEADER_SIZE);
|
|
661
651
|
const dim = vectorsView.getUint32(8, true);
|
|
662
652
|
const count = vectorsView.getUint32(12, true);
|
|
@@ -665,47 +655,49 @@ export class BinaryVectorStore {
|
|
|
665
655
|
const vectorsPayloadBytes = count * dim * 4;
|
|
666
656
|
|
|
667
657
|
const recordsView = readHeader(recordsBuffer, MAGIC_RECORDS, RECORD_HEADER_SIZE);
|
|
668
|
-
const recordCount = recordsView.getUint32(8, true);
|
|
669
|
-
const fileCount = recordsView.getUint32(12, true);
|
|
670
|
-
const recordsWriteId = recordsView.getUint32(16, true);
|
|
671
|
-
const recordsExpectedCrc = recordsView.getUint32(20, true);
|
|
672
|
-
|
|
673
|
-
if (recordCount !== count) {
|
|
674
|
-
throw new BinaryStoreCorruptionError(
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
const
|
|
695
|
-
|
|
696
|
-
const
|
|
697
|
-
const
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
658
|
+
const recordCount = recordsView.getUint32(8, true);
|
|
659
|
+
const fileCount = recordsView.getUint32(12, true);
|
|
660
|
+
const recordsWriteId = recordsView.getUint32(16, true);
|
|
661
|
+
const recordsExpectedCrc = recordsView.getUint32(20, true);
|
|
662
|
+
|
|
663
|
+
if (recordCount !== count) {
|
|
664
|
+
throw new BinaryStoreCorruptionError(
|
|
665
|
+
`Binary store count mismatch (${recordCount} != ${count})`
|
|
666
|
+
);
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
// Validate writeId consistency between vectors and records
|
|
670
|
+
if (vectorsWriteId !== recordsWriteId) {
|
|
671
|
+
throw new BinaryStoreCorruptionError(
|
|
672
|
+
`Binary store writeId mismatch: vectors=${vectorsWriteId}, records=${recordsWriteId}`
|
|
673
|
+
);
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
contentReadHandle = await fs.open(contentPath, 'r');
|
|
677
|
+
let totalContentBytes = 0;
|
|
678
|
+
|
|
679
|
+
const headerBuffer = Buffer.alloc(CONTENT_HEADER_SIZE);
|
|
680
|
+
const { bytesRead } = await contentReadHandle.read(headerBuffer, 0, CONTENT_HEADER_SIZE, 0);
|
|
681
|
+
if (bytesRead < CONTENT_HEADER_SIZE) {
|
|
682
|
+
throw new BinaryStoreCorruptionError('Binary store content header is truncated');
|
|
683
|
+
}
|
|
684
|
+
const contentView = readHeader(headerBuffer, MAGIC_CONTENT, CONTENT_HEADER_SIZE);
|
|
685
|
+
totalContentBytes = readBigUint(contentView, 8);
|
|
686
|
+
const contentWriteId = contentView.getUint32(16, true);
|
|
687
|
+
const contentExpectedCrc = contentView.getUint32(20, true);
|
|
688
|
+
const stats = await contentReadHandle.stat();
|
|
689
|
+
const expectedContentSize = CONTENT_HEADER_SIZE + totalContentBytes;
|
|
690
|
+
if (stats.size < expectedContentSize) {
|
|
691
|
+
throw new BinaryStoreCorruptionError('Binary store content file truncated');
|
|
692
|
+
}
|
|
693
|
+
|
|
694
|
+
// Validate writeId consistency across all three files
|
|
695
|
+
if (vectorsWriteId !== contentWriteId) {
|
|
696
|
+
throw new BinaryStoreCorruptionError(
|
|
697
|
+
`Binary store writeId mismatch: vectors=${vectorsWriteId}, content=${contentWriteId}`
|
|
698
|
+
);
|
|
699
|
+
}
|
|
700
|
+
|
|
709
701
|
// Validate CRC32 for records payload
|
|
710
702
|
const recordsPayload = recordsBuffer.subarray(RECORD_HEADER_SIZE);
|
|
711
703
|
const recordsActualCrc = computeCrc32(recordsPayload);
|
|
@@ -766,380 +758,370 @@ export class BinaryVectorStore {
|
|
|
766
758
|
`Binary store content CRC32 mismatch (expected ${contentExpectedCrc}, got 0)`
|
|
767
759
|
);
|
|
768
760
|
}
|
|
769
|
-
|
|
761
|
+
|
|
770
762
|
const filesData = JSON.parse(filesRaw);
|
|
771
|
-
// Support new format { writeId, files } and legacy raw array
|
|
772
|
-
let files;
|
|
773
|
-
let filesWriteId = null;
|
|
774
|
-
if (filesData && !Array.isArray(filesData) && Array.isArray(filesData.files)) {
|
|
775
|
-
files = filesData.files;
|
|
776
|
-
filesWriteId = filesData.writeId ?? null;
|
|
777
|
-
} else if (Array.isArray(filesData)) {
|
|
778
|
-
files = filesData;
|
|
779
|
-
} else {
|
|
780
|
-
throw new BinaryStoreCorruptionError('Binary store file table is invalid');
|
|
781
|
-
}
|
|
782
|
-
|
|
783
|
-
if (files.length !== fileCount) {
|
|
784
|
-
throw new BinaryStoreCorruptionError(
|
|
785
|
-
`Binary store file table count mismatch (${files.length} != ${fileCount})`
|
|
786
|
-
);
|
|
787
|
-
}
|
|
788
|
-
|
|
789
|
-
// Validate writeId from files.json if present
|
|
790
|
-
if (filesWriteId !== null && filesWriteId !== vectorsWriteId) {
|
|
791
|
-
throw new BinaryStoreCorruptionError(
|
|
792
|
-
`Binary store writeId mismatch: vectors=${vectorsWriteId}, files.json=${filesWriteId}`
|
|
793
|
-
);
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
return new BinaryVectorStore({
|
|
797
|
-
vectorsBuffer,
|
|
798
|
-
recordsBuffer,
|
|
799
|
-
vectorsHandle: null,
|
|
800
|
-
vectorsFd,
|
|
801
|
-
contentHandle: contentReadHandle,
|
|
802
|
-
contentSize: totalContentBytes,
|
|
803
|
-
files,
|
|
804
|
-
dim,
|
|
805
|
-
count,
|
|
806
|
-
contentCacheEntries,
|
|
807
|
-
vectorCacheEntries,
|
|
808
|
-
});
|
|
809
|
-
} catch (err) {
|
|
810
|
-
if (contentReadHandle) await contentReadHandle.close().catch(() => {});
|
|
811
|
-
if (Number.isInteger(vectorsFd)) {
|
|
812
|
-
try {
|
|
813
|
-
fsSync.closeSync(vectorsFd);
|
|
814
|
-
} catch {
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
const
|
|
830
|
-
|
|
831
|
-
const
|
|
832
|
-
const
|
|
833
|
-
const
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
this.
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
);
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
return
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
if (this.
|
|
909
|
-
const start = this.contentDataOffset + record.contentOffset;
|
|
910
|
-
const
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
const {
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
const
|
|
990
|
-
|
|
991
|
-
const
|
|
992
|
-
const
|
|
993
|
-
const
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
(vectorSource
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
let
|
|
1100
|
-
let
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
view.setUint32(
|
|
1114
|
-
view.setUint32(
|
|
1115
|
-
|
|
1116
|
-
view.setBigUint64(12, BigInt(entry.contentOffset), true);
|
|
1117
|
-
view.setUint32(20, entry.contentLength, true);
|
|
1118
|
-
view.setUint32(24, 0, true);
|
|
1119
|
-
view.setUint32(28, 0, true);
|
|
1120
|
-
|
|
763
|
+
// Support new format { writeId, files } and legacy raw array
|
|
764
|
+
let files;
|
|
765
|
+
let filesWriteId = null;
|
|
766
|
+
if (filesData && !Array.isArray(filesData) && Array.isArray(filesData.files)) {
|
|
767
|
+
files = filesData.files;
|
|
768
|
+
filesWriteId = filesData.writeId ?? null;
|
|
769
|
+
} else if (Array.isArray(filesData)) {
|
|
770
|
+
files = filesData;
|
|
771
|
+
} else {
|
|
772
|
+
throw new BinaryStoreCorruptionError('Binary store file table is invalid');
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
if (files.length !== fileCount) {
|
|
776
|
+
throw new BinaryStoreCorruptionError(
|
|
777
|
+
`Binary store file table count mismatch (${files.length} != ${fileCount})`
|
|
778
|
+
);
|
|
779
|
+
}
|
|
780
|
+
|
|
781
|
+
// Validate writeId from files.json if present
|
|
782
|
+
if (filesWriteId !== null && filesWriteId !== vectorsWriteId) {
|
|
783
|
+
throw new BinaryStoreCorruptionError(
|
|
784
|
+
`Binary store writeId mismatch: vectors=${vectorsWriteId}, files.json=${filesWriteId}`
|
|
785
|
+
);
|
|
786
|
+
}
|
|
787
|
+
|
|
788
|
+
return new BinaryVectorStore({
|
|
789
|
+
vectorsBuffer,
|
|
790
|
+
recordsBuffer,
|
|
791
|
+
vectorsHandle: null,
|
|
792
|
+
vectorsFd,
|
|
793
|
+
contentHandle: contentReadHandle,
|
|
794
|
+
contentSize: totalContentBytes,
|
|
795
|
+
files,
|
|
796
|
+
dim,
|
|
797
|
+
count,
|
|
798
|
+
contentCacheEntries,
|
|
799
|
+
vectorCacheEntries,
|
|
800
|
+
});
|
|
801
|
+
} catch (err) {
|
|
802
|
+
if (contentReadHandle) await contentReadHandle.close().catch(() => {});
|
|
803
|
+
if (Number.isInteger(vectorsFd)) {
|
|
804
|
+
try {
|
|
805
|
+
fsSync.closeSync(vectorsFd);
|
|
806
|
+
} catch {}
|
|
807
|
+
}
|
|
808
|
+
throw err;
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
get length() {
|
|
813
|
+
return this.count;
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
getRecord(index) {
|
|
817
|
+
if (index < 0 || index >= this.count) return null;
|
|
818
|
+
const offset = this.recordDataOffset + index * RECORD_SIZE;
|
|
819
|
+
const view = getDataView(this.recordsBuffer);
|
|
820
|
+
|
|
821
|
+
const fileId = view.getUint32(offset, true);
|
|
822
|
+
const startLine = view.getUint32(offset + 4, true);
|
|
823
|
+
const endLine = view.getUint32(offset + 8, true);
|
|
824
|
+
const contentOffset = readBigUint(view, offset + 12);
|
|
825
|
+
const contentLength = view.getUint32(offset + 20, true);
|
|
826
|
+
|
|
827
|
+
return {
|
|
828
|
+
fileId,
|
|
829
|
+
file: this.files[fileId],
|
|
830
|
+
startLine,
|
|
831
|
+
endLine,
|
|
832
|
+
contentOffset,
|
|
833
|
+
contentLength,
|
|
834
|
+
};
|
|
835
|
+
}
|
|
836
|
+
|
|
837
|
+
getVector(index) {
|
|
838
|
+
if (index < 0 || index >= this.count) return null;
|
|
839
|
+
if (this.vectorCacheEntries > 0) {
|
|
840
|
+
const cached = this.vectorCache.get(index);
|
|
841
|
+
if (cached) {
|
|
842
|
+
this.vectorCache.delete(index);
|
|
843
|
+
this.vectorCache.set(index, cached);
|
|
844
|
+
return cached;
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
const offset = this.vectorDataOffset + index * this.dim * 4;
|
|
849
|
+
const byteLength = this.dim * 4;
|
|
850
|
+
let vector = null;
|
|
851
|
+
|
|
852
|
+
if (this.vectorsBuffer && this.vectorsBuffer.length >= this.vectorDataOffset + byteLength) {
|
|
853
|
+
vector = new Float32Array(
|
|
854
|
+
this.vectorsBuffer.buffer,
|
|
855
|
+
this.vectorsBuffer.byteOffset + offset,
|
|
856
|
+
this.dim
|
|
857
|
+
);
|
|
858
|
+
} else if (Number.isInteger(this.vectorsFd)) {
|
|
859
|
+
const buffer = Buffer.alloc(byteLength);
|
|
860
|
+
const bytesRead = fsSync.readSync(this.vectorsFd, buffer, 0, byteLength, offset);
|
|
861
|
+
if (bytesRead === byteLength) {
|
|
862
|
+
vector = new Float32Array(buffer.buffer, buffer.byteOffset, this.dim);
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
|
|
866
|
+
if (vector && this.vectorCacheEntries > 0) {
|
|
867
|
+
this.vectorCache.set(index, vector);
|
|
868
|
+
if (this.vectorCache.size > this.vectorCacheEntries) {
|
|
869
|
+
const firstKey = this.vectorCache.keys().next().value;
|
|
870
|
+
this.vectorCache.delete(firstKey);
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
|
|
874
|
+
return vector;
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
async getContent(index) {
|
|
878
|
+
if (index < 0 || index >= this.count) return null;
|
|
879
|
+
if (this.contentCacheEntries > 0) {
|
|
880
|
+
const cached = this.contentCache.get(index);
|
|
881
|
+
if (cached !== undefined) {
|
|
882
|
+
this.contentCache.delete(index);
|
|
883
|
+
this.contentCache.set(index, cached);
|
|
884
|
+
return cached;
|
|
885
|
+
}
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
const record = this.getRecord(index);
|
|
889
|
+
if (!record || record.contentLength === 0) return '';
|
|
890
|
+
const contentLimit = record.contentOffset + record.contentLength;
|
|
891
|
+
if (Number.isFinite(this.contentSize) && contentLimit > this.contentSize) {
|
|
892
|
+
return '';
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
let content = '';
|
|
896
|
+
if (this.contentBuffer) {
|
|
897
|
+
const start = this.contentDataOffset + record.contentOffset;
|
|
898
|
+
const end = start + record.contentLength;
|
|
899
|
+
content = this.contentBuffer.slice(start, end).toString('utf-8');
|
|
900
|
+
} else if (this.contentHandle) {
|
|
901
|
+
const start = this.contentDataOffset + record.contentOffset;
|
|
902
|
+
const length = record.contentLength;
|
|
903
|
+
const buffer = Buffer.alloc(length);
|
|
904
|
+
const { bytesRead } = await this.contentHandle.read(buffer, 0, length, start);
|
|
905
|
+
content = buffer.slice(0, bytesRead).toString('utf-8');
|
|
906
|
+
} else {
|
|
907
|
+
return '';
|
|
908
|
+
}
|
|
909
|
+
|
|
910
|
+
if (this.contentCacheEntries > 0) {
|
|
911
|
+
this.contentCache.set(index, content);
|
|
912
|
+
if (this.contentCache.size > this.contentCacheEntries) {
|
|
913
|
+
const firstKey = this.contentCache.keys().next().value;
|
|
914
|
+
this.contentCache.delete(firstKey);
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
|
|
918
|
+
return content;
|
|
919
|
+
}
|
|
920
|
+
|
|
921
|
+
async toChunkViews({ includeContent = false, includeVector = true } = {}) {
|
|
922
|
+
const chunks = new Array(this.count);
|
|
923
|
+
for (let i = 0; i < this.count; i += 1) {
|
|
924
|
+
const record = this.getRecord(i);
|
|
925
|
+
if (!record) continue;
|
|
926
|
+
const chunk = {
|
|
927
|
+
file: record.file,
|
|
928
|
+
startLine: record.startLine,
|
|
929
|
+
endLine: record.endLine,
|
|
930
|
+
_index: i,
|
|
931
|
+
_binaryIndex: i,
|
|
932
|
+
};
|
|
933
|
+
if (includeVector) {
|
|
934
|
+
chunk.vector = this.getVector(i);
|
|
935
|
+
}
|
|
936
|
+
if (includeContent) {
|
|
937
|
+
chunk.content = await this.getContent(i);
|
|
938
|
+
}
|
|
939
|
+
chunks[i] = chunk;
|
|
940
|
+
}
|
|
941
|
+
return chunks;
|
|
942
|
+
}
|
|
943
|
+
|
|
944
|
+
getAllFileIndices() {
|
|
945
|
+
const map = new Map();
|
|
946
|
+
for (let i = 0; i < this.count; i++) {
|
|
947
|
+
const record = this.getRecord(i);
|
|
948
|
+
if (record) {
|
|
949
|
+
let list = map.get(record.file);
|
|
950
|
+
if (!list) {
|
|
951
|
+
list = [];
|
|
952
|
+
map.set(record.file, list);
|
|
953
|
+
}
|
|
954
|
+
list.push(i);
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
return map;
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
static async write(
|
|
961
|
+
cacheDir,
|
|
962
|
+
chunks,
|
|
963
|
+
{
|
|
964
|
+
contentCacheEntries,
|
|
965
|
+
vectorCacheEntries,
|
|
966
|
+
vectorLoadMode,
|
|
967
|
+
getContent,
|
|
968
|
+
getVector,
|
|
969
|
+
preRename,
|
|
970
|
+
renameOptions,
|
|
971
|
+
} = {}
|
|
972
|
+
) {
|
|
973
|
+
ensureLittleEndian();
|
|
974
|
+
const { vectorsPath, recordsPath, contentPath, filesPath } =
|
|
975
|
+
BinaryVectorStore.getPaths(cacheDir);
|
|
976
|
+
|
|
977
|
+
const tmpSuffix = `.tmp-${process.pid}`;
|
|
978
|
+
const vectorsTmp = `${vectorsPath}${tmpSuffix}`;
|
|
979
|
+
const recordsTmp = `${recordsPath}${tmpSuffix}`;
|
|
980
|
+
const contentTmp = `${contentPath}${tmpSuffix}`;
|
|
981
|
+
const filesTmp = `${filesPath}${tmpSuffix}`;
|
|
982
|
+
|
|
983
|
+
const fileIds = new Map();
|
|
984
|
+
const files = [];
|
|
985
|
+
const denseChunks = [];
|
|
986
|
+
const denseSourceIndices = [];
|
|
987
|
+
for (let i = 0; i < chunks.length; i += 1) {
|
|
988
|
+
const chunk = chunks[i];
|
|
989
|
+
if (!chunk) continue;
|
|
990
|
+
denseChunks.push(chunk);
|
|
991
|
+
denseSourceIndices.push(i);
|
|
992
|
+
}
|
|
993
|
+
|
|
994
|
+
const resolveVector = async (chunk, sourceIndex) => {
|
|
995
|
+
let vectorSource = chunk.vector;
|
|
996
|
+
if (
|
|
997
|
+
(vectorSource === undefined || vectorSource === null) &&
|
|
998
|
+
typeof getVector === 'function'
|
|
999
|
+
) {
|
|
1000
|
+
vectorSource = getVector(chunk, sourceIndex);
|
|
1001
|
+
if (vectorSource && typeof vectorSource.then === 'function') {
|
|
1002
|
+
vectorSource = await vectorSource;
|
|
1003
|
+
}
|
|
1004
|
+
}
|
|
1005
|
+
if (vectorSource === undefined || vectorSource === null) {
|
|
1006
|
+
throw new Error(`Missing vector data for binary cache write at index ${sourceIndex}`);
|
|
1007
|
+
}
|
|
1008
|
+
const vector =
|
|
1009
|
+
vectorSource instanceof Float32Array
|
|
1010
|
+
? vectorSource
|
|
1011
|
+
: ArrayBuffer.isView(vectorSource)
|
|
1012
|
+
? Float32Array.from(vectorSource)
|
|
1013
|
+
: new Float32Array(vectorSource);
|
|
1014
|
+
if (!vector || vector.length === 0) {
|
|
1015
|
+
throw new Error(`Empty vector data for binary cache write at index ${sourceIndex}`);
|
|
1016
|
+
}
|
|
1017
|
+
return vector;
|
|
1018
|
+
};
|
|
1019
|
+
|
|
1020
|
+
/**
 * Produce the content value to persist for one chunk.
 *
 * Uses `chunk.content` when it is neither undefined nor null; otherwise
 * awaits `getContent(chunk, sourceIndex)` when `getContent` is truthy, and
 * falls back to an empty string. The chosen value is always passed through
 * `normalizeContent` before being returned.
 *
 * @param {object} chunk - Chunk whose content is being written.
 * @param {number} sourceIndex - Index of the chunk in the caller's original list.
 * @returns {Promise<*>} The result of `normalizeContent` for the chosen value.
 */
const resolveContent = async (chunk, sourceIndex) => {
  const inline = chunk.content;
  if (inline !== undefined && inline !== null) {
    return normalizeContent(inline);
  }
  if (getContent) {
    const fetched = await getContent(chunk, sourceIndex);
    return normalizeContent(fetched);
  }
  return normalizeContent('');
};
|
|
1029
|
+
|
|
1030
|
+
const recordEntries = new Array(denseChunks.length);
|
|
1031
|
+
let contentOffset = 0;
|
|
1032
|
+
|
|
1033
|
+
for (let i = 0; i < denseChunks.length; i += 1) {
|
|
1034
|
+
const chunk = denseChunks[i];
|
|
1035
|
+
const sourceIndex = denseSourceIndices[i];
|
|
1036
|
+
|
|
1037
|
+
const file = chunk.file;
|
|
1038
|
+
if (!fileIds.has(file)) {
|
|
1039
|
+
fileIds.set(file, files.length);
|
|
1040
|
+
files.push(file);
|
|
1041
|
+
}
|
|
1042
|
+
|
|
1043
|
+
const contentValue = await resolveContent(chunk, sourceIndex);
|
|
1044
|
+
const contentLength = Buffer.byteLength(contentValue, 'utf-8');
|
|
1045
|
+
|
|
1046
|
+
recordEntries[i] = {
|
|
1047
|
+
fileId: fileIds.get(file),
|
|
1048
|
+
startLine: chunk.startLine ?? 0,
|
|
1049
|
+
endLine: chunk.endLine ?? 0,
|
|
1050
|
+
contentOffset,
|
|
1051
|
+
contentLength,
|
|
1052
|
+
};
|
|
1053
|
+
|
|
1054
|
+
contentOffset += contentLength;
|
|
1055
|
+
}
|
|
1056
|
+
|
|
1057
|
+
const count = denseChunks.length;
|
|
1058
|
+
const dim = count > 0 ? (await resolveVector(denseChunks[0], denseSourceIndices[0])).length : 0;
|
|
1059
|
+
|
|
1060
|
+
const writeId = generateWriteId();
|
|
1061
|
+
|
|
1062
|
+
await fs.writeFile(filesTmp, JSON.stringify({ writeId, files }));
|
|
1063
|
+
|
|
1064
|
+
let vectorsHandle = null;
|
|
1065
|
+
let recordsHandle = null;
|
|
1066
|
+
let contentHandle = null;
|
|
1067
|
+
|
|
1068
|
+
try {
|
|
1069
|
+
vectorsHandle = await fs.open(vectorsTmp, 'w');
|
|
1070
|
+
recordsHandle = await fs.open(recordsTmp, 'w');
|
|
1071
|
+
contentHandle = await fs.open(contentTmp, 'w');
|
|
1072
|
+
|
|
1073
|
+
const vectorsHeader = Buffer.alloc(VECTOR_HEADER_SIZE);
|
|
1074
|
+
writeVectorsHeader(vectorsHeader, dim, count, writeId);
|
|
1075
|
+
await vectorsHandle.write(vectorsHeader, 0, vectorsHeader.length, 0);
|
|
1076
|
+
|
|
1077
|
+
const recordsHeader = Buffer.alloc(RECORD_HEADER_SIZE);
|
|
1078
|
+
writeRecordsHeader(recordsHeader, count, files.length, writeId);
|
|
1079
|
+
await recordsHandle.write(recordsHeader, 0, recordsHeader.length, 0);
|
|
1080
|
+
|
|
1081
|
+
const contentHeader = Buffer.alloc(CONTENT_HEADER_SIZE);
|
|
1082
|
+
writeContentHeader(contentHeader, contentOffset, writeId);
|
|
1083
|
+
await contentHandle.write(contentHeader, 0, contentHeader.length, 0);
|
|
1084
|
+
|
|
1085
|
+
// Incremental CRC32 accumulators (zero-alloc — no read-back needed)
|
|
1086
|
+
let vectorsCrc = 0;
|
|
1087
|
+
let recordsCrc = 0;
|
|
1088
|
+
let contentCrc = 0;
|
|
1089
|
+
|
|
1090
|
+
let vectorPos = VECTOR_HEADER_SIZE;
|
|
1091
|
+
let recordPos = RECORD_HEADER_SIZE;
|
|
1092
|
+
let contentPos = CONTENT_HEADER_SIZE;
|
|
1093
|
+
|
|
1094
|
+
for (let i = 0; i < count; i += 1) {
|
|
1095
|
+
const entry = recordEntries[i];
|
|
1096
|
+
if (!entry) continue;
|
|
1097
|
+
|
|
1098
|
+
const recordBuffer = Buffer.alloc(RECORD_SIZE);
|
|
1099
|
+
const view = getDataView(recordBuffer);
|
|
1100
|
+
view.setUint32(0, entry.fileId, true);
|
|
1101
|
+
view.setUint32(4, entry.startLine, true);
|
|
1102
|
+
view.setUint32(8, entry.endLine, true);
|
|
1103
|
+
view.setBigUint64(12, BigInt(entry.contentOffset), true);
|
|
1104
|
+
view.setUint32(20, entry.contentLength, true);
|
|
1105
|
+
view.setUint32(24, 0, true);
|
|
1106
|
+
view.setUint32(28, 0, true);
|
|
1107
|
+
|
|
1121
1108
|
await recordsHandle.write(recordBuffer, 0, recordBuffer.length, recordPos);
|
|
1122
1109
|
recordPos += recordBuffer.length;
|
|
1123
1110
|
recordsCrc = updateCrc32(recordsCrc, recordBuffer);
|
|
1124
|
-
|
|
1125
|
-
const chunk = denseChunks[i];
|
|
1126
|
-
const sourceIndex = denseSourceIndices[i];
|
|
1127
|
-
const vector = await resolveVector(chunk, sourceIndex);
|
|
1128
|
-
if (vector.length !== dim) {
|
|
1129
|
-
throw new Error('Vector dimension mismatch in binary cache write');
|
|
1130
|
-
}
|
|
1131
|
-
const vectorBuffer = Buffer.from(
|
|
1132
|
-
vector.buffer,
|
|
1133
|
-
vector.byteOffset,
|
|
1134
|
-
vector.byteLength
|
|
1135
|
-
);
|
|
1111
|
+
|
|
1112
|
+
const chunk = denseChunks[i];
|
|
1113
|
+
const sourceIndex = denseSourceIndices[i];
|
|
1114
|
+
const vector = await resolveVector(chunk, sourceIndex);
|
|
1115
|
+
if (vector.length !== dim) {
|
|
1116
|
+
throw new Error('Vector dimension mismatch in binary cache write');
|
|
1117
|
+
}
|
|
1118
|
+
const vectorBuffer = Buffer.from(vector.buffer, vector.byteOffset, vector.byteLength);
|
|
1136
1119
|
await vectorsHandle.write(vectorBuffer, 0, vectorBuffer.length, vectorPos);
|
|
1137
1120
|
vectorPos += vectorBuffer.length;
|
|
1138
1121
|
vectorsCrc = updateCrc32(vectorsCrc, vectorBuffer);
|
|
1139
|
-
|
|
1140
|
-
if (entry.contentLength > 0) {
|
|
1141
|
-
|
|
1142
|
-
const val = await resolveContent(chunk, sourceIndex);
|
|
1122
|
+
|
|
1123
|
+
if (entry.contentLength > 0) {
|
|
1124
|
+
const val = await resolveContent(chunk, sourceIndex);
|
|
1143
1125
|
const contentBuffer = Buffer.from(val, 'utf-8');
|
|
1144
1126
|
await contentHandle.write(contentBuffer, 0, contentBuffer.length, contentPos);
|
|
1145
1127
|
contentPos += contentBuffer.length;
|
|
@@ -1152,32 +1134,32 @@ export class BinaryVectorStore {
|
|
|
1152
1134
|
await writeHeaderCrc(recordsHandle, recordsCrc);
|
|
1153
1135
|
}
|
|
1154
1136
|
await writeHeaderCrc(contentHandle, contentCrc);
|
|
1155
|
-
} finally {
|
|
1156
|
-
const closes = [];
|
|
1157
|
-
if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
|
|
1158
|
-
if (recordsHandle) closes.push(recordsHandle.close().catch(() => {}));
|
|
1159
|
-
if (contentHandle) closes.push(contentHandle.close().catch(() => {}));
|
|
1160
|
-
await Promise.all(closes);
|
|
1161
|
-
}
|
|
1162
|
-
|
|
1163
|
-
if (preRename) {
|
|
1164
|
-
await preRename();
|
|
1165
|
-
}
|
|
1166
|
-
|
|
1167
|
-
await replaceFilesAtomically(
|
|
1168
|
-
[
|
|
1169
|
-
{ source: vectorsTmp, target: vectorsPath },
|
|
1170
|
-
{ source: recordsTmp, target: recordsPath },
|
|
1171
|
-
{ source: contentTmp, target: contentPath },
|
|
1172
|
-
{ source: filesTmp, target: filesPath },
|
|
1173
|
-
],
|
|
1174
|
-
renameOptions
|
|
1175
|
-
);
|
|
1176
|
-
|
|
1177
|
-
return BinaryVectorStore.load(cacheDir, {
|
|
1178
|
-
contentCacheEntries,
|
|
1179
|
-
vectorCacheEntries,
|
|
1180
|
-
vectorLoadMode,
|
|
1181
|
-
});
|
|
1182
|
-
}
|
|
1183
|
-
}
|
|
1137
|
+
} finally {
|
|
1138
|
+
const closes = [];
|
|
1139
|
+
if (vectorsHandle) closes.push(vectorsHandle.close().catch(() => {}));
|
|
1140
|
+
if (recordsHandle) closes.push(recordsHandle.close().catch(() => {}));
|
|
1141
|
+
if (contentHandle) closes.push(contentHandle.close().catch(() => {}));
|
|
1142
|
+
await Promise.all(closes);
|
|
1143
|
+
}
|
|
1144
|
+
|
|
1145
|
+
if (preRename) {
|
|
1146
|
+
await preRename();
|
|
1147
|
+
}
|
|
1148
|
+
|
|
1149
|
+
await replaceFilesAtomically(
|
|
1150
|
+
[
|
|
1151
|
+
{ source: vectorsTmp, target: vectorsPath },
|
|
1152
|
+
{ source: recordsTmp, target: recordsPath },
|
|
1153
|
+
{ source: contentTmp, target: contentPath },
|
|
1154
|
+
{ source: filesTmp, target: filesPath },
|
|
1155
|
+
],
|
|
1156
|
+
renameOptions
|
|
1157
|
+
);
|
|
1158
|
+
|
|
1159
|
+
return BinaryVectorStore.load(cacheDir, {
|
|
1160
|
+
contentCacheEntries,
|
|
1161
|
+
vectorCacheEntries,
|
|
1162
|
+
vectorLoadMode,
|
|
1163
|
+
});
|
|
1164
|
+
}
|
|
1165
|
+
}
|