verso-db 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.md +13 -7
- package/dist/BinaryHeap.d.ts +11 -1
- package/dist/BinaryHeap.d.ts.map +1 -1
- package/dist/BinaryHeap.js +138 -0
- package/dist/BinaryHeap.js.map +1 -0
- package/dist/Collection.d.ts +30 -4
- package/dist/Collection.d.ts.map +1 -1
- package/dist/Collection.js +1186 -0
- package/dist/Collection.js.map +1 -0
- package/dist/HNSWIndex.d.ts +59 -0
- package/dist/HNSWIndex.d.ts.map +1 -1
- package/dist/HNSWIndex.js +2818 -0
- package/dist/HNSWIndex.js.map +1 -0
- package/dist/MaxBinaryHeap.d.ts +2 -64
- package/dist/MaxBinaryHeap.d.ts.map +1 -1
- package/dist/MaxBinaryHeap.js +5 -0
- package/dist/MaxBinaryHeap.js.map +1 -0
- package/dist/SearchWorker.d.ts +57 -4
- package/dist/SearchWorker.d.ts.map +1 -1
- package/dist/SearchWorker.js +573 -0
- package/dist/SearchWorker.js.map +1 -0
- package/dist/VectorDB.d.ts.map +1 -1
- package/dist/VectorDB.js +246 -0
- package/dist/VectorDB.js.map +1 -0
- package/dist/WorkerPool.d.ts +32 -2
- package/dist/WorkerPool.d.ts.map +1 -1
- package/dist/WorkerPool.js +266 -0
- package/dist/WorkerPool.js.map +1 -0
- package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
- package/dist/backends/JsDistanceBackend.js +163 -0
- package/dist/backends/JsDistanceBackend.js.map +1 -0
- package/dist/encoding/DeltaEncoder.d.ts +2 -2
- package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
- package/dist/encoding/DeltaEncoder.js +199 -0
- package/dist/encoding/DeltaEncoder.js.map +1 -0
- package/dist/errors.js +97 -0
- package/dist/errors.js.map +1 -0
- package/dist/index.d.ts +3 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +61 -42
- package/dist/index.js.map +1 -9
- package/dist/presets.js +205 -0
- package/dist/presets.js.map +1 -0
- package/dist/quantization/ScalarQuantizer.d.ts +0 -34
- package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
- package/dist/quantization/ScalarQuantizer.js +346 -0
- package/dist/quantization/ScalarQuantizer.js.map +1 -0
- package/dist/storage/BatchWriter.js +351 -0
- package/dist/storage/BatchWriter.js.map +1 -0
- package/dist/storage/BunStorageBackend.d.ts +7 -3
- package/dist/storage/BunStorageBackend.d.ts.map +1 -1
- package/dist/storage/BunStorageBackend.js +182 -0
- package/dist/storage/BunStorageBackend.js.map +1 -0
- package/dist/storage/MemoryBackend.js +109 -0
- package/dist/storage/MemoryBackend.js.map +1 -0
- package/dist/storage/OPFSBackend.d.ts.map +1 -1
- package/dist/storage/OPFSBackend.js +325 -0
- package/dist/storage/OPFSBackend.js.map +1 -0
- package/dist/storage/StorageBackend.js +12 -0
- package/dist/storage/StorageBackend.js.map +1 -0
- package/dist/storage/WriteAheadLog.js +321 -0
- package/dist/storage/WriteAheadLog.js.map +1 -0
- package/dist/storage/createStorageBackend.d.ts +4 -0
- package/dist/storage/createStorageBackend.d.ts.map +1 -1
- package/dist/storage/createStorageBackend.js +119 -0
- package/dist/storage/createStorageBackend.js.map +1 -0
- package/{src/storage/index.ts → dist/storage/index.js} +7 -27
- package/dist/storage/index.js.map +1 -0
- package/dist/storage/nodeFsRuntime.d.ts +14 -0
- package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
- package/dist/storage/nodeFsRuntime.js +105 -0
- package/dist/storage/nodeFsRuntime.js.map +1 -0
- package/package.json +9 -7
- package/src/BinaryHeap.ts +0 -136
- package/src/Collection.ts +0 -1262
- package/src/HNSWIndex.ts +0 -2894
- package/src/MaxBinaryHeap.ts +0 -181
- package/src/SearchWorker.ts +0 -264
- package/src/VectorDB.ts +0 -319
- package/src/WorkerPool.ts +0 -222
- package/src/backends/JsDistanceBackend.ts +0 -171
- package/src/encoding/DeltaEncoder.ts +0 -236
- package/src/errors.ts +0 -110
- package/src/index.ts +0 -106
- package/src/presets.ts +0 -229
- package/src/quantization/ScalarQuantizer.ts +0 -487
- package/src/storage/BatchWriter.ts +0 -420
- package/src/storage/BunStorageBackend.ts +0 -199
- package/src/storage/MemoryBackend.ts +0 -122
- package/src/storage/OPFSBackend.ts +0 -348
- package/src/storage/StorageBackend.ts +0 -74
- package/src/storage/WriteAheadLog.ts +0 -379
- package/src/storage/createStorageBackend.ts +0 -137
|
@@ -1,420 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Batch Write Coalescing for Storage Backends
|
|
3
|
-
*
|
|
4
|
-
* Buffers writes in memory and flushes them in batches to reduce I/O operations.
|
|
5
|
-
* This is especially beneficial for:
|
|
6
|
-
* - OPFS where each write has overhead
|
|
7
|
-
* - IndexedDB where transactions have cost
|
|
8
|
-
* - Network storage where round-trips are expensive
|
|
9
|
-
*
|
|
10
|
-
* Features:
|
|
11
|
-
* - Configurable flush thresholds (count and size)
|
|
12
|
-
* - Automatic flushing when thresholds are reached
|
|
13
|
-
* - Explicit flush for durability guarantees
|
|
14
|
-
* - Coalesces multiple writes to the same key
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
import type { StorageBackend } from './StorageBackend';
|
|
18
|
-
|
|
19
|
-
/**
 * Tuning knobs for a BatchWriter. Every field is optional; the defaults
 * quoted below are the ones the BatchWriter constructor applies.
 */
export interface BatchWriterOptions {
  /** Pending-entry count that triggers an automatic flush (default: 100). */
  maxPendingWrites?: number;

  /** Pending payload size, in bytes, that triggers an automatic flush (default: 1MB). */
  maxPendingBytes?: number;

  /** Period of the background flush timer in milliseconds; 0 disables it (default: 0). */
  autoFlushInterval?: number;
}
|
|
27
|
-
|
|
28
|
-
/** One buffered full-value write waiting to be flushed to the backend. */
interface PendingWrite {
  /** Storage key the payload is destined for. */
  key: string;
  /** Payload bytes held until the next flush. */
  data: Uint8Array;
  /** Append marker; BatchWriter.write() stores `false` here. */
  append: boolean;
}
|
|
33
|
-
|
|
34
|
-
/**
 * BatchWriter - buffers writes and appends in memory and pushes them to a
 * StorageBackend in batches.
 *
 * - `write` keeps only the latest value per key.
 * - `append` chunks are queued per key and concatenated into one backend
 *   append per key on flush.
 * - A flush runs when the number of pending keys or the pending byte total
 *   reaches its limit, on the optional interval timer, or when `flush()` /
 *   `close()` is called.
 */
export class BatchWriter {
  private backend: StorageBackend;
  /** Latest buffered full-value write per key. */
  private pendingWrites: Map<string, PendingWrite> = new Map();
  /** Buffered append chunks per key, in arrival order. */
  private pendingAppends: Map<string, Uint8Array[]> = new Map();
  /** Total payload bytes currently held in the two maps above. */
  private pendingBytes = 0;
  private maxPendingWrites: number;
  private maxPendingBytes: number;
  private autoFlushInterval: number;
  // The timer is created with setInterval, so derive the handle type from it.
  private flushTimer: ReturnType<typeof setInterval> | null = null;
  private isFlushing = false;
  /** Promise of the flush currently in flight, or null when idle. */
  private flushPromise: Promise<void> | null = null;

  /**
   * @param backend Storage backend that receives the flushed data
   * @param options Flush thresholds and optional auto-flush interval
   */
  constructor(backend: StorageBackend, options: BatchWriterOptions = {}) {
    this.backend = backend;
    this.maxPendingWrites = options.maxPendingWrites ?? 100;
    this.maxPendingBytes = options.maxPendingBytes ?? 1024 * 1024; // 1MB
    this.autoFlushInterval = options.autoFlushInterval ?? 0;

    if (this.autoFlushInterval > 0) {
      this.startAutoFlush();
    }
  }

  /** Copy the caller's bytes so later mutation of their buffer cannot change buffered data. */
  private static copyBytes(data: ArrayBuffer | Uint8Array): Uint8Array {
    const bytes = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
    const copy = new Uint8Array(bytes.length);
    copy.set(bytes);
    return copy;
  }

  /** Return a standalone ArrayBuffer holding exactly the bytes of the view. */
  private static toArrayBuffer(bytes: Uint8Array): ArrayBuffer {
    return bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.length) as ArrayBuffer;
  }

  /** Drop all buffered append chunks for a key and release their byte count. */
  private dropPendingAppends(key: string): void {
    const chunks = this.pendingAppends.get(key);
    if (!chunks) return;
    for (const chunk of chunks) {
      this.pendingBytes -= chunk.length;
    }
    this.pendingAppends.delete(key);
  }

  /**
   * Write data to a key (buffered).
   * Multiple writes to the same key coalesce to the last value, and any
   * buffered appends for the key are discarded because the write overwrites.
   * May trigger a flush when a threshold is reached.
   */
  async write(key: string, data: ArrayBuffer | Uint8Array): Promise<void> {
    const bytesCopy = BatchWriter.copyBytes(data);

    // The entry is replaced in place below, so only its byte count is released here.
    const existing = this.pendingWrites.get(key);
    if (existing) {
      this.pendingBytes -= existing.data.length;
    }
    this.dropPendingAppends(key);

    this.pendingWrites.set(key, { key, data: bytesCopy, append: false });
    this.pendingBytes += bytesCopy.length;

    await this.checkThresholds();
  }

  /**
   * Append data to a key (buffered).
   * If a write for the key is already buffered the bytes are concatenated
   * onto it; otherwise the chunk is queued for a backend append.
   * May trigger a flush when a threshold is reached.
   */
  async append(key: string, data: ArrayBuffer | Uint8Array): Promise<void> {
    const bytesCopy = BatchWriter.copyBytes(data);

    const existingWrite = this.pendingWrites.get(key);
    if (existingWrite) {
      const merged = new Uint8Array(existingWrite.data.length + bytesCopy.length);
      merged.set(existingWrite.data);
      merged.set(bytesCopy, existingWrite.data.length);
      this.pendingBytes += merged.length - existingWrite.data.length;
      existingWrite.data = merged;
    } else {
      let chunks = this.pendingAppends.get(key);
      if (!chunks) {
        chunks = [];
        this.pendingAppends.set(key, chunks);
      }
      chunks.push(bytesCopy);
      this.pendingBytes += bytesCopy.length;
    }

    await this.checkThresholds();
  }

  /**
   * Delete a key.
   * Buffered writes/appends for the key are dropped, then the backend delete
   * is issued immediately (deletes are not batched).
   *
   * NOTE(review): entries already captured by a flush that is in flight are
   * not affected by this call, so that flush may still persist the key after
   * the delete completes.
   */
  async delete(key: string): Promise<void> {
    const existing = this.pendingWrites.get(key);
    if (existing) {
      this.pendingBytes -= existing.data.length;
      this.pendingWrites.delete(key);
    }
    this.dropPendingAppends(key);

    await this.backend.delete(key);
  }

  /**
   * Read data from a key.
   * A buffered write is returned as-is; buffered appends are combined with
   * whatever the backend currently holds; otherwise the backend is read.
   */
  async read(key: string): Promise<ArrayBuffer | null> {
    const pending = this.pendingWrites.get(key);
    if (pending) {
      return BatchWriter.toArrayBuffer(pending.data);
    }

    // Buffered appends must be combined with the backend contents. The
    // backend read is asynchronous, so the buffered state is re-validated
    // afterwards and the whole step is retried if it changed in the meantime.
    while (true) {
      const appends = this.pendingAppends.get(key);
      if (!appends || appends.length === 0) {
        return this.backend.read(key);
      }

      const appendCount = appends.length;
      const existing = await this.backend.read(key);

      // A write that arrived during the backend read supersedes the appends.
      const latestWrite = this.pendingWrites.get(key);
      if (latestWrite) {
        return BatchWriter.toArrayBuffer(latestWrite.data);
      }

      const latestAppends = this.pendingAppends.get(key);
      if (!latestAppends || latestAppends !== appends || latestAppends.length !== appendCount) {
        // Flushed or extended while waiting: retry with a fresh snapshot to
        // avoid returning duplicated or stale data.
        continue;
      }

      const existingBytes = existing ? new Uint8Array(existing) : new Uint8Array(0);

      let totalSize = existingBytes.length;
      for (let i = 0; i < appendCount; i++) {
        totalSize += latestAppends[i].length;
      }

      const result = new Uint8Array(totalSize);
      result.set(existingBytes);
      let offset = existingBytes.length;
      for (let i = 0; i < appendCount; i++) {
        const chunk = latestAppends[i];
        result.set(chunk, offset);
        offset += chunk.length;
      }

      return result.buffer;
    }
  }

  /** Flush when the pending key count or the pending byte total reaches its limit. */
  private async checkThresholds(): Promise<void> {
    const totalWrites = this.pendingWrites.size + this.pendingAppends.size;

    if (totalWrites >= this.maxPendingWrites || this.pendingBytes >= this.maxPendingBytes) {
      await this.flush();
    }
  }

  /**
   * Flush all pending writes to the backend.
   *
   * Resolves only once everything that was buffered when the call was made
   * has been handed to the backend. Rejects with the backend error if a flush
   * fails; failed entries stay buffered for a later retry.
   */
  async flush(): Promise<void> {
    // Wait until no flush is in flight. This has to be a loop: when several
    // callers wait on the same flush, the first one to resume starts a new
    // flush that takes over all buffered data. The remaining callers would
    // then see empty buffers and return before that new flush has finished.
    while (this.flushPromise) {
      await this.flushPromise;
    }

    if (this.pendingWrites.size === 0 && this.pendingAppends.size === 0) {
      return;
    }

    this.isFlushing = true;
    const inFlight = this.doFlush();
    this.flushPromise = inFlight;

    try {
      await inFlight;
    } finally {
      this.isFlushing = false;
      this.flushPromise = null;
    }
  }

  /**
   * Take ownership of everything buffered and send it to the backend, writes
   * first and then one combined append per key. Entries whose backend call
   * fails are put back into the buffers and the first error is rethrown.
   */
  private async doFlush(): Promise<void> {
    // Snapshot and clear synchronously so operations arriving during the
    // awaits below land in fresh buffers.
    const writes = Array.from(this.pendingWrites.entries());
    const appendEntries = Array.from(this.pendingAppends.entries());

    this.pendingWrites.clear();
    this.pendingAppends.clear();
    this.pendingBytes = 0;

    // Run sequentially and track failures per key: append is not idempotent,
    // so it must be known exactly which operations failed before retrying.
    const failedWrites: Array<[string, PendingWrite]> = [];
    const failedAppends: Array<[string, Uint8Array[]]> = [];
    let firstError: unknown = null;

    for (const [key, write] of writes) {
      try {
        await this.backend.write(write.key, write.data);
      } catch (err) {
        if (!firstError) firstError = err;
        failedWrites.push([key, write]);
      }
    }

    for (const [key, chunks] of appendEntries) {
      let totalSize = 0;
      for (const chunk of chunks) {
        totalSize += chunk.length;
      }

      const combined = new Uint8Array(totalSize);
      let offset = 0;
      for (const chunk of chunks) {
        combined.set(chunk, offset);
        offset += chunk.length;
      }

      try {
        await this.backend.append(key, combined);
      } catch (err) {
        if (!firstError) firstError = err;
        failedAppends.push([key, chunks]);
      }
    }

    if (firstError) {
      // Restore only what failed; everything else is already persisted.
      let restoredBytes = 0;
      for (const [key, write] of failedWrites) {
        // A newer buffered write for the key wins over the failed one.
        if (!this.pendingWrites.has(key)) {
          this.pendingWrites.set(key, write);
          restoredBytes += write.data.length;
        }
      }
      for (const [key, chunks] of failedAppends) {
        // A newer buffered write supersedes the old appends; restoring them
        // would break write ordering.
        if (this.pendingWrites.has(key)) {
          continue;
        }

        const newer = this.pendingAppends.get(key);
        // Failed chunks go in front of chunks that arrived during the flush.
        this.pendingAppends.set(key, newer ? [...chunks, ...newer] : chunks);
        for (const chunk of chunks) {
          restoredBytes += chunk.length;
        }
      }
      this.pendingBytes += restoredBytes;
      throw firstError;
    }
  }

  /**
   * Get statistics about pending writes.
   * `pendingWrites` and `pendingAppends` are counts of distinct keys.
   */
  getStats(): {
    pendingWrites: number;
    pendingAppends: number;
    pendingBytes: number;
    maxPendingWrites: number;
    maxPendingBytes: number;
  } {
    return {
      pendingWrites: this.pendingWrites.size,
      pendingAppends: this.pendingAppends.size,
      pendingBytes: this.pendingBytes,
      maxPendingWrites: this.maxPendingWrites,
      maxPendingBytes: this.maxPendingBytes
    };
  }

  /** Check if any write or append is currently buffered. */
  hasPendingWrites(): boolean {
    return this.pendingWrites.size > 0 || this.pendingAppends.size > 0;
  }

  /** Start the auto-flush timer (no-op if it is already running). */
  private startAutoFlush(): void {
    if (this.flushTimer) return;

    const timer = setInterval(async () => {
      if (this.hasPendingWrites()) {
        try {
          await this.flush();
        } catch {
          // Failed entries were put back into the buffers; they are retried
          // on the next interval or explicit flush.
        }
      }
    }, this.autoFlushInterval);
    this.flushTimer = timer;

    // Where the timer handle is an object with unref(), call it so the timer
    // alone does not keep the process alive.
    if (typeof timer === 'object' && timer !== null && 'unref' in timer) {
      (timer as { unref(): void }).unref();
    }
  }

  /** Stop the auto-flush timer. */
  stopAutoFlush(): void {
    if (this.flushTimer) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
  }

  /** Close the batch writer: stop the timer, then flush any pending writes. */
  async close(): Promise<void> {
    this.stopAutoFlush();
    await this.flush();
  }

  /** Get the underlying storage backend. */
  getBackend(): StorageBackend {
    return this.backend;
  }
}
|
|
411
|
-
|
|
412
|
-
/**
 * Factory shorthand: wrap an existing storage backend in a new BatchWriter.
 *
 * @param backend Storage backend the writer will flush into
 * @param options Optional thresholds, forwarded unchanged to the constructor
 * @returns A freshly constructed BatchWriter
 */
export function createBatchWriter(backend: StorageBackend, options?: BatchWriterOptions): BatchWriter {
  const writer = new BatchWriter(backend, options);
  return writer;
}
|
|
@@ -1,199 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Bun Storage Backend
|
|
3
|
-
*
|
|
4
|
-
* High-performance file system storage using Bun's native APIs.
|
|
5
|
-
* Features:
|
|
6
|
-
* - Auto-initializing (no manual init() required)
|
|
7
|
-
* - Automatic memory mapping for large files
|
|
8
|
-
* - Efficient async I/O with O(1) append
|
|
9
|
-
* - Native TypedArray support
|
|
10
|
-
*/
|
|
11
|
-
|
|
12
|
-
import { mkdir, readdir, unlink, rm, appendFile, rename } from 'fs/promises';
|
|
13
|
-
import * as path from 'path';
|
|
14
|
-
import type { StorageBackend } from './StorageBackend';
|
|
15
|
-
|
|
16
|
-
/**
 * StorageBackend implementation that stores each key as a file below a base
 * directory, using `Bun.file` / `Bun.write` together with `fs/promises`.
 * Parent directories are created on demand by `write` and `append`.
 */
export class BunStorageBackend implements StorageBackend {
  readonly type = 'bun';
  private basePath: string;
  // Directories already known to exist, so mkdir is not repeated on every write.
  private dirCache: Set<string> = new Set();

  /**
   * Create a new Bun storage backend.
   * @param basePath Base directory for all storage operations
   */
  constructor(basePath: string = './vectordb_data') {
    this.basePath = basePath;
  }

  /**
   * Ensure a directory exists, consulting the cache first.
   * @throws Any mkdir error other than EEXIST
   */
  private async ensureDir(dir: string): Promise<void> {
    if (this.dirCache.has(dir)) return;
    try {
      await mkdir(dir, { recursive: true });
      this.dirCache.add(dir);
    } catch (err: unknown) {
      // EEXIST is fine - the directory is already there.
      if (err instanceof Error && (err as NodeJS.ErrnoException).code === 'EEXIST') {
        this.dirCache.add(dir);
        return;
      }
      // Surface real failures here rather than letting the following
      // write/append fail with a less helpful error.
      throw err;
    }
  }

  /**
   * Resolve a key to an absolute path below the base directory.
   *
   * Containment is decided from the path of the result relative to the base.
   * A plain string-prefix test against `base + separator` rejects every key
   * when the base is a filesystem root, because the root already ends with
   * the separator.
   *
   * @throws Error when the key resolves outside the storage directory
   */
  private getFullPath(key: string): string {
    const resolvedBase = path.resolve(this.basePath);
    const fullPath = path.resolve(resolvedBase, key);
    const relative = path.relative(resolvedBase, fullPath);
    const escapesBase =
      relative === '..' || relative.startsWith('..' + path.sep) || path.isAbsolute(relative);
    if (escapesBase) {
      throw new Error(`Path traversal detected: key '${key}' resolves outside storage directory`);
    }
    return fullPath;
  }

  /**
   * Ensure base directory exists (optional - operations auto-initialize).
   * Errors are deliberately ignored (best effort).
   * @deprecated No longer required - write/append create directories automatically
   */
  async init(): Promise<void> {
    await mkdir(this.basePath, { recursive: true }).catch(() => {});
  }

  /** Read the whole file for a key; resolves to null when the file does not exist. */
  async read(key: string): Promise<ArrayBuffer | null> {
    const fullPath = this.getFullPath(key);
    const file = Bun.file(fullPath);

    if (!(await file.exists())) {
      return null;
    }

    return file.arrayBuffer();
  }

  /**
   * Replace the contents of a key.
   * The data is written to `<path>.tmp` and then renamed over the target, so
   * a failure part-way through leaves the previous file untouched. If the
   * write or the rename fails, the temp file is removed (best effort) and the
   * original error is rethrown.
   */
  async write(key: string, data: ArrayBuffer | Uint8Array): Promise<void> {
    const fullPath = this.getFullPath(key);

    await this.ensureDir(path.dirname(fullPath));

    const tmpPath = fullPath + '.tmp';
    try {
      await Bun.write(tmpPath, data);
      await rename(tmpPath, fullPath);
    } catch (err: unknown) {
      // Do not leave a partial temp file behind; the original error is the
      // one the caller needs to see.
      await unlink(tmpPath).catch(() => {});
      throw err;
    }
  }

  /** Append bytes to the end of a key's file via `appendFile`, creating parent directories first. */
  async append(key: string, data: ArrayBuffer | Uint8Array): Promise<void> {
    const fullPath = this.getFullPath(key);

    await this.ensureDir(path.dirname(fullPath));

    const appendData = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
    await appendFile(fullPath, appendData);
  }

  /** Delete a key's file. A missing file is not an error; anything else propagates. */
  async delete(key: string): Promise<void> {
    const fullPath = this.getFullPath(key);
    try {
      await unlink(fullPath);
    } catch (err: unknown) {
      const code = (err as { code?: unknown } | null | undefined)?.code;
      if (code !== 'ENOENT') {
        throw err;
      }
    }
  }

  /** Check whether a file exists for the key. */
  async exists(key: string): Promise<boolean> {
    const fullPath = this.getFullPath(key);
    const file = Bun.file(fullPath);
    return file.exists();
  }

  /**
   * List files recursively.
   * @param prefix Directory (relative to the base path) to search; the whole
   *               base directory when omitted
   * @returns File paths relative to the base path; an empty array when the
   *          directory cannot be read
   */
  async list(prefix?: string): Promise<string[]> {
    const searchPath = prefix ? this.getFullPath(prefix) : this.basePath;

    try {
      const entries = await readdir(searchPath, { recursive: true, withFileTypes: true });
      const results: string[] = [];
      for (const entry of entries) {
        if (entry.isFile()) {
          // The containing directory is exposed as `parentPath` or `path`
          // depending on the runtime; fall back to the search root.
          const located = entry as { parentPath?: string; path?: string };
          const entryDir = located.parentPath ?? located.path ?? searchPath;
          const fullPath = path.join(entryDir, entry.name);
          results.push(path.relative(this.basePath, fullPath));
        }
      }
      return results;
    } catch {
      return [];
    }
  }

  /** Create a directory (and missing parents) below the base path. */
  async mkdir(dirPath: string): Promise<void> {
    const fullPath = this.getFullPath(dirPath);
    try {
      await mkdir(fullPath, { recursive: true });
    } catch (err: unknown) {
      if (err instanceof Error && (err as NodeJS.ErrnoException).code === 'EEXIST') {
        return;
      }
      throw err;
    }
  }

  /**
   * Delete all data in the storage directory and recreate it empty.
   * Errors from the removal and from the re-creation are ignored (best effort).
   */
  async clear(): Promise<void> {
    await rm(this.basePath, { recursive: true, force: true }).catch(() => {});
    await mkdir(this.basePath, { recursive: true }).catch(() => {});
    // Cached sub-directories no longer exist; only the base was recreated.
    this.dirCache.clear();
    this.dirCache.add(path.resolve(this.basePath));
  }

  /** Get the file size for a key without reading its contents; 0 when the file does not exist. */
  async size(key: string): Promise<number> {
    const fullPath = this.getFullPath(key);
    const file = Bun.file(fullPath);

    if (!(await file.exists())) {
      return 0;
    }

    return file.size;
  }

  /**
   * Read a file as a stream (for large files).
   * Note: callers should check exists() first; this method does not check
   * for existence itself, and the original author notes that streaming a
   * missing file only fails once the stream is read.
   */
  stream(key: string): ReadableStream<Uint8Array> | null {
    const fullPath = this.getFullPath(key);
    return Bun.file(fullPath).stream();
  }

  /** Get the base path exactly as it was passed to the constructor. */
  getBasePath(): string {
    return this.basePath;
  }
}
|