verso-db 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/README.md +13 -7
  3. package/dist/BinaryHeap.d.ts +11 -1
  4. package/dist/BinaryHeap.d.ts.map +1 -1
  5. package/dist/BinaryHeap.js +138 -0
  6. package/dist/BinaryHeap.js.map +1 -0
  7. package/dist/Collection.d.ts +30 -4
  8. package/dist/Collection.d.ts.map +1 -1
  9. package/dist/Collection.js +1186 -0
  10. package/dist/Collection.js.map +1 -0
  11. package/dist/HNSWIndex.d.ts +59 -0
  12. package/dist/HNSWIndex.d.ts.map +1 -1
  13. package/dist/HNSWIndex.js +2818 -0
  14. package/dist/HNSWIndex.js.map +1 -0
  15. package/dist/MaxBinaryHeap.d.ts +2 -64
  16. package/dist/MaxBinaryHeap.d.ts.map +1 -1
  17. package/dist/MaxBinaryHeap.js +5 -0
  18. package/dist/MaxBinaryHeap.js.map +1 -0
  19. package/dist/SearchWorker.d.ts +57 -4
  20. package/dist/SearchWorker.d.ts.map +1 -1
  21. package/dist/SearchWorker.js +573 -0
  22. package/dist/SearchWorker.js.map +1 -0
  23. package/dist/VectorDB.d.ts.map +1 -1
  24. package/dist/VectorDB.js +246 -0
  25. package/dist/VectorDB.js.map +1 -0
  26. package/dist/WorkerPool.d.ts +32 -2
  27. package/dist/WorkerPool.d.ts.map +1 -1
  28. package/dist/WorkerPool.js +266 -0
  29. package/dist/WorkerPool.js.map +1 -0
  30. package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
  31. package/dist/backends/JsDistanceBackend.js +163 -0
  32. package/dist/backends/JsDistanceBackend.js.map +1 -0
  33. package/dist/encoding/DeltaEncoder.d.ts +2 -2
  34. package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
  35. package/dist/encoding/DeltaEncoder.js +199 -0
  36. package/dist/encoding/DeltaEncoder.js.map +1 -0
  37. package/dist/errors.js +97 -0
  38. package/dist/errors.js.map +1 -0
  39. package/dist/index.d.ts +3 -3
  40. package/dist/index.d.ts.map +1 -1
  41. package/dist/index.js +61 -42
  42. package/dist/index.js.map +1 -9
  43. package/dist/presets.js +205 -0
  44. package/dist/presets.js.map +1 -0
  45. package/dist/quantization/ScalarQuantizer.d.ts +0 -34
  46. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
  47. package/dist/quantization/ScalarQuantizer.js +346 -0
  48. package/dist/quantization/ScalarQuantizer.js.map +1 -0
  49. package/dist/storage/BatchWriter.js +351 -0
  50. package/dist/storage/BatchWriter.js.map +1 -0
  51. package/dist/storage/BunStorageBackend.d.ts +7 -3
  52. package/dist/storage/BunStorageBackend.d.ts.map +1 -1
  53. package/dist/storage/BunStorageBackend.js +182 -0
  54. package/dist/storage/BunStorageBackend.js.map +1 -0
  55. package/dist/storage/MemoryBackend.js +109 -0
  56. package/dist/storage/MemoryBackend.js.map +1 -0
  57. package/dist/storage/OPFSBackend.d.ts.map +1 -1
  58. package/dist/storage/OPFSBackend.js +325 -0
  59. package/dist/storage/OPFSBackend.js.map +1 -0
  60. package/dist/storage/StorageBackend.js +12 -0
  61. package/dist/storage/StorageBackend.js.map +1 -0
  62. package/dist/storage/WriteAheadLog.js +321 -0
  63. package/dist/storage/WriteAheadLog.js.map +1 -0
  64. package/dist/storage/createStorageBackend.d.ts +4 -0
  65. package/dist/storage/createStorageBackend.d.ts.map +1 -1
  66. package/dist/storage/createStorageBackend.js +119 -0
  67. package/dist/storage/createStorageBackend.js.map +1 -0
  68. package/{src/storage/index.ts → dist/storage/index.js} +7 -27
  69. package/dist/storage/index.js.map +1 -0
  70. package/dist/storage/nodeFsRuntime.d.ts +14 -0
  71. package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
  72. package/dist/storage/nodeFsRuntime.js +105 -0
  73. package/dist/storage/nodeFsRuntime.js.map +1 -0
  74. package/package.json +9 -7
  75. package/src/BinaryHeap.ts +0 -136
  76. package/src/Collection.ts +0 -1262
  77. package/src/HNSWIndex.ts +0 -2894
  78. package/src/MaxBinaryHeap.ts +0 -181
  79. package/src/SearchWorker.ts +0 -264
  80. package/src/VectorDB.ts +0 -319
  81. package/src/WorkerPool.ts +0 -222
  82. package/src/backends/JsDistanceBackend.ts +0 -171
  83. package/src/encoding/DeltaEncoder.ts +0 -236
  84. package/src/errors.ts +0 -110
  85. package/src/index.ts +0 -106
  86. package/src/presets.ts +0 -229
  87. package/src/quantization/ScalarQuantizer.ts +0 -487
  88. package/src/storage/BatchWriter.ts +0 -420
  89. package/src/storage/BunStorageBackend.ts +0 -199
  90. package/src/storage/MemoryBackend.ts +0 -122
  91. package/src/storage/OPFSBackend.ts +0 -348
  92. package/src/storage/StorageBackend.ts +0 -74
  93. package/src/storage/WriteAheadLog.ts +0 -379
  94. package/src/storage/createStorageBackend.ts +0 -137
@@ -1,379 +0,0 @@
1
- /**
2
- * Write-Ahead Log (WAL) for Incremental Index Updates
3
- *
4
- * Provides durability and efficient incremental writes for HNSW index operations.
5
- * Instead of rewriting the entire index on each update, operations are appended
6
- * to a log file. The log can be compacted into a full snapshot periodically.
7
- *
8
- * Benefits:
9
- * - Fast appends (no full serialization)
10
- * - Crash recovery (replay log after restart)
11
- * - Reduced I/O for frequent updates
12
- */
13
-
14
- import type { StorageBackend } from './StorageBackend';
15
-
16
/**
 * Kinds of operation that can be recorded in the write-ahead log.
 *
 * The numeric value of each member is written to the log as the first byte of
 * every serialized entry, so existing values must never be renumbered.
 */
export enum WALOperationType {
  /** A vector was added (payload: id, vector length, vector data). */
  ADD_VECTOR = 1,
  /** A node's neighbor list on one layer was set (payload: node id, layer, neighbor ids). */
  ADD_NEIGHBORS = 2,
  /** The entry point / max level changed (payload: entry point id, max level). */
  UPDATE_ENTRY_POINT = 3,
  /** Checkpoint marker (payload: the checkpoint time as a float64). */
  CHECKPOINT = 4,
}
22
-
23
/**
 * One logical record of the write-ahead log, as held in memory and as
 * returned when the log is read back.
 */
export interface WALEntry {
  /** Which operation this entry records; decides how `data` is laid out. */
  type: WALOperationType;
  /** Creation time of the entry in milliseconds (a `Date.now()` value). */
  timestamp: number;
  /** Operation-specific binary payload. */
  data: ArrayBuffer;
}
28
-
29
/**
 * WriteAheadLog - append-only log for incremental index updates.
 *
 * Operations are buffered in memory and written to the storage backend in
 * batches through `storage.append`. A batch is written when the number of
 * buffered entries reaches `flushThreshold`, when `checkpoint()` is called, or
 * when `flush()` is called explicitly.
 *
 * Serialized entry layout (multi-byte header fields are little-endian):
 *   [type: uint8] [timestamp: float64] [dataLength: uint32] [data: dataLength bytes]
 */
export class WriteAheadLog {
  /** Bytes in the fixed per-entry header: type (1) + timestamp (8) + dataLength (4). */
  private static readonly HEADER_SIZE = 1 + 8 + 4;

  private storage: StorageBackend;
  private logKey: string;
  /** Entries accepted but not yet handed to a storage append. */
  private pendingEntries: WALEntry[] = [];
  private flushThreshold: number;
  private entryCount: number = 0;
  /** The storage append currently in flight, or null when none is running. */
  private flushPromise: Promise<void> | null = null;

  /**
   * Create a new WAL
   * @param storage StorageBackend for persistence
   * @param logKey Storage key for the WAL data (e.g., "myindex.wal")
   * @param flushThreshold Number of buffered entries that triggers an automatic flush (default: 100)
   */
  constructor(storage: StorageBackend, logKey: string, flushThreshold: number = 100) {
    this.storage = storage;
    this.logKey = logKey;
    this.flushThreshold = flushThreshold;
  }

  /**
   * Get the WAL storage key
   */
  getKey(): string {
    return this.logKey;
  }

  /**
   * Check if WAL data exists in storage
   */
  async exists(): Promise<boolean> {
    return this.storage.exists(this.logKey);
  }

  /**
   * Buffer an entry and flush when required.
   *
   * The entry is pushed synchronously (before the first await), so the order of
   * entries follows the order in which the append methods were called.
   *
   * @param type Operation type of the entry
   * @param data Operation payload
   * @param options.timestamp Entry timestamp; defaults to `Date.now()`
   * @param options.forceFlush Flush even if the threshold has not been reached
   */
  private async enqueue(
    type: WALOperationType,
    data: ArrayBuffer,
    options: { timestamp?: number; forceFlush?: boolean } = {},
  ): Promise<void> {
    this.pendingEntries.push({ type, timestamp: options.timestamp ?? Date.now(), data });
    this.entryCount++;

    if (options.forceFlush === true || this.pendingEntries.length >= this.flushThreshold) {
      await this.flush();
    }
  }

  /**
   * Append a vector addition operation to the log
   *
   * Payload: [id (uint32)] [vector length (uint32)] [vector data (float32 each)]
   */
  async appendVector(id: number, vector: Float32Array): Promise<void> {
    const buffer = new ArrayBuffer(4 + 4 + vector.length * 4);
    const view = new DataView(buffer);

    view.setUint32(0, id, true);
    view.setUint32(4, vector.length, true);
    new Float32Array(buffer, 8).set(vector);

    await this.enqueue(WALOperationType.ADD_VECTOR, buffer);
  }

  /**
   * Append a neighbor update operation to the log
   *
   * Payload: [nodeId (uint32)] [layer (uint32)] [neighborCount (uint32)] [neighbors (uint32 each)]
   */
  async appendNeighbors(nodeId: number, layer: number, neighbors: number[]): Promise<void> {
    const buffer = new ArrayBuffer(4 + 4 + 4 + neighbors.length * 4);
    const view = new DataView(buffer);

    view.setUint32(0, nodeId, true);
    view.setUint32(4, layer, true);
    view.setUint32(8, neighbors.length, true);
    neighbors.forEach((neighbor, index) => {
      view.setUint32(12 + index * 4, neighbor, true);
    });

    await this.enqueue(WALOperationType.ADD_NEIGHBORS, buffer);
  }

  /**
   * Append entry point update to the log
   *
   * Payload: [entryPointId (int32)] [maxLevel (int32)]. Both fields are signed,
   * so negative values round-trip.
   */
  async appendEntryPointUpdate(entryPointId: number, maxLevel: number): Promise<void> {
    const buffer = new ArrayBuffer(8);
    const view = new DataView(buffer);

    view.setInt32(0, entryPointId, true);
    view.setInt32(4, maxLevel, true);

    await this.enqueue(WALOperationType.UPDATE_ENTRY_POINT, buffer);
  }

  /**
   * Write a checkpoint marker to the log and flush immediately.
   *
   * Payload: the checkpoint time as a float64.
   */
  async checkpoint(): Promise<void> {
    // Read the clock once so the header timestamp and the payload always agree.
    const now = Date.now();
    const buffer = new ArrayBuffer(8);
    new DataView(buffer).setFloat64(0, now, true);

    await this.enqueue(WALOperationType.CHECKPOINT, buffer, { timestamp: now, forceFlush: true });
  }

  /**
   * Serialize a WAL entry to bytes
   *
   * Format: [type (1)] [timestamp (8)] [dataLength (4)] [data...]
   */
  private serializeEntry(entry: WALEntry): Uint8Array {
    const headerSize = WriteAheadLog.HEADER_SIZE;
    const bytes = new Uint8Array(headerSize + entry.data.byteLength);
    const view = new DataView(bytes.buffer);

    view.setUint8(0, entry.type);
    view.setFloat64(1, entry.timestamp, true);
    view.setUint32(9, entry.data.byteLength, true);
    bytes.set(new Uint8Array(entry.data), headerSize);

    return bytes;
  }

  /**
   * Flush pending entries to storage using a single append per batch.
   *
   * Resolves only once no append is in flight and the buffer is empty, so every
   * entry buffered before the call has been handed to storage successfully.
   * At most one append runs at a time.
   *
   * @throws Whatever the storage backend's `append` rejects with. The entries
   * of the failed batch are put back at the front of the buffer first. Callers
   * that were waiting on that append receive the same rejection.
   */
  async flush(): Promise<void> {
    while (true) {
      // Re-check after EVERY wait. While this caller was suspended, the owner
      // of the previous flush may already have started a new one that carries
      // this caller's entries; returning (or starting a parallel append) before
      // that one settles would break durability and log ordering.
      while (this.flushPromise) {
        await this.flushPromise;
      }

      if (this.pendingEntries.length === 0) {
        return;
      }

      // Capture and swap synchronously so entries appended during the await
      // land in a fresh buffer and are picked up by the next loop pass.
      const toFlush = this.pendingEntries;
      this.pendingEntries = [];
      const currentFlush = this.flushEntries(toFlush);
      this.flushPromise = currentFlush;

      try {
        await currentFlush;
      } finally {
        if (this.flushPromise === currentFlush) {
          this.flushPromise = null;
        }
      }
    }
  }

  /**
   * Serialize a batch and append it to storage as one contiguous buffer.
   * On failure the batch is restored in front of any newer pending entries.
   */
  private async flushEntries(toFlush: WALEntry[]): Promise<void> {
    const serialized = toFlush.map((entry) => this.serializeEntry(entry));
    const totalSize = serialized.reduce((sum, bytes) => sum + bytes.length, 0);

    const combined = new Uint8Array(totalSize);
    let offset = 0;
    for (const bytes of serialized) {
      combined.set(bytes, offset);
      offset += bytes.length;
    }

    try {
      await this.storage.append(this.logKey, combined);
    } catch (err) {
      // Restore entries on failure so they aren't lost
      this.pendingEntries = [...toFlush, ...this.pendingEntries];
      throw err;
    }
  }

  /**
   * Read all entries currently stored under the WAL key.
   *
   * Parsing stops (with a console warning) at the first truncated header,
   * truncated payload, or unknown operation type; entries before that point
   * are still returned. When at least some data was read, the entry count is
   * reset to the number of parsed entries plus the entries still buffered.
   *
   * @returns The parsed entries in log order, or an empty array when there is no data
   */
  async readEntries(): Promise<WALEntry[]> {
    const data = await this.storage.read(this.logKey);
    if (!data || data.byteLength === 0) {
      return [];
    }

    const buffer = data;
    const view = new DataView(buffer);
    const entries: WALEntry[] = [];
    const headerSize = WriteAheadLog.HEADER_SIZE;

    let offset = 0;
    while (offset < buffer.byteLength) {
      // Check we can read the header
      if (offset + headerSize > buffer.byteLength) {
        console.warn(`WAL: truncated entry header at offset ${offset}, stopping replay`);
        break;
      }

      const type = view.getUint8(offset) as WALOperationType;
      const timestamp = view.getFloat64(offset + 1, true);
      const dataLength = view.getUint32(offset + 9, true);

      // Validate data length
      if (dataLength > buffer.byteLength - offset - headerSize) {
        console.warn(`WAL: truncated entry data at offset ${offset} (expected ${dataLength} bytes, only ${buffer.byteLength - offset - headerSize} available), stopping replay`);
        break;
      }

      // Validate operation type
      if (type < WALOperationType.ADD_VECTOR || type > WALOperationType.CHECKPOINT) {
        console.warn(`WAL: unknown operation type ${type} at offset ${offset}, stopping replay`);
        break;
      }

      // Copy the payload out so each entry owns an independent ArrayBuffer.
      const payloadStart = offset + headerSize;
      const entryData = buffer.slice(payloadStart, payloadStart + dataLength);

      entries.push({ type, timestamp, data: entryData });
      offset = payloadStart + dataLength;
    }

    this.entryCount = entries.length + this.pendingEntries.length;
    return entries;
  }

  /**
   * Parse a vector addition entry
   *
   * The returned `vector` is a view over `data`, not a copy.
   */
  static parseVectorEntry(data: ArrayBuffer): { id: number; vector: Float32Array } {
    const view = new DataView(data);
    const id = view.getUint32(0, true);
    const vectorLength = view.getUint32(4, true);
    const vector = new Float32Array(data, 8, vectorLength);
    return { id, vector };
  }

  /**
   * Parse a neighbor update entry
   */
  static parseNeighborsEntry(data: ArrayBuffer): { nodeId: number; layer: number; neighbors: number[] } {
    const view = new DataView(data);
    const nodeId = view.getUint32(0, true);
    const layer = view.getUint32(4, true);
    const neighborCount = view.getUint32(8, true);

    const neighbors: number[] = [];
    for (let i = 0; i < neighborCount; i++) {
      neighbors.push(view.getUint32(12 + i * 4, true));
    }

    return { nodeId, layer, neighbors };
  }

  /**
   * Parse an entry point update entry
   */
  static parseEntryPointEntry(data: ArrayBuffer): { entryPointId: number; maxLevel: number } {
    const view = new DataView(data);
    return {
      entryPointId: view.getInt32(0, true),
      maxLevel: view.getInt32(4, true),
    };
  }

  /**
   * Get the number of entries recorded since the log was last cleared or
   * deleted (or, after `readEntries()`, stored entries plus buffered ones).
   */
  getEntryCount(): number {
    return this.entryCount;
  }

  /**
   * Clear the WAL (after successful compaction)
   *
   * Waits for in-flight appends to settle (ignoring their failure), drops all
   * buffered entries, and overwrites the stored log with empty data if the key
   * exists.
   */
  async clear(): Promise<void> {
    // Loop rather than check once: a new append may have started while we waited.
    // Kept inline (no awaited helper) so that with no flush in flight the
    // in-memory reset below still happens synchronously within this call.
    while (this.flushPromise) {
      await this.flushPromise.catch(() => {});
    }
    // Clear in-memory state first to prevent flush() from writing entries
    // after we truncate the on-disk WAL.
    this.pendingEntries = [];
    this.entryCount = 0;
    if (await this.storage.exists(this.logKey)) {
      await this.storage.write(this.logKey, new Uint8Array(0));
    }
  }

  /**
   * Delete the WAL data
   *
   * Waits for in-flight appends to settle (ignoring their failure), drops all
   * buffered entries, and deletes the storage key. Errors from the storage
   * delete are ignored because the key may not exist.
   */
  async delete(): Promise<void> {
    while (this.flushPromise) {
      await this.flushPromise.catch(() => {});
    }
    // Clear in-memory state first to prevent race with concurrent appends
    this.pendingEntries = [];
    this.entryCount = 0;
    try {
      await this.storage.delete(this.logKey);
    } catch {
      // Key may not exist
    }
  }
}
@@ -1,137 +0,0 @@
1
- /**
2
- * Storage Backend Factory
3
- *
4
- * Auto-detects the best storage backend for the current environment:
5
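- * - Bun: BunStorageBackend (file system). Only a global `Bun` is detected; other server runtimes such as Node.js are not recognized and use the fallback below.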
6
- * - Modern browsers: OPFSBackend (Origin Private File System)
7
- * - Fallback: MemoryBackend (in-memory)
8
- */
9
-
10
- import type { StorageBackend, StorageOptions } from './StorageBackend';
11
- import { BunStorageBackend } from './BunStorageBackend';
12
- import { MemoryBackend } from './MemoryBackend';
13
- import { OPFSBackend } from './OPFSBackend';
14
-
15
/** Storage backend selector; `'auto'` leaves the choice to runtime detection. */
export type StorageType =
  | 'auto'
  | 'bun'
  | 'opfs'
  | 'memory';
16
-
17
/**
 * Options accepted by `createStorageBackend`: everything in `StorageOptions`
 * plus an optional backend override.
 */
export interface CreateStorageOptions extends StorageOptions {
  /** Backend to use; when omitted, `createStorageBackend` treats it as `'auto'`. */
  type?: StorageType;
}
21
-
22
/**
 * Classify the runtime this module is executing in.
 *
 * @returns `'bun'` when a global `Bun` exists; otherwise `'browser'` when both
 * `window` and `navigator` globals exist; otherwise `'unknown'`.
 */
function detectEnvironment(): 'bun' | 'browser' | 'unknown' {
  // Bun takes precedence over every other signal.
  if (typeof Bun !== 'undefined') {
    return 'bun';
  }

  const looksLikeBrowser = typeof window !== 'undefined' && typeof navigator !== 'undefined';
  return looksLikeBrowser ? 'browser' : 'unknown';
}
38
-
39
/**
 * Build a storage backend, either the one explicitly requested or the best
 * match for the detected runtime.
 *
 * Explicit types:
 * - `'bun'`: `BunStorageBackend` rooted at `options.path` (default `'./vectordb_data'`), initialized.
 * - `'opfs'`: `OPFSBackend`, initialized; throws if OPFS reports itself unavailable.
 * - `'memory'`: `MemoryBackend`.
 *
 * With `'auto'` (the default) a detected Bun runtime gets `BunStorageBackend`;
 * a browser gets `OPFSBackend` when it is available and initializes
 * successfully; everything else gets `MemoryBackend`.
 *
 * @param options Configuration options
 * @returns Initialized storage backend
 * @throws Error when `type` is `'opfs'` and OPFS is not available
 *
 * @example
 * ```typescript
 * // Auto-detect best backend
 * const storage = await createStorageBackend();
 *
 * // Force specific backend
 * const bunStorage = await createStorageBackend({ type: 'bun', path: './data' });
 * const memStorage = await createStorageBackend({ type: 'memory' });
 * ```
 */
export async function createStorageBackend(options?: CreateStorageOptions): Promise<StorageBackend> {
  const requested = options?.type ?? 'auto';

  switch (requested) {
    case 'bun': {
      const forcedBun = new BunStorageBackend(options?.path ?? './vectordb_data');
      await forcedBun.init();
      return forcedBun;
    }
    case 'opfs': {
      if (!OPFSBackend.isAvailable()) {
        throw new Error('OPFS not available in this environment');
      }
      const forcedOpfs = new OPFSBackend();
      await forcedOpfs.init();
      return forcedOpfs;
    }
    case 'memory':
      return new MemoryBackend();
    default:
      // 'auto': continue with runtime detection below.
      break;
  }

  const runtime = detectEnvironment();

  if (runtime === 'bun') {
    const bunBackend = new BunStorageBackend(options?.path ?? './vectordb_data');
    await bunBackend.init();
    return bunBackend;
  }

  if (runtime === 'browser' && OPFSBackend.isAvailable()) {
    try {
      const opfsBackend = new OPFSBackend();
      await opfsBackend.init();
      return opfsBackend;
    } catch {
      // OPFS failed to initialize; use the in-memory fallback instead.
    }
  }

  return new MemoryBackend();
}
103
-
104
/**
 * Report which storage type auto-detection would favor in this environment.
 *
 * @returns `'bun'` in a detected Bun runtime, `'opfs'` in a browser where OPFS
 * is available, and `'memory'` in every other case.
 */
export function getRecommendedStorageType(): StorageType {
  switch (detectEnvironment()) {
    case 'bun':
      return 'bun';
    case 'browser':
      return OPFSBackend.isAvailable() ? 'opfs' : 'memory';
    default:
      return 'memory';
  }
}
120
-
121
/**
 * Tell whether the given storage type can be used in the current environment.
 *
 * @param type Storage type to check
 * @returns For `'bun'`, whether a global `Bun` exists; for `'opfs'`, whatever
 * `OPFSBackend.isAvailable()` reports; `true` for `'memory'` and `'auto'`;
 * `false` for any other value.
 */
export function isStorageTypeAvailable(type: StorageType): boolean {
  if (type === 'bun') {
    return typeof Bun !== 'undefined';
  }
  if (type === 'opfs') {
    return OPFSBackend.isAvailable();
  }
  // 'memory' and 'auto' are always usable; anything unrecognized is not.
  return type === 'memory' || type === 'auto';
}