verso-db 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/CHANGELOG.md +13 -0
  2. package/README.md +13 -7
  3. package/dist/BinaryHeap.d.ts +11 -1
  4. package/dist/BinaryHeap.d.ts.map +1 -1
  5. package/dist/BinaryHeap.js +138 -0
  6. package/dist/BinaryHeap.js.map +1 -0
  7. package/dist/Collection.d.ts +30 -4
  8. package/dist/Collection.d.ts.map +1 -1
  9. package/dist/Collection.js +1186 -0
  10. package/dist/Collection.js.map +1 -0
  11. package/dist/HNSWIndex.d.ts +59 -0
  12. package/dist/HNSWIndex.d.ts.map +1 -1
  13. package/dist/HNSWIndex.js +2818 -0
  14. package/dist/HNSWIndex.js.map +1 -0
  15. package/dist/MaxBinaryHeap.d.ts +2 -64
  16. package/dist/MaxBinaryHeap.d.ts.map +1 -1
  17. package/dist/MaxBinaryHeap.js +5 -0
  18. package/dist/MaxBinaryHeap.js.map +1 -0
  19. package/dist/SearchWorker.d.ts +57 -4
  20. package/dist/SearchWorker.d.ts.map +1 -1
  21. package/dist/SearchWorker.js +573 -0
  22. package/dist/SearchWorker.js.map +1 -0
  23. package/dist/VectorDB.d.ts.map +1 -1
  24. package/dist/VectorDB.js +246 -0
  25. package/dist/VectorDB.js.map +1 -0
  26. package/dist/WorkerPool.d.ts +32 -2
  27. package/dist/WorkerPool.d.ts.map +1 -1
  28. package/dist/WorkerPool.js +266 -0
  29. package/dist/WorkerPool.js.map +1 -0
  30. package/dist/backends/JsDistanceBackend.d.ts.map +1 -1
  31. package/dist/backends/JsDistanceBackend.js +163 -0
  32. package/dist/backends/JsDistanceBackend.js.map +1 -0
  33. package/dist/encoding/DeltaEncoder.d.ts +2 -2
  34. package/dist/encoding/DeltaEncoder.d.ts.map +1 -1
  35. package/dist/encoding/DeltaEncoder.js +199 -0
  36. package/dist/encoding/DeltaEncoder.js.map +1 -0
  37. package/dist/errors.js +97 -0
  38. package/dist/errors.js.map +1 -0
  39. package/dist/index.d.ts +3 -3
  40. package/dist/index.d.ts.map +1 -1
  41. package/dist/index.js +61 -42
  42. package/dist/index.js.map +1 -9
  43. package/dist/presets.js +205 -0
  44. package/dist/presets.js.map +1 -0
  45. package/dist/quantization/ScalarQuantizer.d.ts +0 -34
  46. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -1
  47. package/dist/quantization/ScalarQuantizer.js +346 -0
  48. package/dist/quantization/ScalarQuantizer.js.map +1 -0
  49. package/dist/storage/BatchWriter.js +351 -0
  50. package/dist/storage/BatchWriter.js.map +1 -0
  51. package/dist/storage/BunStorageBackend.d.ts +7 -3
  52. package/dist/storage/BunStorageBackend.d.ts.map +1 -1
  53. package/dist/storage/BunStorageBackend.js +182 -0
  54. package/dist/storage/BunStorageBackend.js.map +1 -0
  55. package/dist/storage/MemoryBackend.js +109 -0
  56. package/dist/storage/MemoryBackend.js.map +1 -0
  57. package/dist/storage/OPFSBackend.d.ts.map +1 -1
  58. package/dist/storage/OPFSBackend.js +325 -0
  59. package/dist/storage/OPFSBackend.js.map +1 -0
  60. package/dist/storage/StorageBackend.js +12 -0
  61. package/dist/storage/StorageBackend.js.map +1 -0
  62. package/dist/storage/WriteAheadLog.js +321 -0
  63. package/dist/storage/WriteAheadLog.js.map +1 -0
  64. package/dist/storage/createStorageBackend.d.ts +4 -0
  65. package/dist/storage/createStorageBackend.d.ts.map +1 -1
  66. package/dist/storage/createStorageBackend.js +119 -0
  67. package/dist/storage/createStorageBackend.js.map +1 -0
  68. package/{src/storage/index.ts → dist/storage/index.js} +7 -27
  69. package/dist/storage/index.js.map +1 -0
  70. package/dist/storage/nodeFsRuntime.d.ts +14 -0
  71. package/dist/storage/nodeFsRuntime.d.ts.map +1 -0
  72. package/dist/storage/nodeFsRuntime.js +105 -0
  73. package/dist/storage/nodeFsRuntime.js.map +1 -0
  74. package/package.json +9 -7
  75. package/src/BinaryHeap.ts +0 -136
  76. package/src/Collection.ts +0 -1262
  77. package/src/HNSWIndex.ts +0 -2894
  78. package/src/MaxBinaryHeap.ts +0 -181
  79. package/src/SearchWorker.ts +0 -264
  80. package/src/VectorDB.ts +0 -319
  81. package/src/WorkerPool.ts +0 -222
  82. package/src/backends/JsDistanceBackend.ts +0 -171
  83. package/src/encoding/DeltaEncoder.ts +0 -236
  84. package/src/errors.ts +0 -110
  85. package/src/index.ts +0 -106
  86. package/src/presets.ts +0 -229
  87. package/src/quantization/ScalarQuantizer.ts +0 -487
  88. package/src/storage/BatchWriter.ts +0 -420
  89. package/src/storage/BunStorageBackend.ts +0 -199
  90. package/src/storage/MemoryBackend.ts +0 -122
  91. package/src/storage/OPFSBackend.ts +0 -348
  92. package/src/storage/StorageBackend.ts +0 -74
  93. package/src/storage/WriteAheadLog.ts +0 -379
  94. package/src/storage/createStorageBackend.ts +0 -137
@@ -1,487 +0,0 @@
1
- /**
2
- * Scalar Quantizer for Int8 vector compression
3
- *
4
- * Provides 4x memory reduction by converting Float32 vectors to Int8.
5
- * Expected performance:
6
- * - Memory: 4x reduction (32 bits → 8 bits per dimension)
7
- * - Speed: ~3.5x faster distance calculations (smaller data, better cache)
8
- * - Recall: <2% loss compared to float32
9
- *
10
- * Uses range quantization: maps [min, max] to [-128, 127]
11
- */
12
-
13
- import { QuantizationError, DimensionMismatchError } from '../errors';
14
-
15
- export interface QuantizationParams {
16
- min: Float32Array; // Per-dimension minimum
17
- max: Float32Array; // Per-dimension maximum
18
- scale: Float32Array; // Per-dimension scale factor
19
- offset: Float32Array; // Per-dimension offset
20
- }
21
-
22
- export class ScalarQuantizer {
23
- private dimension: number;
24
- private params: QuantizationParams | null = null;
25
- private trained: boolean = false;
26
-
27
- constructor(dimension: number) {
28
- this.dimension = dimension;
29
- }
30
-
31
- /**
32
- * Train the quantizer on a set of vectors to determine optimal range
33
- */
34
- train(vectors: Float32Array[]): void {
35
- if (vectors.length === 0) {
36
- throw new QuantizationError('Cannot train quantizer with empty vector set');
37
- }
38
-
39
- const dim = this.dimension;
40
-
41
- // Validate all training vectors have correct dimension
42
- for (let i = 0; i < vectors.length; i++) {
43
- if (vectors[i].length !== dim) {
44
- throw new DimensionMismatchError(dim, vectors[i].length, `Training vector at index ${i}`);
45
- }
46
- }
47
-
48
- const min = new Float32Array(dim).fill(Infinity);
49
- const max = new Float32Array(dim).fill(-Infinity);
50
-
51
- // Find per-dimension min/max
52
- for (const vector of vectors) {
53
- for (let d = 0; d < dim; d++) {
54
- if (vector[d] < min[d]) min[d] = vector[d];
55
- if (vector[d] > max[d]) max[d] = vector[d];
56
- }
57
- }
58
-
59
- // Compute scale and offset for mapping to [-128, 127]
60
- const scale = new Float32Array(dim);
61
- const offset = new Float32Array(dim);
62
-
63
- for (let d = 0; d < dim; d++) {
64
- const range = max[d] - min[d];
65
- if (range === 0) {
66
- // Handle constant dimensions
67
- scale[d] = 1;
68
- offset[d] = min[d];
69
- } else {
70
- // Map [min, max] to [-128, 127]
71
- scale[d] = 255 / range;
72
- offset[d] = min[d];
73
- }
74
- }
75
-
76
- this.params = { min, max, scale, offset };
77
- this.trained = true;
78
- }
79
-
80
- /**
81
- * Get training status
82
- */
83
- isTrained(): boolean {
84
- return this.trained;
85
- }
86
-
87
- /**
88
- * Get quantization parameters
89
- */
90
- getParams(): QuantizationParams | null {
91
- return this.params;
92
- }
93
-
94
- private validateParams(params: QuantizationParams): void {
95
- const arrays: Array<{ name: string; value: Float32Array }> = [
96
- { name: 'min', value: params.min },
97
- { name: 'max', value: params.max },
98
- { name: 'scale', value: params.scale },
99
- { name: 'offset', value: params.offset },
100
- ];
101
-
102
- for (const { name, value } of arrays) {
103
- if (value.length !== this.dimension) {
104
- throw new DimensionMismatchError(this.dimension, value.length, `Quantizer parameter '${name}'`);
105
- }
106
- for (let i = 0; i < value.length; i++) {
107
- if (!Number.isFinite(value[i])) {
108
- throw new QuantizationError(`Invalid quantizer parameter '${name}' at index ${i}: ${value[i]}`);
109
- }
110
- }
111
- }
112
-
113
- for (let d = 0; d < this.dimension; d++) {
114
- if (params.max[d] < params.min[d]) {
115
- throw new QuantizationError(
116
- `Invalid quantizer parameters at dimension ${d}: max (${params.max[d]}) < min (${params.min[d]})`
117
- );
118
- }
119
- if (params.scale[d] === 0) {
120
- throw new QuantizationError(`Invalid quantizer scale at dimension ${d}: scale must be non-zero`);
121
- }
122
- }
123
- }
124
-
125
- /**
126
- * Set quantization parameters (for loading saved quantizer)
127
- */
128
- setParams(params: QuantizationParams): void {
129
- this.validateParams(params);
130
- this.params = params;
131
- this.trained = true;
132
- }
133
-
134
- /**
135
- * Quantize a single float32 vector to int8
136
- */
137
- quantize(vector: Float32Array): Int8Array {
138
- if (!this.params) {
139
- throw new QuantizationError('Quantizer not trained');
140
- }
141
-
142
- if (vector.length !== this.dimension) {
143
- throw new DimensionMismatchError(this.dimension, vector.length, 'Quantization');
144
- }
145
-
146
- const dim = this.dimension;
147
- const result = new Int8Array(dim);
148
- const { scale, offset } = this.params;
149
-
150
- for (let d = 0; d < dim; d++) {
151
- // Map to [0, 255] then shift to [-128, 127]
152
- const normalized = (vector[d] - offset[d]) * scale[d];
153
- result[d] = Math.max(-128, Math.min(127, Math.round(normalized - 128)));
154
- }
155
-
156
- return result;
157
- }
158
-
159
- /**
160
- * Quantize a float32 vector directly into a target Int8Array at the given offset.
161
- * Zero-allocation: avoids creating a new Int8Array per vector.
162
- *
163
- * @param vector Source float32 vector
164
- * @param target Target Int8Array to write into
165
- * @param targetOffset Byte offset in target where quantized values start
166
- */
167
- quantizeInto(vector: Float32Array, target: Int8Array, targetOffset: number): void {
168
- if (!this.params) {
169
- throw new QuantizationError('Quantizer not trained');
170
- }
171
-
172
- if (vector.length !== this.dimension) {
173
- throw new DimensionMismatchError(this.dimension, vector.length, 'Quantization');
174
- }
175
-
176
- const dim = this.dimension;
177
- const { scale, offset } = this.params;
178
-
179
- for (let d = 0; d < dim; d++) {
180
- const normalized = (vector[d] - offset[d]) * scale[d];
181
- target[targetOffset + d] = Math.max(-128, Math.min(127, Math.round(normalized - 128)));
182
- }
183
- }
184
-
185
- /**
186
- * Quantize multiple vectors
187
- */
188
- quantizeBatch(vectors: Float32Array[]): Int8Array[] {
189
- const result = new Array<Int8Array>(vectors.length);
190
- for (let i = 0; i < vectors.length; i++) {
191
- result[i] = this.quantize(vectors[i]);
192
- }
193
- return result;
194
- }
195
-
196
- /**
197
- * Dequantize an int8 vector back to float32 (for rescoring)
198
- */
199
- dequantize(vector: Int8Array): Float32Array {
200
- if (!this.params) {
201
- throw new QuantizationError('Quantizer not trained');
202
- }
203
-
204
- if (vector.length !== this.dimension) {
205
- throw new DimensionMismatchError(this.dimension, vector.length, 'Dequantization');
206
- }
207
-
208
- const dim = this.dimension;
209
- const result = new Float32Array(dim);
210
- const { scale, offset } = this.params;
211
-
212
- for (let d = 0; d < dim; d++) {
213
- // Reverse the quantization
214
- result[d] = ((vector[d] + 128) / scale[d]) + offset[d];
215
- }
216
-
217
- return result;
218
- }
219
-
220
- /**
221
- * Serialize quantization parameters for saving
222
- */
223
- serialize(): ArrayBuffer {
224
- if (!this.params) {
225
- throw new QuantizationError('Quantizer not trained');
226
- }
227
-
228
- const dim = this.dimension;
229
- // 4 bytes header (dimension) + 4 * dim * 4 bytes (4 float arrays)
230
- const buffer = new ArrayBuffer(4 + 4 * dim * 4);
231
- const view = new DataView(buffer);
232
-
233
- view.setInt32(0, dim, true);
234
-
235
- let offset = 4;
236
- for (const arr of [this.params.min, this.params.max, this.params.scale, this.params.offset]) {
237
- for (let d = 0; d < dim; d++) {
238
- view.setFloat32(offset, arr[d], true);
239
- offset += 4;
240
- }
241
- }
242
-
243
- return buffer;
244
- }
245
-
246
- /**
247
- * Load quantization parameters
248
- */
249
- static deserialize(buffer: ArrayBuffer): ScalarQuantizer {
250
- if (buffer.byteLength < 4) {
251
- throw new QuantizationError('Invalid quantizer data: buffer too small');
252
- }
253
-
254
- const view = new DataView(buffer);
255
- const dim = view.getInt32(0, true);
256
-
257
- if (dim <= 0) {
258
- throw new QuantizationError(`Invalid quantizer data: dimension must be positive, got ${dim}`);
259
- }
260
-
261
- const expectedSize = 4 + dim * 4 * 4;
262
- if (buffer.byteLength < expectedSize) {
263
- throw new QuantizationError(`Invalid quantizer data: expected ${expectedSize} bytes, got ${buffer.byteLength}`);
264
- }
265
-
266
- const quantizer = new ScalarQuantizer(dim);
267
- const min = new Float32Array(dim);
268
- const max = new Float32Array(dim);
269
- const scale = new Float32Array(dim);
270
- const offsetArr = new Float32Array(dim);
271
-
272
- let offset = 4;
273
- for (const arr of [min, max, scale, offsetArr]) {
274
- for (let d = 0; d < dim; d++) {
275
- arr[d] = view.getFloat32(offset, true);
276
- offset += 4;
277
- }
278
- }
279
-
280
- quantizer.setParams({ min, max, scale, offset: offsetArr });
281
- return quantizer;
282
- }
283
- }
284
-
285
- /**
286
- * Fast Int8 distance calculations
287
- * These are optimized for quantized vectors and provide significant speedup
288
- */
289
-
290
- /**
291
- * Compute dot product between two Int8 vectors
292
- * Uses 8-wide unrolling for better ILP (instruction-level parallelism)
293
- */
294
- export function dotProductInt8(a: Int8Array, b: Int8Array): number {
295
- if (a.length !== b.length) {
296
- throw new DimensionMismatchError(a.length, b.length, 'Int8 dot product');
297
- }
298
- const len = a.length;
299
- let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
300
- let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
301
- let i = 0;
302
-
303
- // 8-wide unrolling for high-dimensional vectors
304
- const limit8 = len - 7;
305
- for (; i < limit8; i += 8) {
306
- sum0 += a[i] * b[i];
307
- sum1 += a[i + 1] * b[i + 1];
308
- sum2 += a[i + 2] * b[i + 2];
309
- sum3 += a[i + 3] * b[i + 3];
310
- sum4 += a[i + 4] * b[i + 4];
311
- sum5 += a[i + 5] * b[i + 5];
312
- sum6 += a[i + 6] * b[i + 6];
313
- sum7 += a[i + 7] * b[i + 7];
314
- }
315
-
316
- // Handle remaining elements
317
- for (; i < len; i++) {
318
- sum0 += a[i] * b[i];
319
- }
320
-
321
- return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
322
- }
323
-
324
- /**
325
- * Compute L2 squared distance between two Int8 vectors
326
- * Uses 8-wide unrolling for better ILP
327
- */
328
- export function l2SquaredInt8(a: Int8Array, b: Int8Array): number {
329
- if (a.length !== b.length) {
330
- throw new DimensionMismatchError(a.length, b.length, 'Int8 L2 distance');
331
- }
332
- const len = a.length;
333
- let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
334
- let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
335
- let i = 0;
336
-
337
- // 8-wide unrolling for high-dimensional vectors
338
- const limit8 = len - 7;
339
- for (; i < limit8; i += 8) {
340
- const d0 = a[i] - b[i];
341
- const d1 = a[i + 1] - b[i + 1];
342
- const d2 = a[i + 2] - b[i + 2];
343
- const d3 = a[i + 3] - b[i + 3];
344
- const d4 = a[i + 4] - b[i + 4];
345
- const d5 = a[i + 5] - b[i + 5];
346
- const d6 = a[i + 6] - b[i + 6];
347
- const d7 = a[i + 7] - b[i + 7];
348
- sum0 += d0 * d0;
349
- sum1 += d1 * d1;
350
- sum2 += d2 * d2;
351
- sum3 += d3 * d3;
352
- sum4 += d4 * d4;
353
- sum5 += d5 * d5;
354
- sum6 += d6 * d6;
355
- sum7 += d7 * d7;
356
- }
357
-
358
- // Handle remaining elements
359
- for (; i < len; i++) {
360
- const d = a[i] - b[i];
361
- sum0 += d * d;
362
- }
363
-
364
- return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
365
- }
366
-
367
- /**
368
- * Compute approximate cosine distance for Int8 vectors
369
- * Note: This is approximate because quantization changes magnitude
370
- * Uses 8-wide unrolling with separate accumulators for better ILP
371
- */
372
- export function cosineDistanceInt8(a: Int8Array, b: Int8Array): number {
373
- if (a.length !== b.length) {
374
- throw new DimensionMismatchError(a.length, b.length, 'Int8 cosine distance');
375
- }
376
- const len = a.length;
377
- // Use separate accumulators for better ILP
378
- let dot0 = 0, dot1 = 0, dot2 = 0, dot3 = 0;
379
- let normA0 = 0, normA1 = 0, normA2 = 0, normA3 = 0;
380
- let normB0 = 0, normB1 = 0, normB2 = 0, normB3 = 0;
381
-
382
- let i = 0;
383
- const limit8 = len - 7;
384
- for (; i < limit8; i += 8) {
385
- dot0 += a[i] * b[i] + a[i + 4] * b[i + 4];
386
- dot1 += a[i + 1] * b[i + 1] + a[i + 5] * b[i + 5];
387
- dot2 += a[i + 2] * b[i + 2] + a[i + 6] * b[i + 6];
388
- dot3 += a[i + 3] * b[i + 3] + a[i + 7] * b[i + 7];
389
- normA0 += a[i] * a[i] + a[i + 4] * a[i + 4];
390
- normA1 += a[i + 1] * a[i + 1] + a[i + 5] * a[i + 5];
391
- normA2 += a[i + 2] * a[i + 2] + a[i + 6] * a[i + 6];
392
- normA3 += a[i + 3] * a[i + 3] + a[i + 7] * a[i + 7];
393
- normB0 += b[i] * b[i] + b[i + 4] * b[i + 4];
394
- normB1 += b[i + 1] * b[i + 1] + b[i + 5] * b[i + 5];
395
- normB2 += b[i + 2] * b[i + 2] + b[i + 6] * b[i + 6];
396
- normB3 += b[i + 3] * b[i + 3] + b[i + 7] * b[i + 7];
397
- }
398
-
399
- // Handle remaining elements
400
- for (; i < len; i++) {
401
- dot0 += a[i] * b[i];
402
- normA0 += a[i] * a[i];
403
- normB0 += b[i] * b[i];
404
- }
405
-
406
- const dot = dot0 + dot1 + dot2 + dot3;
407
- const normA = normA0 + normA1 + normA2 + normA3;
408
- const normB = normB0 + normB1 + normB2 + normB3;
409
-
410
- const magnitude = Math.sqrt(normA * normB);
411
- if (magnitude === 0) return 1;
412
-
413
- const distance = 1 - (dot / magnitude);
414
- return distance < 0 ? 0 : distance;
415
- }
416
-
417
- /**
418
- * QuantizedVectorStore - Efficient storage for quantized vectors
419
- */
420
- export class QuantizedVectorStore {
421
- private quantizer: ScalarQuantizer;
422
- private vectors: Int8Array[]; // Quantized vectors
423
- private originalVectors: Float32Array[] | null; // Keep originals for rescoring
424
- private keepOriginals: boolean;
425
-
426
- constructor(dimension: number, keepOriginals = true) {
427
- this.quantizer = new ScalarQuantizer(dimension);
428
- this.vectors = [];
429
- this.originalVectors = keepOriginals ? [] : null;
430
- this.keepOriginals = keepOriginals;
431
- }
432
-
433
- /**
434
- * Train the quantizer and add vectors
435
- */
436
- addVectors(vectors: Float32Array[]): void {
437
- if (!this.quantizer.isTrained()) {
438
- this.quantizer.train(vectors);
439
- }
440
-
441
- for (const v of vectors) {
442
- this.vectors.push(this.quantizer.quantize(v));
443
- if (this.keepOriginals && this.originalVectors) {
444
- this.originalVectors.push(v);
445
- }
446
- }
447
- }
448
-
449
- /**
450
- * Get quantized vector by index
451
- */
452
- getQuantized(index: number): Int8Array {
453
- if (!Number.isInteger(index) || index < 0 || index >= this.vectors.length) {
454
- throw new RangeError(`Quantized vector index out of bounds: ${index}`);
455
- }
456
- return this.vectors[index];
457
- }
458
-
459
- /**
460
- * Get original float32 vector by index (for rescoring)
461
- */
462
- getOriginal(index: number): Float32Array | null {
463
- if (!this.originalVectors) return null;
464
- if (!Number.isInteger(index) || index < 0 || index >= this.originalVectors.length) {
465
- throw new RangeError(`Original vector index out of bounds: ${index}`);
466
- }
467
- return this.originalVectors[index];
468
- }
469
-
470
- /**
471
- * Get number of vectors
472
- */
473
- size(): number {
474
- return this.vectors.length;
475
- }
476
-
477
- /**
478
- * Calculate memory usage in bytes
479
- */
480
- memoryUsage(): { quantized: number; original: number; total: number } {
481
- const quantized = this.vectors.reduce((sum, v) => sum + v.length, 0);
482
- const original = this.originalVectors
483
- ? this.originalVectors.reduce((sum, v) => sum + v.length * 4, 0)
484
- : 0;
485
- return { quantized, original, total: quantized + original };
486
- }
487
- }