verso-db 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/CHANGELOG.md +46 -0
  2. package/LICENSE +21 -0
  3. package/README.md +252 -0
  4. package/dist/BinaryHeap.d.ts +25 -0
  5. package/dist/BinaryHeap.d.ts.map +1 -0
  6. package/dist/Collection.d.ts +156 -0
  7. package/dist/Collection.d.ts.map +1 -0
  8. package/dist/HNSWIndex.d.ts +357 -0
  9. package/dist/HNSWIndex.d.ts.map +1 -0
  10. package/dist/MaxBinaryHeap.d.ts +63 -0
  11. package/dist/MaxBinaryHeap.d.ts.map +1 -0
  12. package/dist/Storage.d.ts +54 -0
  13. package/dist/Storage.d.ts.map +1 -0
  14. package/dist/VectorDB.d.ts +44 -0
  15. package/dist/VectorDB.d.ts.map +1 -0
  16. package/dist/backends/DistanceBackend.d.ts +5 -0
  17. package/dist/backends/DistanceBackend.d.ts.map +1 -0
  18. package/dist/backends/JsDistanceBackend.d.ts +37 -0
  19. package/dist/backends/JsDistanceBackend.d.ts.map +1 -0
  20. package/dist/encoding/DeltaEncoder.d.ts +61 -0
  21. package/dist/encoding/DeltaEncoder.d.ts.map +1 -0
  22. package/dist/errors.d.ts +58 -0
  23. package/dist/errors.d.ts.map +1 -0
  24. package/dist/index.d.ts +64 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +3732 -0
  27. package/dist/presets.d.ts +91 -0
  28. package/dist/presets.d.ts.map +1 -0
  29. package/dist/quantization/ScalarQuantizer.d.ts +114 -0
  30. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -0
  31. package/dist/storage/BatchWriter.d.ts +104 -0
  32. package/dist/storage/BatchWriter.d.ts.map +1 -0
  33. package/dist/storage/BunStorageBackend.d.ts +58 -0
  34. package/dist/storage/BunStorageBackend.d.ts.map +1 -0
  35. package/dist/storage/MemoryBackend.d.ts +44 -0
  36. package/dist/storage/MemoryBackend.d.ts.map +1 -0
  37. package/dist/storage/OPFSBackend.d.ts +59 -0
  38. package/dist/storage/OPFSBackend.d.ts.map +1 -0
  39. package/dist/storage/StorageBackend.d.ts +66 -0
  40. package/dist/storage/StorageBackend.d.ts.map +1 -0
  41. package/dist/storage/WriteAheadLog.d.ts +111 -0
  42. package/dist/storage/WriteAheadLog.d.ts.map +1 -0
  43. package/dist/storage/createStorageBackend.d.ts +40 -0
  44. package/dist/storage/createStorageBackend.d.ts.map +1 -0
  45. package/dist/storage/index.d.ts +30 -0
  46. package/dist/storage/index.d.ts.map +1 -0
  47. package/package.json +98 -0
  48. package/src/BinaryHeap.ts +131 -0
  49. package/src/Collection.ts +695 -0
  50. package/src/HNSWIndex.ts +1839 -0
  51. package/src/MaxBinaryHeap.ts +175 -0
  52. package/src/Storage.ts +435 -0
  53. package/src/VectorDB.ts +109 -0
  54. package/src/backends/DistanceBackend.ts +17 -0
  55. package/src/backends/JsDistanceBackend.ts +227 -0
  56. package/src/encoding/DeltaEncoder.ts +217 -0
  57. package/src/errors.ts +110 -0
  58. package/src/index.ts +138 -0
  59. package/src/presets.ts +229 -0
  60. package/src/quantization/ScalarQuantizer.ts +383 -0
  61. package/src/storage/BatchWriter.ts +336 -0
  62. package/src/storage/BunStorageBackend.ts +161 -0
  63. package/src/storage/MemoryBackend.ts +120 -0
  64. package/src/storage/OPFSBackend.ts +250 -0
  65. package/src/storage/StorageBackend.ts +74 -0
  66. package/src/storage/WriteAheadLog.ts +326 -0
  67. package/src/storage/createStorageBackend.ts +137 -0
  68. package/src/storage/index.ts +53 -0
package/src/presets.ts ADDED
@@ -0,0 +1,229 @@
1
+ /**
2
+ * HNSW Parameter Presets
3
+ *
4
+ * These presets are optimized based on extensive benchmarking to achieve
5
+ * the target recall@10 >= 95% for different dataset sizes and dimensions.
6
+ *
7
+ * Key parameters:
8
+ * - M: Maximum connections per node (higher = better recall, more memory)
9
+ * - efConstruction: Beam width during index building (higher = better quality, slower build)
10
+ * - efSearch: Beam width during search (higher = better recall, slower search)
11
+ */
12
+
13
+ export interface HNSWPreset {
14
+ name: string;
15
+ description: string;
16
+ M: number;
17
+ efConstruction: number;
18
+ efSearch: number;
19
+ expectedRecall: number;
20
+ targetDimensions: string;
21
+ targetDatasetSize: string;
22
+ }
23
+
24
+ /**
25
+ * Preset for low-dimensional vectors (128D or less)
26
+ * Suitable for: Image features, word2vec, GloVe embeddings
27
+ */
28
+ export const PRESET_LOW_DIM: HNSWPreset = {
29
+ name: 'low-dim',
30
+ description: 'Optimized for low-dimensional vectors (<=128D)',
31
+ M: 16,
32
+ efConstruction: 200,
33
+ efSearch: 100,
34
+ expectedRecall: 0.99,
35
+ targetDimensions: '<=128',
36
+ targetDatasetSize: '1K-100K',
37
+ };
38
+
39
+ /**
40
+ * Preset for medium-dimensional vectors (256-512D)
41
+ * Suitable for: Sentence embeddings, smaller transformer outputs
42
+ */
43
+ export const PRESET_MEDIUM_DIM: HNSWPreset = {
44
+ name: 'medium-dim',
45
+ description: 'Optimized for medium-dimensional vectors (256-512D)',
46
+ M: 24,
47
+ efConstruction: 200,
48
+ efSearch: 150,
49
+ expectedRecall: 0.97,
50
+ targetDimensions: '256-512',
51
+ targetDatasetSize: '1K-100K',
52
+ };
53
+
54
+ /**
55
+ * Preset for high-dimensional vectors (768D+)
56
+ * Suitable for: BERT, GPT embeddings, Cohere, OpenAI embeddings
57
+ * This is the recommended preset for RAG applications
58
+ *
59
+ * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
60
+ * - efSearch=128: 99.2% recall, 168 QPS, 10.72ms P99
61
+ */
62
+ export const PRESET_HIGH_DIM: HNSWPreset = {
63
+ name: 'high-dim',
64
+ description: 'Optimized for high-dimensional vectors (768D+)',
65
+ M: 32,
66
+ efConstruction: 200,
67
+ efSearch: 128,
68
+ expectedRecall: 0.99,
69
+ targetDimensions: '>=768',
70
+ targetDatasetSize: '1K-500K',
71
+ };
72
+
73
+ /**
74
+ * Preset for very high-dimensional vectors (1536D+)
75
+ * Suitable for: OpenAI text-embedding-ada-002, text-embedding-3-large
76
+ *
77
+ * Scaled from PRESET_HIGH_DIM benchmarks (higher M for higher dimensions)
78
+ */
79
+ export const PRESET_VERY_HIGH_DIM: HNSWPreset = {
80
+ name: 'very-high-dim',
81
+ description: 'Optimized for very high-dimensional vectors (1536D+)',
82
+ M: 48,
83
+ efConstruction: 300,
84
+ efSearch: 150,
85
+ expectedRecall: 0.99,
86
+ targetDimensions: '>=1536',
87
+ targetDatasetSize: '1K-500K',
88
+ };
89
+
90
+ /**
91
+ * Preset for small datasets (<10K vectors)
92
+ * Prioritizes recall over speed since brute-force is viable
93
+ */
94
+ export const PRESET_SMALL_DATASET: HNSWPreset = {
95
+ name: 'small-dataset',
96
+ description: 'Optimized for small datasets (<10K vectors)',
97
+ M: 16,
98
+ efConstruction: 200,
99
+ efSearch: 200,
100
+ expectedRecall: 0.99,
101
+ targetDimensions: 'any',
102
+ targetDatasetSize: '<10K',
103
+ };
104
+
105
+ /**
106
+ * Preset for large datasets (100K-1M vectors)
107
+ * Balances recall with build time and memory
108
+ *
109
+ * Benchmarked on Cohere Wikipedia 1024D (495K vectors):
110
+ * - efSearch=128: 99.2% recall, 168 QPS
111
+ */
112
+ export const PRESET_LARGE_DATASET: HNSWPreset = {
113
+ name: 'large-dataset',
114
+ description: 'Optimized for large datasets (100K-1M vectors)',
115
+ M: 32,
116
+ efConstruction: 200,
117
+ efSearch: 128,
118
+ expectedRecall: 0.99,
119
+ targetDimensions: 'any',
120
+ targetDatasetSize: '100K-1M',
121
+ };
122
+
123
+ /**
124
+ * Preset for maximum recall (prioritizes accuracy over speed)
125
+ * Use when recall is critical and latency is acceptable
126
+ */
127
+ export const PRESET_MAX_RECALL: HNSWPreset = {
128
+ name: 'max-recall',
129
+ description: 'Maximum recall configuration',
130
+ M: 48,
131
+ efConstruction: 500,
132
+ efSearch: 400,
133
+ expectedRecall: 0.99,
134
+ targetDimensions: 'any',
135
+ targetDatasetSize: 'any',
136
+ };
137
+
138
+ /**
139
+ * Preset for minimum latency (prioritizes speed over recall)
140
+ * Use when latency is critical and 90% recall is acceptable
141
+ */
142
+ export const PRESET_LOW_LATENCY: HNSWPreset = {
143
+ name: 'low-latency',
144
+ description: 'Minimum latency configuration (90% recall)',
145
+ M: 12,
146
+ efConstruction: 100,
147
+ efSearch: 50,
148
+ expectedRecall: 0.90,
149
+ targetDimensions: 'any',
150
+ targetDatasetSize: 'any',
151
+ };
152
+
153
+ /**
154
+ * All available presets
155
+ */
156
+ export const PRESETS: Record<string, HNSWPreset> = {
157
+ 'low-dim': PRESET_LOW_DIM,
158
+ 'medium-dim': PRESET_MEDIUM_DIM,
159
+ 'high-dim': PRESET_HIGH_DIM,
160
+ 'very-high-dim': PRESET_VERY_HIGH_DIM,
161
+ 'small-dataset': PRESET_SMALL_DATASET,
162
+ 'large-dataset': PRESET_LARGE_DATASET,
163
+ 'max-recall': PRESET_MAX_RECALL,
164
+ 'low-latency': PRESET_LOW_LATENCY,
165
+ };
166
+
167
+ /**
168
+ * Get recommended preset based on dimension and dataset size
169
+ *
170
+ * For high-dimensional vectors (768D+), dimension takes priority over dataset size
171
+ * because recall degrades significantly without higher M values.
172
+ */
173
+ export function getRecommendedPreset(dimension: number, datasetSize?: number): HNSWPreset {
174
+ // For high-dimensional vectors, always use dimension-based presets
175
+ // (dimension matters more than dataset size for recall)
176
+ // Note: Check 1536 BEFORE 768 since 1536 >= 768 would match first
177
+ if (dimension >= 1536) return PRESET_VERY_HIGH_DIM;
178
+ if (dimension >= 768) return PRESET_HIGH_DIM;
179
+
180
+ // For lower dimensions, consider dataset size
181
+ if (datasetSize !== undefined) {
182
+ if (datasetSize < 10000) return PRESET_SMALL_DATASET;
183
+ if (datasetSize > 100000) return PRESET_LARGE_DATASET;
184
+ }
185
+
186
+ // Dimension-based selection for medium dimensions
187
+ if (dimension <= 128) return PRESET_LOW_DIM;
188
+ if (dimension <= 512) return PRESET_MEDIUM_DIM;
189
+
190
+ return PRESET_HIGH_DIM;
191
+ }
192
+
193
+ /**
194
+ * Get preset by name
195
+ */
196
+ export function getPreset(name: string): HNSWPreset | undefined {
197
+ return PRESETS[name];
198
+ }
199
+
200
+ /**
201
+ * RAG-specific preset recommendation
202
+ * For typical RAG applications using popular embedding models
203
+ */
204
+ export function getRAGPreset(embeddingModel: string): HNSWPreset {
205
+ const model = embeddingModel.toLowerCase();
206
+
207
+ // OpenAI models
208
+ if (model.includes('ada-002') || model.includes('text-embedding-3')) {
209
+ return PRESET_VERY_HIGH_DIM;
210
+ }
211
+
212
+ // Cohere models
213
+ if (model.includes('cohere') || model.includes('embed-')) {
214
+ return PRESET_HIGH_DIM;
215
+ }
216
+
217
+ // BERT/Sentence Transformers
218
+ if (model.includes('bert') || model.includes('minilm') || model.includes('mpnet')) {
219
+ return PRESET_HIGH_DIM;
220
+ }
221
+
222
+ // E5 models
223
+ if (model.includes('e5-')) {
224
+ return model.includes('large') ? PRESET_HIGH_DIM : PRESET_MEDIUM_DIM;
225
+ }
226
+
227
+ // Default to high-dim for unknown models (most modern embeddings are 768D+)
228
+ return PRESET_HIGH_DIM;
229
+ }
@@ -0,0 +1,383 @@
1
+ /**
2
+ * Scalar Quantizer for Int8 vector compression
3
+ *
4
+ * Provides 4x memory reduction by converting Float32 vectors to Int8.
5
+ * Expected performance:
6
+ * - Memory: 4x reduction (32 bits → 8 bits per dimension)
7
+ * - Speed: ~3.5x faster distance calculations (smaller data, better cache)
8
+ * - Recall: <2% loss compared to float32
9
+ *
10
+ * Uses range quantization: maps [min, max] to [-128, 127]
11
+ */
12
+
13
+ export interface QuantizationParams {
14
+ min: Float32Array; // Per-dimension minimum
15
+ max: Float32Array; // Per-dimension maximum
16
+ scale: Float32Array; // Per-dimension scale factor
17
+ offset: Float32Array; // Per-dimension offset
18
+ }
19
+
20
+ export class ScalarQuantizer {
21
+ private dimension: number;
22
+ private params: QuantizationParams | null = null;
23
+ private trained: boolean = false;
24
+
25
+ constructor(dimension: number) {
26
+ this.dimension = dimension;
27
+ }
28
+
29
+ /**
30
+ * Train the quantizer on a set of vectors to determine optimal range
31
+ */
32
+ train(vectors: Float32Array[]): void {
33
+ if (vectors.length === 0) {
34
+ throw new Error('Cannot train quantizer with empty vector set');
35
+ }
36
+
37
+ const dim = this.dimension;
38
+ const min = new Float32Array(dim).fill(Infinity);
39
+ const max = new Float32Array(dim).fill(-Infinity);
40
+
41
+ // Find per-dimension min/max
42
+ for (const vector of vectors) {
43
+ for (let d = 0; d < dim; d++) {
44
+ if (vector[d] < min[d]) min[d] = vector[d];
45
+ if (vector[d] > max[d]) max[d] = vector[d];
46
+ }
47
+ }
48
+
49
+ // Compute scale and offset for mapping to [-128, 127]
50
+ const scale = new Float32Array(dim);
51
+ const offset = new Float32Array(dim);
52
+
53
+ for (let d = 0; d < dim; d++) {
54
+ const range = max[d] - min[d];
55
+ if (range === 0) {
56
+ // Handle constant dimensions
57
+ scale[d] = 1;
58
+ offset[d] = min[d];
59
+ } else {
60
+ // Map [min, max] to [-128, 127]
61
+ scale[d] = 255 / range;
62
+ offset[d] = min[d];
63
+ }
64
+ }
65
+
66
+ this.params = { min, max, scale, offset };
67
+ this.trained = true;
68
+ }
69
+
70
+ /**
71
+ * Get training status
72
+ */
73
+ isTrained(): boolean {
74
+ return this.trained;
75
+ }
76
+
77
+ /**
78
+ * Get quantization parameters
79
+ */
80
+ getParams(): QuantizationParams | null {
81
+ return this.params;
82
+ }
83
+
84
+ /**
85
+ * Set quantization parameters (for loading saved quantizer)
86
+ */
87
+ setParams(params: QuantizationParams): void {
88
+ this.params = params;
89
+ this.trained = true;
90
+ }
91
+
92
+ /**
93
+ * Quantize a single float32 vector to int8
94
+ */
95
+ quantize(vector: Float32Array): Int8Array {
96
+ if (!this.params) {
97
+ throw new Error('Quantizer not trained');
98
+ }
99
+
100
+ const dim = this.dimension;
101
+ const result = new Int8Array(dim);
102
+ const { scale, offset } = this.params;
103
+
104
+ for (let d = 0; d < dim; d++) {
105
+ // Map to [0, 255] then shift to [-128, 127]
106
+ const normalized = (vector[d] - offset[d]) * scale[d];
107
+ result[d] = Math.max(-128, Math.min(127, Math.round(normalized - 128)));
108
+ }
109
+
110
+ return result;
111
+ }
112
+
113
+ /**
114
+ * Quantize multiple vectors
115
+ */
116
+ quantizeBatch(vectors: Float32Array[]): Int8Array[] {
117
+ const result = new Array<Int8Array>(vectors.length);
118
+ for (let i = 0; i < vectors.length; i++) {
119
+ result[i] = this.quantize(vectors[i]);
120
+ }
121
+ return result;
122
+ }
123
+
124
+ /**
125
+ * Dequantize an int8 vector back to float32 (for rescoring)
126
+ */
127
+ dequantize(vector: Int8Array): Float32Array {
128
+ if (!this.params) {
129
+ throw new Error('Quantizer not trained');
130
+ }
131
+
132
+ const dim = this.dimension;
133
+ const result = new Float32Array(dim);
134
+ const { scale, offset } = this.params;
135
+
136
+ for (let d = 0; d < dim; d++) {
137
+ // Reverse the quantization
138
+ result[d] = ((vector[d] + 128) / scale[d]) + offset[d];
139
+ }
140
+
141
+ return result;
142
+ }
143
+
144
+ /**
145
+ * Serialize quantization parameters for saving
146
+ */
147
+ serialize(): ArrayBuffer {
148
+ if (!this.params) {
149
+ throw new Error('Quantizer not trained');
150
+ }
151
+
152
+ const dim = this.dimension;
153
+ // 4 bytes header (dimension) + 4 * dim * 4 bytes (4 float arrays)
154
+ const buffer = new ArrayBuffer(4 + 4 * dim * 4);
155
+ const view = new DataView(buffer);
156
+
157
+ view.setInt32(0, dim, true);
158
+
159
+ let offset = 4;
160
+ for (const arr of [this.params.min, this.params.max, this.params.scale, this.params.offset]) {
161
+ for (let d = 0; d < dim; d++) {
162
+ view.setFloat32(offset, arr[d], true);
163
+ offset += 4;
164
+ }
165
+ }
166
+
167
+ return buffer;
168
+ }
169
+
170
+ /**
171
+ * Load quantization parameters
172
+ */
173
+ static deserialize(buffer: ArrayBuffer): ScalarQuantizer {
174
+ const view = new DataView(buffer);
175
+ const dim = view.getInt32(0, true);
176
+
177
+ const quantizer = new ScalarQuantizer(dim);
178
+ const min = new Float32Array(dim);
179
+ const max = new Float32Array(dim);
180
+ const scale = new Float32Array(dim);
181
+ const offsetArr = new Float32Array(dim);
182
+
183
+ let offset = 4;
184
+ for (const arr of [min, max, scale, offsetArr]) {
185
+ for (let d = 0; d < dim; d++) {
186
+ arr[d] = view.getFloat32(offset, true);
187
+ offset += 4;
188
+ }
189
+ }
190
+
191
+ quantizer.setParams({ min, max, scale, offset: offsetArr });
192
+ return quantizer;
193
+ }
194
+ }
195
+
196
+ /**
197
+ * Fast Int8 distance calculations
198
+ * These are optimized for quantized vectors and provide significant speedup
199
+ */
200
+
201
+ /**
202
+ * Compute dot product between two Int8 vectors
203
+ * Uses 8-wide unrolling for better ILP (instruction-level parallelism)
204
+ */
205
+ export function dotProductInt8(a: Int8Array, b: Int8Array): number {
206
+ const len = a.length;
207
+ let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
208
+ let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
209
+ let i = 0;
210
+
211
+ // 8-wide unrolling for high-dimensional vectors
212
+ const limit8 = len - 7;
213
+ for (; i < limit8; i += 8) {
214
+ sum0 += a[i] * b[i];
215
+ sum1 += a[i + 1] * b[i + 1];
216
+ sum2 += a[i + 2] * b[i + 2];
217
+ sum3 += a[i + 3] * b[i + 3];
218
+ sum4 += a[i + 4] * b[i + 4];
219
+ sum5 += a[i + 5] * b[i + 5];
220
+ sum6 += a[i + 6] * b[i + 6];
221
+ sum7 += a[i + 7] * b[i + 7];
222
+ }
223
+
224
+ // Handle remaining elements
225
+ for (; i < len; i++) {
226
+ sum0 += a[i] * b[i];
227
+ }
228
+
229
+ return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
230
+ }
231
+
232
+ /**
233
+ * Compute L2 squared distance between two Int8 vectors
234
+ * Uses 8-wide unrolling for better ILP
235
+ */
236
+ export function l2SquaredInt8(a: Int8Array, b: Int8Array): number {
237
+ const len = a.length;
238
+ let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
239
+ let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
240
+ let i = 0;
241
+
242
+ // 8-wide unrolling for high-dimensional vectors
243
+ const limit8 = len - 7;
244
+ for (; i < limit8; i += 8) {
245
+ const d0 = a[i] - b[i];
246
+ const d1 = a[i + 1] - b[i + 1];
247
+ const d2 = a[i + 2] - b[i + 2];
248
+ const d3 = a[i + 3] - b[i + 3];
249
+ const d4 = a[i + 4] - b[i + 4];
250
+ const d5 = a[i + 5] - b[i + 5];
251
+ const d6 = a[i + 6] - b[i + 6];
252
+ const d7 = a[i + 7] - b[i + 7];
253
+ sum0 += d0 * d0;
254
+ sum1 += d1 * d1;
255
+ sum2 += d2 * d2;
256
+ sum3 += d3 * d3;
257
+ sum4 += d4 * d4;
258
+ sum5 += d5 * d5;
259
+ sum6 += d6 * d6;
260
+ sum7 += d7 * d7;
261
+ }
262
+
263
+ // Handle remaining elements
264
+ for (; i < len; i++) {
265
+ const d = a[i] - b[i];
266
+ sum0 += d * d;
267
+ }
268
+
269
+ return sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
270
+ }
271
+
272
+ /**
273
+ * Compute approximate cosine distance for Int8 vectors
274
+ * Note: This is approximate because quantization changes magnitude
275
+ * Uses 8-wide unrolling with separate accumulators for better ILP
276
+ */
277
+ export function cosineDistanceInt8(a: Int8Array, b: Int8Array): number {
278
+ const len = a.length;
279
+ // Use separate accumulators for better ILP
280
+ let dot0 = 0, dot1 = 0, dot2 = 0, dot3 = 0;
281
+ let normA0 = 0, normA1 = 0, normA2 = 0, normA3 = 0;
282
+ let normB0 = 0, normB1 = 0, normB2 = 0, normB3 = 0;
283
+
284
+ let i = 0;
285
+ const limit8 = len - 7;
286
+ for (; i < limit8; i += 8) {
287
+ dot0 += a[i] * b[i] + a[i + 4] * b[i + 4];
288
+ dot1 += a[i + 1] * b[i + 1] + a[i + 5] * b[i + 5];
289
+ dot2 += a[i + 2] * b[i + 2] + a[i + 6] * b[i + 6];
290
+ dot3 += a[i + 3] * b[i + 3] + a[i + 7] * b[i + 7];
291
+ normA0 += a[i] * a[i] + a[i + 4] * a[i + 4];
292
+ normA1 += a[i + 1] * a[i + 1] + a[i + 5] * a[i + 5];
293
+ normA2 += a[i + 2] * a[i + 2] + a[i + 6] * a[i + 6];
294
+ normA3 += a[i + 3] * a[i + 3] + a[i + 7] * a[i + 7];
295
+ normB0 += b[i] * b[i] + b[i + 4] * b[i + 4];
296
+ normB1 += b[i + 1] * b[i + 1] + b[i + 5] * b[i + 5];
297
+ normB2 += b[i + 2] * b[i + 2] + b[i + 6] * b[i + 6];
298
+ normB3 += b[i + 3] * b[i + 3] + b[i + 7] * b[i + 7];
299
+ }
300
+
301
+ // Handle remaining elements
302
+ for (; i < len; i++) {
303
+ dot0 += a[i] * b[i];
304
+ normA0 += a[i] * a[i];
305
+ normB0 += b[i] * b[i];
306
+ }
307
+
308
+ const dot = dot0 + dot1 + dot2 + dot3;
309
+ const normA = normA0 + normA1 + normA2 + normA3;
310
+ const normB = normB0 + normB1 + normB2 + normB3;
311
+
312
+ const magnitude = Math.sqrt(normA * normB);
313
+ if (magnitude === 0) return 1;
314
+
315
+ const distance = 1 - (dot / magnitude);
316
+ return distance < 1e-10 ? 0 : distance;
317
+ }
318
+
319
+ /**
320
+ * QuantizedVectorStore - Efficient storage for quantized vectors
321
+ */
322
+ export class QuantizedVectorStore {
323
+ private quantizer: ScalarQuantizer;
324
+ private vectors: Int8Array[]; // Quantized vectors
325
+ private originalVectors: Float32Array[] | null; // Keep originals for rescoring
326
+ private keepOriginals: boolean;
327
+
328
+ constructor(dimension: number, keepOriginals = true) {
329
+ this.quantizer = new ScalarQuantizer(dimension);
330
+ this.vectors = [];
331
+ this.originalVectors = keepOriginals ? [] : null;
332
+ this.keepOriginals = keepOriginals;
333
+ }
334
+
335
+ /**
336
+ * Train the quantizer and add vectors
337
+ */
338
+ addVectors(vectors: Float32Array[]): void {
339
+ if (!this.quantizer.isTrained()) {
340
+ this.quantizer.train(vectors);
341
+ }
342
+
343
+ for (const v of vectors) {
344
+ this.vectors.push(this.quantizer.quantize(v));
345
+ if (this.keepOriginals && this.originalVectors) {
346
+ this.originalVectors.push(v);
347
+ }
348
+ }
349
+ }
350
+
351
+ /**
352
+ * Get quantized vector by index
353
+ */
354
+ getQuantized(index: number): Int8Array {
355
+ return this.vectors[index];
356
+ }
357
+
358
+ /**
359
+ * Get original float32 vector by index (for rescoring)
360
+ */
361
+ getOriginal(index: number): Float32Array | null {
362
+ if (!this.originalVectors) return null;
363
+ return this.originalVectors[index];
364
+ }
365
+
366
+ /**
367
+ * Get number of vectors
368
+ */
369
+ size(): number {
370
+ return this.vectors.length;
371
+ }
372
+
373
+ /**
374
+ * Calculate memory usage in bytes
375
+ */
376
+ memoryUsage(): { quantized: number; original: number; total: number } {
377
+ const quantized = this.vectors.reduce((sum, v) => sum + v.length, 0);
378
+ const original = this.originalVectors
379
+ ? this.originalVectors.reduce((sum, v) => sum + v.length * 4, 0)
380
+ : 0;
381
+ return { quantized, original, total: quantized + original };
382
+ }
383
+ }