verso-db 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/CHANGELOG.md +46 -0
  2. package/LICENSE +21 -0
  3. package/README.md +252 -0
  4. package/dist/BinaryHeap.d.ts +25 -0
  5. package/dist/BinaryHeap.d.ts.map +1 -0
  6. package/dist/Collection.d.ts +156 -0
  7. package/dist/Collection.d.ts.map +1 -0
  8. package/dist/HNSWIndex.d.ts +357 -0
  9. package/dist/HNSWIndex.d.ts.map +1 -0
  10. package/dist/MaxBinaryHeap.d.ts +63 -0
  11. package/dist/MaxBinaryHeap.d.ts.map +1 -0
  12. package/dist/Storage.d.ts +54 -0
  13. package/dist/Storage.d.ts.map +1 -0
  14. package/dist/VectorDB.d.ts +44 -0
  15. package/dist/VectorDB.d.ts.map +1 -0
  16. package/dist/backends/DistanceBackend.d.ts +5 -0
  17. package/dist/backends/DistanceBackend.d.ts.map +1 -0
  18. package/dist/backends/JsDistanceBackend.d.ts +37 -0
  19. package/dist/backends/JsDistanceBackend.d.ts.map +1 -0
  20. package/dist/encoding/DeltaEncoder.d.ts +61 -0
  21. package/dist/encoding/DeltaEncoder.d.ts.map +1 -0
  22. package/dist/errors.d.ts +58 -0
  23. package/dist/errors.d.ts.map +1 -0
  24. package/dist/index.d.ts +64 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +3732 -0
  27. package/dist/presets.d.ts +91 -0
  28. package/dist/presets.d.ts.map +1 -0
  29. package/dist/quantization/ScalarQuantizer.d.ts +114 -0
  30. package/dist/quantization/ScalarQuantizer.d.ts.map +1 -0
  31. package/dist/storage/BatchWriter.d.ts +104 -0
  32. package/dist/storage/BatchWriter.d.ts.map +1 -0
  33. package/dist/storage/BunStorageBackend.d.ts +58 -0
  34. package/dist/storage/BunStorageBackend.d.ts.map +1 -0
  35. package/dist/storage/MemoryBackend.d.ts +44 -0
  36. package/dist/storage/MemoryBackend.d.ts.map +1 -0
  37. package/dist/storage/OPFSBackend.d.ts +59 -0
  38. package/dist/storage/OPFSBackend.d.ts.map +1 -0
  39. package/dist/storage/StorageBackend.d.ts +66 -0
  40. package/dist/storage/StorageBackend.d.ts.map +1 -0
  41. package/dist/storage/WriteAheadLog.d.ts +111 -0
  42. package/dist/storage/WriteAheadLog.d.ts.map +1 -0
  43. package/dist/storage/createStorageBackend.d.ts +40 -0
  44. package/dist/storage/createStorageBackend.d.ts.map +1 -0
  45. package/dist/storage/index.d.ts +30 -0
  46. package/dist/storage/index.d.ts.map +1 -0
  47. package/package.json +98 -0
  48. package/src/BinaryHeap.ts +131 -0
  49. package/src/Collection.ts +695 -0
  50. package/src/HNSWIndex.ts +1839 -0
  51. package/src/MaxBinaryHeap.ts +175 -0
  52. package/src/Storage.ts +435 -0
  53. package/src/VectorDB.ts +109 -0
  54. package/src/backends/DistanceBackend.ts +17 -0
  55. package/src/backends/JsDistanceBackend.ts +227 -0
  56. package/src/encoding/DeltaEncoder.ts +217 -0
  57. package/src/errors.ts +110 -0
  58. package/src/index.ts +138 -0
  59. package/src/presets.ts +229 -0
  60. package/src/quantization/ScalarQuantizer.ts +383 -0
  61. package/src/storage/BatchWriter.ts +336 -0
  62. package/src/storage/BunStorageBackend.ts +161 -0
  63. package/src/storage/MemoryBackend.ts +120 -0
  64. package/src/storage/OPFSBackend.ts +250 -0
  65. package/src/storage/StorageBackend.ts +74 -0
  66. package/src/storage/WriteAheadLog.ts +326 -0
  67. package/src/storage/createStorageBackend.ts +137 -0
  68. package/src/storage/index.ts +53 -0
@@ -0,0 +1,1839 @@
1
+ // Bun-native file operations - no fs import needed
2
+ import type { DistanceBackend } from './backends/DistanceBackend';
3
+ import { JsDistanceBackend, dotProductFast, l2SquaredFast } from './backends/JsDistanceBackend';
4
+ import { BinaryHeap } from './BinaryHeap';
5
+ import { MaxBinaryHeap } from './MaxBinaryHeap';
6
+ import { ScalarQuantizer, l2SquaredInt8, cosineDistanceInt8 } from './quantization/ScalarQuantizer';
7
+ import { deltaEncodeNeighbors, deltaDecodeNeighbors, deltaEncodedSize } from './encoding/DeltaEncoder';
8
+ export type DistanceMetric = 'cosine' | 'euclidean' | 'dot_product';
9
+
10
/**
 * A single vertex of the HNSW graph.
 */
export interface Node {
  /** Numeric identifier of the point. */
  id: number;
  /** Highest layer this node takes part in. */
  level: number;
  /** The vector stored for this point. */
  vector: Float32Array;
  /** Adjacency lists, one per layer: `neighbors[layer][i]` is the id of the i-th neighbor on that layer. */
  neighbors: number[][];
}
16
+
17
+ export class HNSWIndex {
18
+ private M: number; // Max number of connections per node per level
19
+ private M0: number; // Max number of connections for level 0 (typically M * 2)
20
+ private efConstruction: number; // Size of candidate list during construction
21
+ private levelMult: number; // Probability multiplier for level selection
22
+ private maxLevel: number;
23
+ private entryPointId: number;
24
+ // OPTIMIZATION: Use array instead of Map for O(1) indexed access (3-5x faster than Map.get)
25
+ private nodes: (Node | undefined)[];
26
+ private nodeCount: number = 0;
27
+ private dimension: number;
28
+ private metric: DistanceMetric;
29
+ private maxLayers: number;
30
+ private distanceBackend: DistanceBackend;
31
+
32
+ // OPTIMIZATION: Flat vector storage for cache-friendly batch distance calculations
33
+ // All vectors stored contiguously: [v0_d0, v0_d1, ..., v1_d0, v1_d1, ...]
34
+ private flatVectors: Float32Array;
35
+ private flatVectorsCapacity: number = 0;
36
+
37
+ // Heap and scratch buffers for reuse
38
+ private searchHeap: BinaryHeap;
39
+ // TypedArray for fast visited tracking - much faster than Set<number>
40
+ private visitedArray: Uint8Array;
41
+ private visitedArraySize: number;
42
+ private visitedGeneration: number = 0; // Increment to "clear" without filling
43
+ // Reusable heaps for searchLayer - avoids allocation on every call
44
+ private candidatesHeap: BinaryHeap;
45
+ private resultsHeap: MaxBinaryHeap;
46
+ private selectionHeap: BinaryHeap; // Reusable heap for selectNeighbors
47
+ private heapCapacity: number;
48
+ // Pre-normalization optimization for cosine distance
49
+ private vectorsAreNormalized: boolean = false;
50
+ // Cached distance function to avoid switch overhead
51
+ private distanceFn: (a: Float32Array, b: Float32Array) => number;
52
+
53
+ // Quantization support for 3-4x faster search with Int8
54
+ private scalarQuantizer: ScalarQuantizer | null = null;
55
+ // OPTIMIZATION: Use array instead of Map for int8 vectors too
56
+ private int8Vectors: (Int8Array | undefined)[] = [];
57
+ private quantizationEnabled: boolean = false;
58
+
59
+ // Lazy loading support (v3+ format)
60
+ private lazyLoadEnabled: boolean = false;
61
+ private vectorOffsets: Map<number, number> = new Map(); // nodeId -> byte offset in file
62
+ private vectorBuffer: ArrayBuffer | null = null; // Cached buffer for lazy loading
63
+ private vectorsLoaded: Set<number> = new Set(); // Track which vectors are loaded
64
+
65
+ // Reusable query buffer for search operations - avoids allocation per query
66
+ // Profiling showed 17% improvement with buffer reuse (99.5ms → 82.7ms for 1000 queries)
67
+ private queryNormBuffer: Float32Array;
68
+
69
+ // Bulk construction optimization - O(1) neighbor lookup during construction
70
+ // Uses parallel Set<number>[] alongside neighbor arrays for fast membership testing
71
+ // Memory is released after construction completes
72
+ private neighborSets: Map<number, Set<number>[]> = new Map();
73
+ private constructionMode: boolean = false;
74
+
75
/**
 * Create an empty HNSW index.
 *
 * @param dimension - Length every stored and queried vector must have (positive integer).
 * @param metric - Distance metric; selects the cached distance function.
 * @param M - Max connections per node on layers above 0 (layer 0 uses `M * 2`). Must be greater than 1.
 * @param efConstruction - Candidate list size used while inserting points.
 * @param distanceBackend - Optional distance backend; a `JsDistanceBackend` is created when omitted.
 * @throws RangeError if `dimension` is not a positive integer or `M` is not greater than 1.
 * @throws Error if `metric` is not one of the supported values.
 */
constructor(dimension: number, metric: DistanceMetric = 'cosine', M = 24, efConstruction = 200, distanceBackend?: DistanceBackend) {
  // The flat vector buffer is sized as capacity * dimension, so a fractional,
  // zero or NaN dimension would otherwise silently yield a mis-sized buffer.
  if (!Number.isInteger(dimension) || dimension <= 0) {
    throw new RangeError(`Dimension must be a positive integer, got ${dimension}`);
  }
  // levelMult is 1 / ln(M): ln(1) = 0 gives Infinity and M < 1 gives a negative
  // multiplier, both of which break the level distribution in selectLevel().
  if (!(M > 1)) {
    throw new RangeError(`M must be greater than 1, got ${M}`);
  }

  this.dimension = dimension;
  this.metric = metric;
  this.M = M;
  this.M0 = M * 2;
  this.efConstruction = efConstruction;
  this.levelMult = 1 / Math.log(M);
  this.maxLevel = -1;
  this.entryPointId = -1;

  // OPTIMIZATION: pre-allocate the node array and the flat vector storage
  const initialCapacity = 10000;
  this.nodes = new Array(initialCapacity);
  this.nodeCount = 0;
  this.flatVectorsCapacity = initialCapacity;
  this.flatVectors = new Float32Array(initialCapacity * dimension);
  this.maxLayers = 32; // Upper bound on the number of layers a node can span
  this.distanceBackend = distanceBackend ?? new JsDistanceBackend();

  // Heap and scratch buffers
  this.searchHeap = new BinaryHeap(1000); // Initial capacity

  // Visited tracking uses a typed array plus a generation counter;
  // the array grows on demand in markVisited().
  this.visitedArraySize = 10000;
  this.visitedArray = new Uint8Array(this.visitedArraySize);
  this.visitedGeneration = 1;

  // searchLayer heaps are sized for typical ef values and replaced by
  // ensureHeapCapacity() when a larger ef is requested.
  this.heapCapacity = Math.max(efConstruction * 2, 500);
  this.candidatesHeap = new BinaryHeap(this.heapCapacity);
  this.resultsHeap = new MaxBinaryHeap(this.heapCapacity);
  // Selection heap sized for M0 (largest neighbor list size)
  this.selectionHeap = new BinaryHeap(Math.max(M * 2, efConstruction));

  // For the cosine metric vectors are normalized on insert so that the
  // distance reduces to 1 - dot product.
  this.vectorsAreNormalized = (metric === 'cosine');

  // Query normalization buffer, reused across searches
  this.queryNormBuffer = new Float32Array(dimension);

  // Cache the distance function once so no metric dispatch happens per call.
  if (metric === 'cosine') {
    // Pre-normalized vectors: distance = 1 - dot, with tiny values clamped to 0
    this.distanceFn = (a: Float32Array, b: Float32Array): number => {
      const dot = dotProductFast(a, b);
      const distance = 1 - dot;
      return distance < 1e-10 ? 0 : distance;
    };
  } else if (metric === 'euclidean') {
    this.distanceFn = (a: Float32Array, b: Float32Array): number => {
      return Math.sqrt(l2SquaredFast(a, b));
    };
  } else if (metric === 'dot_product') {
    // Negated so that a larger dot product means a smaller distance
    this.distanceFn = (a: Float32Array, b: Float32Array): number => {
      return -dotProductFast(a, b);
    };
  } else {
    throw new Error(`Unsupported metric: ${metric}`);
  }
}
138
+
139
+ // ============================================
140
+ // OPTIMIZATION: Capacity and flat storage helpers
141
+ // ============================================
142
+
143
/**
 * Grow the node array and the flat vector buffer so that both can hold at
 * least `minCapacity` entries. A store that has to grow at least doubles.
 */
private ensureCapacity(minCapacity: number): void {
  const nodeSlots = this.nodes.length;
  if (nodeSlots < minCapacity) {
    const grownNodes: (Node | undefined)[] = new Array(Math.max(nodeSlots * 2, minCapacity));
    // Only the first nodeCount slots are copied over.
    for (let slot = 0; slot < this.nodeCount; slot += 1) {
      grownNodes[slot] = this.nodes[slot];
    }
    this.nodes = grownNodes;
  }

  const vectorSlots = this.flatVectorsCapacity;
  if (vectorSlots < minCapacity) {
    const grownCapacity = Math.max(vectorSlots * 2, minCapacity);
    const grownBuffer = new Float32Array(grownCapacity * this.dimension);
    grownBuffer.set(this.flatVectors);
    this.flatVectors = grownBuffer;
    this.flatVectorsCapacity = grownCapacity;
  }
}
166
+
167
/**
 * Return the slice of the flat buffer that belongs to `nodeId`.
 * The result is a `subarray` view onto the shared buffer, not a copy.
 */
private getFlatVector(nodeId: number): Float32Array {
  const start = nodeId * this.dimension;
  const end = start + this.dimension;
  return this.flatVectors.subarray(start, end);
}
175
+
176
/**
 * Copy `vector` into the flat buffer at the slot reserved for `nodeId`
 * (element offset `nodeId * dimension`).
 */
private setFlatVector(nodeId: number, vector: Float32Array): void {
  this.flatVectors.set(vector, nodeId * this.dimension);
}
183
+
184
/**
 * Look up a node by id with a direct array read.
 *
 * @returns The node, or `undefined` when no node is stored under `id`.
 */
private getNode(id: number): Node | undefined {
  const found = this.nodes[id];
  return found;
}
190
+
191
/**
 * Store `node` under its own id: grows the backing stores if needed, mirrors
 * the vector into flat storage and raises `nodeCount` to cover the id.
 */
private setNode(node: Node): void {
  const slot = node.id;
  this.ensureCapacity(slot + 1);
  this.nodes[slot] = node;
  // Keep the flat buffer in sync with the node's vector
  this.setFlatVector(slot, node.vector);
  // nodeCount is one past the highest id stored so far
  if (this.nodeCount <= slot) {
    this.nodeCount = slot + 1;
  }
}
205
+
206
+ // OPTIMIZATION: Reusable arrays for batch distance calculation
207
+ private batchNeighborIds: number[] = [];
208
+ private batchDistances: number[] = [];
209
+
210
/**
 * Compute the distance from `query` to every node listed in `neighborIds`,
 * reading the stored vectors straight out of the flat buffer.
 *
 * Results are written to `outDistances[i]` for `i` in `[0, neighborIds.length)`:
 * - cosine: `1 - dot`, clamped to 0 below 1e-10 (stored vectors are pre-normalized)
 * - euclidean: square root of the summed squared differences
 * - anything else (dot_product): negated dot product
 *
 * The inner loops are unrolled 8-wide with eight independent accumulators.
 */
private calculateDistancesBatch(
  query: Float32Array,
  neighborIds: number[],
  outDistances: number[]
): void {
  const dim = this.dimension;
  const store = this.flatVectors;
  const unrolledEnd = dim - 7;

  if (this.metric !== 'cosine' && this.metric === 'euclidean') {
    for (let n = 0; n < neighborIds.length; n++) {
      const base = neighborIds[n] * dim;
      let a0 = 0, a1 = 0, a2 = 0, a3 = 0;
      let a4 = 0, a5 = 0, a6 = 0, a7 = 0;
      let k = 0;

      while (k < unrolledEnd) {
        const e0 = store[base + k] - query[k];
        const e1 = store[base + k + 1] - query[k + 1];
        const e2 = store[base + k + 2] - query[k + 2];
        const e3 = store[base + k + 3] - query[k + 3];
        const e4 = store[base + k + 4] - query[k + 4];
        const e5 = store[base + k + 5] - query[k + 5];
        const e6 = store[base + k + 6] - query[k + 6];
        const e7 = store[base + k + 7] - query[k + 7];
        a0 += e0 * e0;
        a1 += e1 * e1;
        a2 += e2 * e2;
        a3 += e3 * e3;
        a4 += e4 * e4;
        a5 += e5 * e5;
        a6 += e6 * e6;
        a7 += e7 * e7;
        k += 8;
      }
      // Tail elements that did not fit the 8-wide stride
      while (k < dim) {
        const e = store[base + k] - query[k];
        a0 += e * e;
        k++;
      }

      outDistances[n] = Math.sqrt(a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7);
    }
    return;
  }

  // cosine and dot_product share the same dot-product kernel and differ only
  // in how the final value is derived from it.
  const isCosine = this.metric === 'cosine';
  for (let n = 0; n < neighborIds.length; n++) {
    const base = neighborIds[n] * dim;
    let a0 = 0, a1 = 0, a2 = 0, a3 = 0;
    let a4 = 0, a5 = 0, a6 = 0, a7 = 0;
    let k = 0;

    while (k < unrolledEnd) {
      a0 += store[base + k] * query[k];
      a1 += store[base + k + 1] * query[k + 1];
      a2 += store[base + k + 2] * query[k + 2];
      a3 += store[base + k + 3] * query[k + 3];
      a4 += store[base + k + 4] * query[k + 4];
      a5 += store[base + k + 5] * query[k + 5];
      a6 += store[base + k + 6] * query[k + 6];
      a7 += store[base + k + 7] * query[k + 7];
      k += 8;
    }
    // Tail elements that did not fit the 8-wide stride
    while (k < dim) {
      a0 += store[base + k] * query[k];
      k++;
    }

    const dot = a0 + a1 + a2 + a3 + a4 + a5 + a6 + a7;
    if (isCosine) {
      const gap = 1 - dot;
      outDistances[n] = gap < 1e-10 ? 0 : gap;
    } else {
      outDistances[n] = -dot;
    }
  }
}
309
+
310
/**
 * Tell whether `id` was marked during the current search.
 * A slot counts as visited only when it holds the current generation value;
 * ids beyond the end of the array are never visited.
 */
private isVisited(id: number): boolean {
  const insideArray = !(id >= this.visitedArraySize);
  return insideArray && this.visitedArray[id] === this.visitedGeneration;
}
320
+
321
/**
 * Record `id` as visited for the current search by stamping its slot with
 * the current generation. The array is enlarged first when `id` is out of range.
 */
private markVisited(id: number): void {
  if (id >= this.visitedArraySize) {
    // At least double, and always leave headroom past the requested id
    const enlargedSize = Math.max(this.visitedArraySize * 2, id + 1000);
    const enlarged = new Uint8Array(enlargedSize);
    enlarged.set(this.visitedArray);
    this.visitedArraySize = enlargedSize;
    this.visitedArray = enlarged;
  }
  this.visitedArray[id] = this.visitedGeneration;
}
336
+
337
/**
 * Start a fresh visited set by moving to the next generation value instead
 * of zeroing the array. Once the generation would pass 250 the array is
 * zeroed and the generation restarts at 1, keeping it inside Uint8 range.
 */
private clearVisited(): void {
  const nextGeneration = this.visitedGeneration + 1;
  if (nextGeneration > 250) {
    this.visitedArray.fill(0);
    this.visitedGeneration = 1;
  } else {
    this.visitedGeneration = nextGeneration;
  }
}
349
+
350
/**
 * Scale `vector` to unit length IN PLACE and return the same array.
 * A vector whose norm is not greater than zero is returned unchanged.
 * The squared norm is accumulated 8-wide with eight independent sums.
 */
private normalizeVector(vector: Float32Array): Float32Array {
  const size = vector.length;
  const unrolledEnd = size - 7;
  let q0 = 0, q1 = 0, q2 = 0, q3 = 0;
  let q4 = 0, q5 = 0, q6 = 0, q7 = 0;
  let pos = 0;

  while (pos < unrolledEnd) {
    q0 += vector[pos] * vector[pos];
    q1 += vector[pos + 1] * vector[pos + 1];
    q2 += vector[pos + 2] * vector[pos + 2];
    q3 += vector[pos + 3] * vector[pos + 3];
    q4 += vector[pos + 4] * vector[pos + 4];
    q5 += vector[pos + 5] * vector[pos + 5];
    q6 += vector[pos + 6] * vector[pos + 6];
    q7 += vector[pos + 7] * vector[pos + 7];
    pos += 8;
  }
  // Tail elements that did not fit the 8-wide stride
  while (pos < size) {
    q0 += vector[pos] * vector[pos];
    pos++;
  }

  const magnitude = Math.sqrt(q0 + q1 + q2 + q3 + q4 + q5 + q6 + q7);
  if (!(magnitude > 0)) {
    return vector;
  }

  const scale = 1 / magnitude;
  for (let k = 0; k < size; k++) {
    vector[k] *= scale;
  }
  return vector;
}
384
+
385
/**
 * Draw a random top layer for a new node: `floor(-ln(r) * levelMult)` for a
 * `Math.random()` sample `r`, capped at `maxLayers - 1`.
 */
private selectLevel(): number {
  const sample = Math.random();
  const drawn = Math.floor(-Math.log(sample) * this.levelMult);
  const highestAllowed = this.maxLayers - 1;
  return Math.min(drawn, highestAllowed);
}
390
+
391
/**
 * Distance between `a` and `b` under the index's metric.
 * Delegates to the distance function cached by the constructor, so no
 * per-call metric dispatch takes place.
 */
calculateDistance(a: Float32Array, b: Float32Array): number {
  const distance = this.distanceFn(a, b);
  return distance;
}
398
+
399
/**
 * Return the vector of node `nodeId`, or `null` when no such node exists.
 * With lazy loading enabled, `loadVector` is invoked first for ids not yet
 * recorded in `vectorsLoaded`.
 * NOTE(review): `loadVector` is defined outside this view — presumably it
 * fills in `node.vector`; confirm.
 */
private getNodeVector(nodeId: number): Float32Array | null {
  const node = this.nodes[nodeId];
  if (!node) {
    return null;
  }

  const needsLoad = this.lazyLoadEnabled && !this.vectorsLoaded.has(nodeId);
  if (needsLoad) {
    this.loadVector(nodeId);
  }

  return node.vector;
}
412
+
413
/**
 * Connection budget for a layer: `M0` on layer 0, `M` on every other layer.
 */
private getLayerMaxConnections(layer: number): number {
  if (layer === 0) {
    return this.M0;
  }
  return this.M;
}
416
+
417
/**
 * Pick the neighbors for `currentId` on `layer` from `candidates`.
 * Candidates are pushed into the reusable selection heap (the node itself is
 * skipped) and popped until the layer's connection budget is reached or the
 * heap runs dry. A popped id of -1 is ignored.
 *
 * @returns Ids in the order the selection heap pops them, at most
 *          `getLayerMaxConnections(layer)` entries.
 */
private selectNeighbors(currentId: number, candidates: Array<{ id: number; distance: number }>, layer: number): number[] {
  const budget = this.getLayerMaxConnections(layer);
  const heap = this.selectionHeap;

  // Reuse the shared heap: start from empty, then load every candidate but self
  heap.clear();
  for (let c = 0; c < candidates.length; c++) {
    const entry = candidates[c];
    if (entry.id === currentId) continue;
    heap.push(entry.id, entry.distance);
  }

  // Candidates coming from searchLayer are already unique (visited tracking),
  // so no de-duplication is needed here.
  const picked: number[] = [];
  while (picked.length < budget && !heap.isEmpty()) {
    const nextId = heap.pop();
    if (nextId !== -1) {
      picked.push(nextId);
    }
  }

  return picked;
}
451
+
452
/**
 * Link `fromId` and `toId` to each other on `level`, skipping any direction
 * that is already present. Does nothing when either node is missing.
 *
 * In construction mode, membership is tracked in per-node, per-level Sets
 * (`neighborSets`) kept alongside the neighbor arrays. Otherwise the
 * neighbor array itself is scanned with `includes`.
 */
private addBidirectionalConnection(fromId: number, toId: number, level: number): void {
  const source = this.nodes[fromId];
  const target = this.nodes[toId];
  if (!source || !target) return;

  // Both endpoints need an adjacency list on this level
  if (!source.neighbors[level]) source.neighbors[level] = [];
  if (!target.neighbors[level]) target.neighbors[level] = [];

  if (!this.constructionMode) {
    // Single-insert path: linear scan of the neighbor arrays
    const sourceLinks = source.neighbors[level];
    if (!sourceLinks.includes(toId)) {
      sourceLinks.push(toId);
    }
    const targetLinks = target.neighbors[level];
    if (!targetLinks.includes(fromId)) {
      targetLinks.push(fromId);
    }
    return;
  }

  // Bulk-construction path: lazily create the membership Sets, seeded from
  // whatever the neighbor arrays currently hold.
  let sourceSets = this.neighborSets.get(fromId);
  if (!sourceSets) {
    sourceSets = [];
    this.neighborSets.set(fromId, sourceSets);
  }
  if (!sourceSets[level]) {
    sourceSets[level] = new Set(source.neighbors[level]);
  }

  let targetSets = this.neighborSets.get(toId);
  if (!targetSets) {
    targetSets = [];
    this.neighborSets.set(toId, targetSets);
  }
  if (!targetSets[level]) {
    targetSets[level] = new Set(target.neighbors[level]);
  }

  const sourceMembers = sourceSets[level];
  if (!sourceMembers.has(toId)) {
    sourceMembers.add(toId);
    source.neighbors[level].push(toId);
  }

  const targetMembers = targetSets[level];
  if (!targetMembers.has(fromId)) {
    targetMembers.add(fromId);
    target.neighbors[level].push(fromId);
  }
}
506
+
507
/**
 * Make sure the candidate and result heaps can serve a search with the given
 * `ef`. The required capacity is `max(ef * 2, 100)`; when that exceeds the
 * current capacity both heaps are replaced by new, larger ones.
 */
private ensureHeapCapacity(ef: number): void {
  const needed = Math.max(ef * 2, 100);
  if (!(needed > this.heapCapacity)) {
    return;
  }
  this.heapCapacity = needed;
  this.candidatesHeap = new BinaryHeap(needed);
  this.resultsHeap = new MaxBinaryHeap(needed);
}
519
+
520
/**
 * Search one layer with the standard two-heap HNSW procedure.
 *
 * - candidatesHeap (min-heap): nodes still to expand, closest first
 * - resultsHeap (max-heap): best `ef` nodes found so far, furthest on top
 *
 * The loop ends when the candidate heap is empty, or when `ef` results are
 * held and the closest remaining candidate is farther than the furthest result.
 *
 * @param query - Query vector (already normalized by the caller when required).
 * @param nearest - Entry point for this layer and its distance to `query`.
 * @param layer - Layer whose adjacency lists are followed.
 * @param ef - Maximum number of results to keep.
 * @returns Up to `ef` results ordered from closest to furthest.
 */
private searchLayer(
  query: Float32Array,
  nearest: { id: number; distance: number },
  layer: number,
  ef: number
): Array<{ id: number; distance: number }> {
  // Fresh visited set for this search
  this.clearVisited();

  // Make sure the shared heaps are large enough, then reuse them
  this.ensureHeapCapacity(ef);
  this.candidatesHeap.clear();
  this.resultsHeap.clear();

  // Seed both heaps with the entry point
  this.markVisited(nearest.id);
  this.candidatesHeap.push(nearest.id, nearest.distance);
  this.resultsHeap.push(nearest.id, nearest.distance);

  // Cached top of resultsHeap; refreshed whenever that heap changes
  let furthestResultDist = nearest.distance;

  // Reusable scratch arrays for the batched distance computation
  const batchIds = this.batchNeighborIds;
  const batchDists = this.batchDistances;

  while (!this.candidatesHeap.isEmpty()) {
    // Closest unexplored candidate
    const closestCandidateDist = this.candidatesHeap.peekValue();
    const closestCandidateId = this.candidatesHeap.pop();

    if (closestCandidateId === -1) continue;

    // TERMINATION: closest candidate is already farther than the worst result
    if (this.resultsHeap.size() >= ef && closestCandidateDist > furthestResultDist) {
      break;
    }

    const node = this.nodes[closestCandidateId];
    if (!node) continue;

    // A node without a list on this layer has nothing to expand
    // (avoids allocating a throwaway empty array).
    const neighbors = node.neighbors[layer];
    if (!neighbors) continue;

    if (!this.lazyLoadEnabled) {
      // Batched path: vectors are read from flat storage.
      // Collect the unvisited neighbors into the reusable id buffer. It is
      // truncated in place so it can be handed to calculateDistancesBatch
      // directly, with no per-iteration slice() copy.
      batchIds.length = 0;
      for (let i = 0; i < neighbors.length; i++) {
        const neighborId = neighbors[i];
        if (!this.isVisited(neighborId)) {
          this.markVisited(neighborId);
          batchIds.push(neighborId);
        }
      }

      const batchCount = batchIds.length;
      if (batchCount > 0) {
        // Ensure the output buffer is large enough
        if (batchDists.length < batchCount) {
          batchDists.length = batchCount;
        }

        this.calculateDistancesBatch(query, batchIds, batchDists);

        for (let i = 0; i < batchCount; i++) {
          const neighborId = batchIds[i];
          const distance = batchDists[i];

          // Keep the neighbor if there is room or it beats the worst result
          if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
            this.candidatesHeap.push(neighborId, distance);
            this.resultsHeap.push(neighborId, distance);

            // Cap the result set at ef by evicting the furthest entry
            if (this.resultsHeap.size() > ef) {
              this.resultsHeap.pop();
            }
            furthestResultDist = this.resultsHeap.peekValue();
          }
        }
      }
    } else {
      // Lazy-loaded path: vectors may not be in flat storage, so fetch and
      // measure them one at a time.
      for (const neighborId of neighbors) {
        if (this.isVisited(neighborId)) continue;
        this.markVisited(neighborId);

        const neighborVector = this.getNodeVector(neighborId);
        if (!neighborVector) continue;

        const distance = this.calculateDistance(query, neighborVector);

        // Keep the neighbor if there is room or it beats the worst result
        if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
          this.candidatesHeap.push(neighborId, distance);
          this.resultsHeap.push(neighborId, distance);

          // Cap the result set at ef by evicting the furthest entry
          if (this.resultsHeap.size() > ef) {
            this.resultsHeap.pop();
          }
          furthestResultDist = this.resultsHeap.peekValue();
        }
      }
    }
  }

  // Drain the max-heap (furthest first) into a pre-sized array, filling it
  // from the back so the result is ordered closest-first without a reverse().
  const resultCount = this.resultsHeap.size();
  const results: Array<{ id: number; distance: number }> = new Array(resultCount);
  let idx = resultCount - 1;
  while (!this.resultsHeap.isEmpty()) {
    const dist = this.resultsHeap.peekValue();
    const id = this.resultsHeap.pop();
    results[idx--] = { id, distance: dist };
  }

  return results;
}
653
+
654
/**
 * Heap-free greedy descent on a single layer.
 *
 * Starting at `entryNode`, each round scans the current node's neighbors on
 * `level` (skipping visited ones) and moves to whichever is closest to
 * `query`. It stops after a round in which no neighbor was closer.
 *
 * @returns The id of the node reached and its distance to `query`
 *          (`Infinity` if the entry node's vector is unavailable and nothing
 *          closer was found).
 */
private greedySearch(query: Float32Array, entryNode: Node, level: number): { id: number; distance: number } {
  this.clearVisited();

  let bestNode = entryNode;
  // getNodeVector also covers the lazy-loading case for the entry node
  const startVector = this.getNodeVector(entryNode.id);
  let bestDistance = startVector ? this.calculateDistance(query, startVector) : Infinity;
  this.markVisited(bestNode.id);

  let movedCloser: boolean;
  do {
    movedCloser = false;
    // The list is captured once per round; moving to a closer node does not
    // change which list this round keeps scanning.
    const frontier = bestNode.neighbors[level] || [];

    for (const candidateId of frontier) {
      if (this.isVisited(candidateId)) continue;
      this.markVisited(candidateId);

      const candidateVector = this.getNodeVector(candidateId);
      if (!candidateVector) continue;

      const candidateNode = this.nodes[candidateId];
      if (!candidateNode) continue;

      const candidateDistance = this.calculateDistance(query, candidateVector);
      if (candidateDistance < bestDistance) {
        bestDistance = candidateDistance;
        bestNode = candidateNode;
        movedCloser = true;
      }
    }
  } while (movedCloser);

  return { id: bestNode.id, distance: bestDistance };
}
692
+
693
/**
 * Promise-returning form of {@link addPointSync}, kept for API compatibility.
 * The insertion itself runs synchronously; an error thrown by it surfaces as
 * a rejected promise.
 *
 * @param id - Node id to insert under.
 * @param vector - Vector to insert.
 * @param options - `skipNormalization`: set when the vector is already unit-normalized.
 */
async addPoint(id: number, vector: number[] | Float32Array, options?: { skipNormalization?: boolean }): Promise<void> {
  this.addPointSync(id, vector, options);
}
700
+
701
/**
 * Insert a point into the index synchronously.
 *
 * Input handling:
 * - `number[]` input is always copied into a new Float32Array.
 * - Float32Array input is copied only when it has to be normalized (cosine
 *   metric without `skipNormalization`), because normalization is in place.
 * - Otherwise the caller's Float32Array is stored directly; the caller should
 *   not modify it afterwards.
 *
 * NOTE(review): inserting under an id that already exists overwrites the
 * stored node; nothing here removes links that other nodes hold to the old
 * one — confirm whether callers rely on re-insertion.
 *
 * @param id - Node id; must be a non-negative integer because it is used as
 *             an array index and as the flat-storage slot.
 * @param vector - Vector of exactly `dimension` elements.
 * @param options - `skipNormalization`: set true if vectors are already
 *                  unit-normalized (e.g., Cohere embeddings).
 * @throws RangeError if `id` is not a non-negative integer.
 * @throws Error if the vector length does not match the index dimension.
 */
addPointSync(id: number, vector: number[] | Float32Array, options?: { skipNormalization?: boolean }): void {
  // Validate before touching any state: an invalid id would otherwise be
  // written into the node array as a stray property and address flat storage
  // at a negative or fractional offset.
  if (!Number.isInteger(id) || id < 0) {
    throw new RangeError(`Node id must be a non-negative integer, got ${id}`);
  }
  if (vector.length !== this.dimension) {
    throw new Error(`Vector dimension ${vector.length} does not match expected ${this.dimension}`);
  }

  const shouldNormalize = this.vectorsAreNormalized && !options?.skipNormalization;

  let floatVector: Float32Array;
  if (Array.isArray(vector)) {
    floatVector = new Float32Array(vector);
  } else if (shouldNormalize) {
    // Copy first: normalizeVector modifies its argument in place
    floatVector = new Float32Array(vector);
  } else {
    floatVector = vector;
  }

  // Pre-normalize for the cosine metric so distance reduces to 1 - dot
  if (shouldNormalize) {
    floatVector = this.normalizeVector(floatVector);
  }

  // Build the node with one empty neighbor list per layer 0..level
  const level = this.selectLevel();
  const neighbors = new Array<number[]>(level + 1);
  for (let i = 0; i <= level; i++) {
    neighbors[i] = [];
  }
  const newNode: Node = {
    id,
    level,
    vector: floatVector,
    neighbors,
  };

  this.setNode(newNode);

  // The very first node simply becomes the entry point
  if (this.entryPointId === -1) {
    this.entryPointId = id;
    this.maxLevel = level;
    return;
  }

  // Start from the current entry point at the top layer
  let currentEntryPoint = this.nodes[this.entryPointId]!;
  let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(floatVector, currentEntryPoint.vector) };

  // Greedy descent through the layers above the new node's level
  for (let l = this.maxLevel; l > level; l--) {
    const result = this.greedySearch(floatVector, currentEntryPoint, l);
    if (result.distance < currentBest.distance) {
      currentBest = result;
      currentEntryPoint = this.nodes[currentBest.id]!;
    }
  }

  // Connect on every layer from the insertion level down to 0
  for (let l = Math.min(level, this.maxLevel); l >= 0; l--) {
    const searchResults = this.searchLayer(floatVector, currentBest, l, this.efConstruction);

    const selected = this.selectNeighbors(id, searchResults, l);
    for (const neighborId of selected) {
      this.addBidirectionalConnection(id, neighborId, l);
    }

    // The closest hit on this layer seeds the search on the next one
    if (searchResults.length > 0) {
      currentBest = searchResults[0];
    }
  }

  // A node that reaches a new top layer becomes the entry point
  if (level > this.maxLevel) {
    this.maxLevel = level;
    this.entryPointId = id;
  }
}
794
+
795
/**
 * Find the k approximate nearest neighbors of a query vector.
 *
 * @param query Query vector; its length must equal the index dimension
 * @param k Number of results to return; non-positive values yield an empty result
 * @param efSearch Size of the layer-0 candidate list. Defaults to max(2k, 50) and
 *                 is raised to k when a smaller value is supplied, since fewer
 *                 than k candidates can never produce k results.
 * @returns Up to k {id, distance} pairs ordered by distance, ties broken by id
 * @throws Error if the query length does not match the index dimension
 */
searchKNN(query: Float32Array, k: number, efSearch?: number): Array<{ id: number; distance: number }> {
  if (this.entryPointId === -1 || this.nodeCount === 0) {
    return [];
  }

  if (query.length !== this.dimension) {
    throw new Error(`Query dimension ${query.length} does not match expected ${this.dimension}`);
  }

  // Nothing to return for k <= 0 (or NaN); this also avoids the RangeError that
  // assigning a negative array length would raise further down.
  if (!(k > 0)) {
    return [];
  }

  // `||` is deliberate: an efSearch of 0 is treated as "not provided".
  const requestedEf = efSearch || Math.max(k * 2, 50);
  const effectiveEf = Math.max(requestedEf, k);

  // For normalized storage the query is normalized too. It is copied into a
  // reusable buffer first so the caller's array is never modified and no
  // per-query allocation is needed.
  let normalizedQuery = query;
  if (this.vectorsAreNormalized) {
    this.queryNormBuffer.set(query);
    normalizedQuery = this.normalizeVector(this.queryNormBuffer);
  }

  // Begin at the entry point on the top layer.
  let currentEntryPoint = this.nodes[this.entryPointId]!;
  const entryVector = this.getNodeVector(this.entryPointId);
  if (!entryVector) return [];
  let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(normalizedQuery, entryVector) };

  // Greedy descent through every layer above layer 0.
  for (let l = this.maxLevel; l > 0; l--) {
    const result = this.greedySearch(normalizedQuery, currentEntryPoint, l);
    if (result.distance < currentBest.distance) {
      currentBest = result;
      currentEntryPoint = this.nodes[currentBest.id]!;
    }
  }

  // Wide search on layer 0.
  const candidates = this.searchLayer(normalizedQuery, currentBest, 0, effectiveEf);

  // Order by distance, then id, so equal-distance results come back in a stable order.
  // The array returned by searchLayer is sorted and truncated in place.
  candidates.sort((a, b) => {
    const diff = a.distance - b.distance;
    return diff !== 0 ? diff : a.id - b.id;
  });

  if (candidates.length > k) candidates.length = k;

  return candidates;
}
845
+
846
/**
 * Run searchKNN for each query in turn and collect the results.
 * The visited-set state is cleared before the first query and after every query.
 *
 * @param queries Array of query vectors
 * @param k Number of nearest neighbors to return per query
 * @param efSearch Search effort parameter, forwarded unchanged to searchKNN
 * @returns One result array per query, in the same order as `queries`
 * @throws Error if a query's length does not match the index dimension
 *         (not checked when the index is empty)
 */
searchKNNBatch(
  queries: Float32Array[],
  k: number,
  efSearch?: number
): Array<Array<{ id: number; distance: number }>> {
  const total = queries.length;
  const output = new Array<Array<{ id: number; distance: number }>>(total);

  // Empty index: every query gets its own empty result list.
  const indexIsEmpty = this.entryPointId === -1 || this.nodeCount === 0;
  if (indexIsEmpty) {
    for (let q = 0; q < total; q++) {
      output[q] = [];
    }
    return output;
  }

  this.clearVisited();

  for (let q = 0; q < total; q++) {
    const current = queries[q];
    if (current.length !== this.dimension) {
      throw new Error(`Query dimension ${current.length} does not match expected ${this.dimension}`);
    }

    output[q] = this.searchKNN(current, k, efSearch);

    // Reset visited state before moving on to the next query.
    this.clearVisited();
  }

  return output;
}
892
+
893
/**
 * Batch search over queries packed back-to-back in one Float32Array, returning
 * results in flat typed arrays.
 *
 * Results for query q occupy slots [q * k, (q + 1) * k) of both output arrays.
 * When a query yields fewer than k results, the remaining slots hold id 0 and
 * distance Infinity; use the distance to tell padding from a real hit on id 0.
 *
 * @param queries Flat Float32Array containing all queries concatenated
 * @param numQueries Number of queries in the array
 * @param k Number of nearest neighbors to return per query
 * @param efSearch Search effort parameter, forwarded unchanged to searchKNN
 * @returns Object with flat arrays for ids and distances
 * @throws Error if `queries` holds fewer than numQueries * dimension values
 *         (not checked when the index is empty)
 */
searchKNNBatchFlat(
  queries: Float32Array,
  numQueries: number,
  k: number,
  efSearch?: number
): { ids: Uint32Array; distances: Float32Array } {
  // Typed arrays start zero-filled, so ids are already padded with 0;
  // distances are pre-filled with Infinity to mark unused slots.
  const ids = new Uint32Array(numQueries * k);
  const distances = new Float32Array(numQueries * k).fill(Infinity);

  if (this.entryPointId === -1 || this.nodeCount === 0) {
    return { ids, distances };
  }

  // Fail fast with a precise message instead of part-way through the batch:
  // a short buffer would otherwise surface as a dimension mismatch on whichever
  // query happens to run past the end.
  const requiredLength = numQueries * this.dimension;
  if (queries.length < requiredLength) {
    throw new Error(
      `Query buffer holds ${queries.length} values but ${numQueries} queries of dimension ${this.dimension} require ${requiredLength}`
    );
  }

  for (let q = 0; q < numQueries; q++) {
    // subarray creates a view over the packed buffer, not a copy.
    const queryStart = q * this.dimension;
    const query = queries.subarray(queryStart, queryStart + this.dimension);

    const results = this.searchKNN(query, k, efSearch);

    const resultStart = q * k;
    const found = Math.min(k, results.length);
    for (let i = 0; i < found; i++) {
      ids[resultStart + i] = results[i].id;
      distances[resultStart + i] = results[i].distance;
    }

    // Reset visited state before the next query.
    this.clearVisited();
  }

  return { ids, distances };
}
945
+
946
+ // ============================================
947
+ // Convenience Methods
948
+ // ============================================
949
+
950
/**
 * Insert one vector under an automatically chosen ID and return that ID.
 * The ID is the node count at the time of the call.
 *
 * NOTE(review): this presumably assumes IDs are assigned densely from 0; if
 * points were added earlier with arbitrary explicit IDs, confirm that the
 * generated ID cannot collide with one of them.
 *
 * @param vector Vector to add
 * @returns The auto-generated ID
 *
 * @example
 * ```typescript
 * const id = await index.add([0.1, 0.2, 0.3]);
 * console.log(`Added vector with ID: ${id}`);
 * ```
 */
async add(vector: number[] | Float32Array): Promise<number> {
  const assignedId = this.nodeCount;
  await this.addPoint(assignedId, vector);
  return assignedId;
}
968
+
969
/**
 * Convenience wrapper around searchKNN that also accepts plain number arrays.
 *
 * @param vector Query vector; a plain array is converted to a Float32Array first
 * @param k Number of results (default: 10)
 * @returns Array of {id, distance} results
 *
 * @example
 * ```typescript
 * const results = index.query([0.1, 0.2, 0.3], 5);
 * results.forEach(r => console.log(`ID: ${r.id}, Distance: ${r.distance}`));
 * ```
 */
query(vector: number[] | Float32Array, k: number = 10): Array<{ id: number; distance: number }> {
  if (Array.isArray(vector)) {
    return this.searchKNN(new Float32Array(vector), k);
  }
  // Already a Float32Array: pass it through without copying.
  return this.searchKNN(vector, k);
}
986
+
987
/**
 * Insert several vectors under automatically chosen, consecutive IDs.
 * IDs start at the node count observed when the call begins, and the vectors
 * are inserted through addPointsBulk.
 *
 * @param vectors Array of vectors to add
 * @returns The generated IDs, in the same order as `vectors`
 *
 * @example
 * ```typescript
 * const ids = await index.addAll([[0.1, 0.2], [0.3, 0.4]]);
 * ```
 */
async addAll(vectors: Array<number[] | Float32Array>): Promise<number[]> {
  // Nothing is inserted until addPointsBulk runs, so the base ID is stable
  // while the point list is being built.
  const firstId = this.nodeCount;
  const points = vectors.map((vector, position) => {
    const asFloat32 = vector instanceof Float32Array ? vector : new Float32Array(vector);
    return { id: firstId + position, vector: asFloat32 };
  });

  await this.addPointsBulk(points);

  return points.map(({ id }) => id);
}
1008
+
1009
/**
 * Promise-returning wrapper around addPointsBulkSync.
 * All insertion work happens synchronously inside this call; an error thrown
 * during insertion surfaces as a rejected promise.
 *
 * @param points Array of {id, vector} to add
 * @param options Optional flags, forwarded unchanged to addPointsBulkSync
 *   - skipNormalization: set true if the vectors are already unit-normalized
 * @example
 * ```typescript
 * await index.addPointsBulk([
 *   { id: 0, vector: new Float32Array([0.1, 0.2]) },
 *   { id: 1, vector: new Float32Array([0.3, 0.4]) },
 * ]);
 * ```
 */
async addPointsBulk(points: Array<{ id: number; vector: Float32Array }>, options?: { skipNormalization?: boolean }): Promise<void> {
  this.addPointsBulkSync(points, options);
}
1026
+
1027
/**
 * Insert a batch of points synchronously with construction mode switched on.
 * Each point goes through addPointSync. Construction mode is switched off and
 * the neighbor-set cache emptied when the batch ends, including when an
 * insertion throws.
 *
 * @param points Array of {id, vector} to add; an empty array is a no-op
 * @param options Optional flags, forwarded unchanged to addPointSync
 *   - skipNormalization: set true if the vectors are already unit-normalized
 */
addPointsBulkSync(points: Array<{ id: number; vector: Float32Array }>, options?: { skipNormalization?: boolean }): void {
  if (points.length === 0) return;

  // Start from a clean cache with construction mode enabled.
  this.constructionMode = true;
  this.neighborSets.clear();

  try {
    for (let i = 0; i < points.length; i++) {
      const point = points[i];
      this.addPointSync(point.id, point.vector, options);
    }
  } finally {
    // Runs on success and on failure so the index never stays in construction mode.
    this.constructionMode = false;
    this.neighborSets.clear();
  }
}
1049
+
1050
/**
 * Leave construction mode and drop the neighbor-set cache.
 * addPointsBulkSync performs the same cleanup itself when it finishes, so a
 * manual call is only needed in other situations.
 */
clearConstructionCache(): void {
  this.neighborSets.clear();
  this.constructionMode = false;
}
1059
+
1060
// On-disk format constants.
// MAGIC is the 32-bit value 0x48 0x4E 0x53 0x57 ("HNSW"). serialize() writes it
// with the little-endian flag set, so the bytes appear in the file as 'W','S','N','H'.
private static readonly MAGIC = 0x484E5357;
// v3 adds a vector offset table so vectors can be loaded lazily.
private static readonly FORMAT_VERSION = 3;
// 4 (magic) + 4 (version) + 28 (seven uint32 header fields) + 4 (vectorDataOffset)
private static readonly HEADER_SIZE = 40;
1064
+
1065
/**
 * Collect the nodes stored in slots [0, nodeCount) into a dense array,
 * skipping empty slots.
 */
private getNodesArray(): Node[] {
  return this.nodes
    .slice(0, this.nodeCount)
    .filter((slot): slot is Node => Boolean(slot));
}
1076
+
1077
/**
 * Serialize the index into a single ArrayBuffer using format v3.
 * All multi-byte values are little-endian.
 *
 * Layout:
 *   1. Header (40 bytes): magic, version, dimension, metric code, M,
 *      efConstruction, maxLevel, entryPointId, nodeCount, vectorDataOffset
 *   2. Node metadata: (id, level) per node, 8 bytes each
 *   3. Neighbor metadata: (count, encodedSize) per node per level, 8 bytes each
 *   4. Delta-encoded neighbor lists; neighbors are stored as node positions,
 *      not IDs
 *   5. Vector offset table: one uint32 per node, relative to vectorDataOffset
 *   6. Vector data: dimension float32 values per node
 *
 * Vectors of a lazily loaded index are read in through loadVector() before
 * being written, so the output always contains real vector data.
 *
 * @returns Buffer holding the complete serialized index
 */
serialize(): ArrayBuffer {
  const nodesArray = this.getNodesArray();
  // The header count and every size below describe the records actually
  // written, so they are derived from the dense node list. Using this.nodeCount
  // would over-report whenever a slot in the node array is empty.
  const nodeCount = nodesArray.length;

  // Map node ID -> position in nodesArray; neighbor lists are stored as positions.
  const idToIndex = new Map<number, number>();
  for (let i = 0; i < nodesArray.length; i++) {
    idToIndex.set(nodesArray[i].id, i);
  }

  // Encode every neighbor list up front so the exact byte size is known
  // before the output buffer is allocated.
  const encodedNeighbors: Uint8Array[][] = [];
  let totalNeighborBytes = 0;
  let neighborMetadataBytes = 0;

  for (const node of nodesArray) {
    const nodeEncodings: Uint8Array[] = [];
    for (let l = 0; l <= node.level; l++) {
      // A neighbor ID with no matching node falls back to position 0.
      const neighborIndices = node.neighbors[l].map(id => idToIndex.get(id) ?? 0);
      const encoded = deltaEncodeNeighbors(neighborIndices);
      nodeEncodings.push(encoded);
      totalNeighborBytes += encoded.length;
    }
    encodedNeighbors.push(nodeEncodings);
    neighborMetadataBytes += (node.level + 1) * 8; // (count, encodedSize) per level
  }

  const graphSize =
    HNSWIndex.HEADER_SIZE +
    nodeCount * 8 + // id + level per node
    neighborMetadataBytes +
    totalNeighborBytes +
    nodeCount * 4; // vector offset table

  const vectorDataOffset = graphSize;
  const vectorDataSize = nodeCount * this.dimension * 4;
  const totalSize = graphSize + vectorDataSize;

  const buffer = new ArrayBuffer(totalSize);
  const view = new DataView(buffer);
  const uint8Array = new Uint8Array(buffer);

  let offset = 0;

  // Header
  view.setUint32(offset, HNSWIndex.MAGIC, true); offset += 4;
  view.setUint32(offset, HNSWIndex.FORMAT_VERSION, true); offset += 4;
  view.setUint32(offset, this.dimension, true); offset += 4;
  const metricCode = this.metric === 'cosine' ? 0 : this.metric === 'euclidean' ? 1 : 2;
  view.setUint32(offset, metricCode, true); offset += 4;
  view.setUint32(offset, this.M, true); offset += 4;
  view.setUint32(offset, this.efConstruction, true); offset += 4;
  // maxLevel and entryPointId are read back with getInt32 (entryPointId is -1
  // for an empty index), so they are written as signed values. The bytes
  // produced are the same as an unsigned write of the same value.
  view.setInt32(offset, this.maxLevel, true); offset += 4;
  view.setInt32(offset, this.entryPointId, true); offset += 4;
  view.setUint32(offset, nodeCount, true); offset += 4;
  view.setUint32(offset, vectorDataOffset, true); offset += 4;

  // Node metadata (vectors are written separately at the end)
  for (const node of nodesArray) {
    view.setUint32(offset, node.id, true); offset += 4;
    view.setUint32(offset, node.level, true); offset += 4;
  }

  // Neighbor metadata: element count and encoded byte length per level
  for (let i = 0; i < nodesArray.length; i++) {
    const node = nodesArray[i];
    const nodeEncodings = encodedNeighbors[i];

    for (let l = 0; l <= node.level; l++) {
      view.setUint32(offset, node.neighbors[l].length, true); offset += 4;
      view.setUint32(offset, nodeEncodings[l].length, true); offset += 4;
    }
  }

  // Encoded neighbor payloads, in the same node/level order as the metadata
  for (const nodeEncodings of encodedNeighbors) {
    for (const encoded of nodeEncodings) {
      uint8Array.set(encoded, offset);
      offset += encoded.length;
    }
  }

  // Vector offset table (offsets are relative to vectorDataOffset)
  for (let i = 0; i < nodesArray.length; i++) {
    view.setUint32(offset, i * this.dimension * 4, true);
    offset += 4;
  }

  // Vector data. DataView writes are used because the neighbor payload has an
  // arbitrary byte length, so this section is not guaranteed to be 4-byte aligned.
  for (const node of nodesArray) {
    // With lazy loading, node.vector is a zero placeholder until the vector
    // has been loaded; resolve it so real data is written.
    const vector = this.loadVector(node.id) ?? node.vector;
    for (let j = 0; j < this.dimension; j++) {
      view.setFloat32(offset, vector[j], true);
      offset += 4;
    }
  }

  return buffer;
}
1186
+
1187
/**
 * Rebuild an HNSW index from a serialized buffer.
 *
 * Supported layouts:
 *   - v3: graph first, vectors at the end behind an offset table (lazy loading possible)
 *   - v2: vectors inline with node metadata, delta-encoded neighbor lists
 *   - v0/v1: vectors inline, raw int32 neighbor positions; v0 has no magic/version prefix
 *
 * @param buffer The serialized index buffer
 * @param options Optional loading options
 *   - lazyLoadVectors: If true, don't load vectors immediately (v3+ only;
 *     ignored for older formats)
 * @returns The reconstructed index
 * @throws Error if the buffer declares a format version newer than this code supports
 */
static deserialize(buffer: ArrayBuffer, options?: { lazyLoadVectors?: boolean }): HNSWIndex {
  const view = new DataView(buffer);
  const uint8Array = new Uint8Array(buffer);
  const lazyLoad = options?.lazyLoadVectors ?? false;

  let offset = 0;

  // Cursor helpers: read one little-endian value at `offset` and advance past it.
  const readUint32 = (): number => {
    const value = view.getUint32(offset, true);
    offset += 4;
    return value;
  };
  const readInt32 = (): number => {
    const value = view.getInt32(offset, true);
    offset += 4;
    return value;
  };
  const readInlineVector = (length: number): Float32Array => {
    const vector = new Float32Array(length);
    for (let j = 0; j < length; j++) {
      vector[j] = view.getFloat32(offset, true);
      offset += 4;
    }
    return vector;
  };

  // Buffers without the magic prefix are treated as the original headerless format (v0).
  let formatVersion = 0;
  if (view.getUint32(0, true) === HNSWIndex.MAGIC) {
    offset = 4; // Skip magic
    formatVersion = readUint32();

    if (formatVersion > HNSWIndex.FORMAT_VERSION) {
      throw new Error(`Unsupported HNSW format version: ${formatVersion}. Maximum supported: ${HNSWIndex.FORMAT_VERSION}`);
    }
  }

  // Header fields shared by every format version
  const dimension = readUint32();
  const metricCode = readUint32();
  const metric = metricCode === 0 ? 'cosine' : metricCode === 1 ? 'euclidean' : 'dot_product';
  const M = readUint32();
  const efConstruction = readUint32();
  const maxLevel = readInt32();
  const entryPointId = readInt32();
  const nodeCount = readUint32();

  // v3+ stores the absolute start of the vector section in the header
  const vectorDataOffset = formatVersion >= 3 ? readUint32() : 0;

  const index = new HNSWIndex(dimension, metric, M, efConstruction);
  index.maxLevel = maxLevel;
  index.entryPointId = entryPointId;

  // Position in the file -> node ID; neighbor lists are stored as positions.
  const indexToId: number[] = new Array(nodeCount);

  type LevelMeta = { count: number; encodedSize: number };

  // Read the (count, encodedSize) pair of every level of every node (v2/v3).
  const readNeighborMetadata = (nodes: ReadonlyArray<{ level: number }>): LevelMeta[][] => {
    const allMeta: LevelMeta[][] = [];
    for (const { level } of nodes) {
      const levelMeta: LevelMeta[] = [];
      for (let l = 0; l <= level; l++) {
        const count = readUint32();
        const encodedSize = readUint32();
        levelMeta.push({ count, encodedSize });
      }
      allMeta.push(levelMeta);
    }
    return allMeta;
  };

  // Decode the delta-encoded neighbor lists of one node (v2/v3).
  const decodeNeighborLists = (levelMeta: LevelMeta[]): number[][] => {
    const neighbors = new Array<number[]>(levelMeta.length);
    for (let l = 0; l < levelMeta.length; l++) {
      const { count, encodedSize } = levelMeta[l];

      if (count === 0 || encodedSize === 0) {
        neighbors[l] = [];
      } else {
        const encodedSlice = uint8Array.subarray(offset, offset + encodedSize);
        const neighborIndices = deltaDecodeNeighbors(encodedSlice, count);
        // Out-of-range positions fall back to ID 0.
        neighbors[l] = neighborIndices.map(idx =>
          idx >= 0 && idx < indexToId.length ? indexToId[idx] : 0
        );
      }
      // Always step over the bytes the writer emitted for this list, even when
      // the list is empty; otherwise every later read would be misaligned.
      offset += encodedSize;
    }
    return neighbors;
  };

  if (formatVersion >= 3) {
    // V3 format: vectors at end, supports lazy loading
    const nodeMetadata: Array<{ id: number; level: number }> = [];
    for (let i = 0; i < nodeCount; i++) {
      const id = readUint32();
      const level = readUint32();
      indexToId[i] = id;
      nodeMetadata.push({ id, level });
    }

    const neighborMetadata = readNeighborMetadata(nodeMetadata);

    const nodeNeighbors: number[][][] = [];
    for (let i = 0; i < nodeCount; i++) {
      nodeNeighbors.push(decodeNeighborLists(neighborMetadata[i]));
    }

    // Vector offset table: stored relative, kept as absolute buffer offsets
    for (let i = 0; i < nodeCount; i++) {
      const relativeOffset = readUint32();
      index.vectorOffsets.set(nodeMetadata[i].id, vectorDataOffset + relativeOffset);
    }

    if (lazyLoad) {
      // Keep the buffer and defer reading vectors until loadVector() is called
      index.lazyLoadEnabled = true;
      index.vectorBuffer = buffer;

      for (let i = 0; i < nodeCount; i++) {
        const { id, level } = nodeMetadata[i];
        const node: Node = {
          id,
          level,
          vector: new Float32Array(dimension), // Zero placeholder until loaded
          neighbors: nodeNeighbors[i]
        };
        index.setNode(node);
      }
    } else {
      // Eager loading: read every vector now
      for (let i = 0; i < nodeCount; i++) {
        const { id, level } = nodeMetadata[i];
        const vectorOffset = index.vectorOffsets.get(id)!;

        const vector = new Float32Array(dimension);
        for (let j = 0; j < dimension; j++) {
          vector[j] = view.getFloat32(vectorOffset + j * 4, true);
        }

        const node: Node = { id, level, vector, neighbors: nodeNeighbors[i] };
        index.setNode(node);
        index.vectorsLoaded.add(id);
      }
    }
  } else if (formatVersion >= 2) {
    // V2 format: delta-encoded neighbor lists, vectors inline
    const nodeMetadata: Array<{ id: number; level: number; vector: Float32Array }> = [];
    for (let i = 0; i < nodeCount; i++) {
      const id = readUint32();
      const level = readUint32();
      indexToId[i] = id;
      const vector = readInlineVector(dimension);
      nodeMetadata.push({ id, level, vector });
    }

    const neighborMetadata = readNeighborMetadata(nodeMetadata);

    for (let i = 0; i < nodeCount; i++) {
      const { id, level, vector } = nodeMetadata[i];
      const neighbors = decodeNeighborLists(neighborMetadata[i]);
      const node: Node = { id, level, vector, neighbors };
      index.setNode(node);
    }
  } else {
    // V0/V1 format: raw neighbor positions stored after all node records
    for (let i = 0; i < nodeCount; i++) {
      const id = readUint32();
      const level = readUint32();
      indexToId[i] = id;

      const vector = readInlineVector(dimension);

      // Only the per-level counts are stored here; the entries follow in a second section.
      const neighbors = new Array<number[]>(level + 1);
      for (let l = 0; l <= level; l++) {
        const neighborCount = readUint32();
        neighbors[l] = new Array(neighborCount);
      }
      const node: Node = { id, level, vector, neighbors };

      index.setNode(node);
    }

    // The neighbor section is consumed sequentially, so nodes must be visited
    // in the order their counts were read (file order). Walking the node array
    // in slot order instead would attach lists to the wrong nodes whenever
    // file order and slot order differ.
    for (let i = 0; i < nodeCount; i++) {
      const node = index.nodes[indexToId[i]];
      if (!node) continue;
      for (let l = 0; l <= node.level; l++) {
        for (let j = 0; j < node.neighbors[l].length; j++) {
          const neighborIndex = readInt32();
          if (neighborIndex >= 0 && neighborIndex < indexToId.length) {
            node.neighbors[l][j] = indexToId[neighborIndex];
          }
        }
      }
    }
  }

  return index;
}
1423
+
1424
/**
 * Return the vector of a node, reading it from the retained buffer on first
 * access when lazy loading is enabled.
 *
 * @param nodeId ID of the node whose vector is wanted
 * @returns The vector, or null when the node does not exist or when, in lazy
 *          mode, no buffer or no offset is available for it
 */
loadVector(nodeId: number): Float32Array | null {
  const target = this.nodes[nodeId];
  if (!target) return null;

  // Eager indices and already-loaded vectors are served straight from the node.
  const alreadyResident = !this.lazyLoadEnabled || this.vectorsLoaded.has(nodeId);
  if (alreadyResident) {
    return target.vector;
  }

  const source = this.vectorBuffer;
  if (!source) return null;

  const byteOffset = this.vectorOffsets.get(nodeId);
  if (byteOffset === undefined) return null;

  // Decode `dimension` little-endian float32 values starting at the recorded offset.
  const reader = new DataView(source);
  const loaded = new Float32Array(this.dimension);
  for (let component = 0, position = byteOffset; component < this.dimension; component++, position += 4) {
    loaded[component] = reader.getFloat32(position, true);
  }

  // Replace the node's vector and remember that this one is now in memory.
  target.vector = loaded;
  this.vectorsLoaded.add(nodeId);

  // Keep the flat vector storage in step with the node.
  this.setFlatVector(nodeId, loaded);

  return loaded;
}
1458
+
1459
/**
 * Load the vectors of the given nodes ahead of time.
 * Does nothing unless lazy loading is enabled.
 *
 * @param nodeIds IDs of the nodes whose vectors should be loaded
 */
preloadVectors(nodeIds: number[]): void {
  if (this.lazyLoadEnabled) {
    for (let i = 0; i < nodeIds.length; i++) {
      this.loadVector(nodeIds[i]);
    }
  }
}
1470
+
1471
/**
 * Report whether this index is in lazy-loading mode.
 *
 * @returns The current value of the lazy-load flag
 */
isLazyLoadEnabled(): boolean {
  return this.lazyLoadEnabled;
}
1477
+
1478
/**
 * Summarize how many vectors are currently loaded.
 *
 * When lazy loading is off, every node is reported as loaded and the reduction
 * is '0%'. When it is on, `memoryReduction` is the share of vectors not yet
 * loaded, formatted with one decimal place ('0%' for an index with no nodes).
 *
 * @returns Lazy-loading flag, node count, loaded vector count and reduction string
 */
getLazyLoadStats(): { enabled: boolean; totalNodes: number; loadedVectors: number; memoryReduction: string } {
  const totalNodes = this.nodeCount;

  if (this.lazyLoadEnabled) {
    const loadedVectors = this.vectorsLoaded.size;
    let reduction = '0';
    if (totalNodes > 0) {
      const unloadedShare = 1 - loadedVectors / totalNodes;
      reduction = (unloadedShare * 100).toFixed(1);
    }
    return {
      enabled: true,
      totalNodes,
      loadedVectors,
      memoryReduction: `${reduction}%`
    };
  }

  return {
    enabled: false,
    totalNodes,
    loadedVectors: totalNodes,
    memoryReduction: '0%'
  };
}
1502
+
1503
/**
 * Serialize the index and write it to a file with Bun.write.
 *
 * @param filePath Destination path
 */
async saveToFile(filePath: string): Promise<void> {
  await Bun.write(filePath, this.serialize());
}
1508
+
1509
/**
 * Read a file with Bun.file and deserialize it into an index.
 * deserialize is called without options, so vectors are loaded eagerly.
 *
 * @param filePath Path of a file previously written by saveToFile
 * @returns The deserialized index
 */
static async loadFromFile(filePath: string): Promise<HNSWIndex> {
  const contents = await Bun.file(filePath).arrayBuffer();
  return HNSWIndex.deserialize(contents);
}
1515
+
1516
/**
 * Drop all indexed data and return the index to an empty state.
 *
 * Besides releasing nodes and flat vector storage, this resets the entry point
 * and the lazy-loading and quantization bookkeeping. Without that, a later
 * insert would try to read the removed entry-point node, and lookups would
 * consult offsets and quantized vectors belonging to nodes that no longer exist.
 */
destroy(): void {
  // Release node objects and flat vector storage
  this.nodes = [];
  this.nodeCount = 0;
  this.flatVectors = new Float32Array(0);
  this.flatVectorsCapacity = 0;

  // -1 is the "no entry point" marker checked by the insert and search paths
  this.entryPointId = -1;

  // Forget per-node lazy-loading state
  this.lazyLoadEnabled = false;
  this.vectorOffsets.clear();
  this.vectorsLoaded.clear();

  // Quantized copies describe nodes that are gone
  this.quantizationEnabled = false;
  this.int8Vectors = [];
}
1524
+
1525
/**
 * Estimate the memory held by the index, in bytes.
 * The figure is an approximation built from fixed per-item allowances, not a
 * measurement.
 *
 * @returns Approximate number of bytes in use
 */
getMemoryUsage(): number {
  const BASE_OBJECT_OVERHEAD = 1024;
  const ARRAY_ENTRY_OVERHEAD = 8;      // per slot in the node array (rough estimate)
  const NODE_FIXED_BYTES = 8;          // id + level
  const NEIGHBORS_ARRAY_OVERHEAD = 24; // outer neighbors array (rough estimate)
  const LEVEL_ARRAY_OVERHEAD = 16;     // one neighbor list per level
  const BYTES_PER_VALUE = 4;           // float32 component or neighbor ID

  // Fixed costs: base object, flat vector storage, node array slots
  let estimate = BASE_OBJECT_OVERHEAD + this.flatVectors.byteLength + this.nodeCount * ARRAY_ENTRY_OVERHEAD;

  for (let slot = 0; slot < this.nodeCount; slot++) {
    const node = this.nodes[slot];
    if (!node) continue;

    estimate += NODE_FIXED_BYTES + node.vector.length * BYTES_PER_VALUE + NEIGHBORS_ARRAY_OVERHEAD;

    for (const levelNeighbors of node.neighbors) {
      estimate += levelNeighbors.length * BYTES_PER_VALUE + LEVEL_ARRAY_OVERHEAD;
    }
  }

  return estimate;
}
1559
+
1560
/**
 * Collect every node's vector, keyed by node ID, e.g. for brute-force search.
 *
 * In a lazily loaded index each vector is read in through loadVector() first,
 * so the map never contains the zero placeholder of a vector that has not been
 * loaded yet. As a consequence, calling this on a lazy index loads all vectors.
 *
 * @returns Map from node ID to its vector
 */
getAllVectors(): Map<number, Float32Array> {
  const result = new Map<number, Float32Array>();
  for (let i = 0; i < this.nodeCount; i++) {
    const node = this.nodes[i];
    if (!node) continue;
    // loadVector returns node.vector unchanged for eager indices; the fallback
    // covers the case where a lazy vector cannot be resolved.
    result.set(node.id, this.loadVector(node.id) ?? node.vector);
  }
  return result;
}
1571
+
1572
+ // ============================================
1573
+ // Quantized Search (Int8 with automatic rescoring)
1574
+ // ============================================
1575
+
1576
/**
 * Enable Int8 quantization: train a ScalarQuantizer on the vectors currently
 * in the index and store a quantized copy of each one, indexed by node ID.
 *
 * In a lazily loaded index every vector is read in through loadVector() first,
 * so training and quantization use real data rather than the zero placeholders
 * of vectors that have not been loaded yet.
 *
 * @throws Error if the index is empty
 *
 * @example
 * ```typescript
 * // After adding vectors
 * index.enableQuantization();
 *
 * // Quantized search is now available
 * const results = index.searchKNNQuantized(query, 10);
 * ```
 */
enableQuantization(): void {
  if (this.nodeCount === 0) {
    throw new Error('Cannot enable quantization on empty index. Add vectors first.');
  }

  // Resolve each node's vector once; the same list feeds training and quantization.
  const entries: Array<{ id: number; vector: Float32Array }> = [];
  for (let i = 0; i < this.nodeCount; i++) {
    const node = this.nodes[i];
    if (!node) continue;
    entries.push({ id: node.id, vector: this.loadVector(node.id) ?? node.vector });
  }

  // Train the scalar (int8) quantizer on all vectors
  const quantizer = new ScalarQuantizer(this.dimension);
  quantizer.train(entries.map(entry => entry.vector));
  this.scalarQuantizer = quantizer;

  // Quantized vectors live in a plain array indexed by node ID
  this.int8Vectors = new Array(this.nodeCount);
  for (const { id, vector } of entries) {
    this.int8Vectors[id] = quantizer.quantize(vector);
  }

  this.quantizationEnabled = true;
}
1621
+
1622
/**
 * Report whether quantization has been enabled on this index.
 *
 * @returns The current value of the quantization flag
 */
isQuantizationEnabled(): boolean {
  return this.quantizationEnabled;
}
1628
+
1629
/**
 * Quantized k-nearest-neighbour search with float32 rescoring.
 *
 * Upper layers are walked greedily with float32 distances, layer 0 is searched
 * through `searchLayerQuantized`, and the best candidates are then re-scored
 * with `calculateDistance` on the stored vectors before the final sort.
 *
 * When quantization has not been enabled the call is forwarded unchanged to
 * `searchKNN(query, k, efSearch)`.
 *
 * @param query Query vector
 * @param k Number of results to return. Fractional values are truncated;
 *   values below 1 (or NaN) produce an empty result.
 * @param candidateMultiplier How many times `k` candidates are re-scored
 *   (default: 3). Values below 1 (or NaN) are treated as 1 so that at least
 *   `k` candidates are always considered.
 * @param efSearch Search effort for layer 0. When omitted (or 0) it defaults
 *   to `max(2 * candidateCount, 50)`.
 * @returns Array of {id, distance} results sorted by ascending distance
 *   (same format as searchKNN)
 */
searchKNNQuantized(
  query: Float32Array,
  k: number,
  candidateMultiplier: number = 3,
  efSearch?: number
): Array<{ id: number; distance: number }> {
  // Fallback to standard search if quantization not enabled
  if (!this.quantizationEnabled) {
    return this.searchKNN(query, k, efSearch);
  }

  if (this.entryPointId === -1 || this.nodeCount === 0) {
    return [];
  }

  // Sanitize the sizing inputs. They end up as array lengths below, and a
  // fractional, negative or NaN length makes `new Array(n)` / `arr.length = n`
  // throw a RangeError.
  const resultLimit = Math.floor(k);
  if (!(resultLimit >= 1)) {
    return [];
  }
  const multiplier = candidateMultiplier >= 1 ? candidateMultiplier : 1;

  // Normalize query if needed - the reusable buffer receives a copy of the query
  let normalizedQuery = query;
  if (this.vectorsAreNormalized) {
    this.queryNormBuffer.set(query);
    normalizedQuery = this.normalizeVector(this.queryNormBuffer);
  }

  // Retrieve more candidates than needed so rescoring has room to reorder.
  // Rounded up so the count is always an integer >= resultLimit.
  const numCandidates = Math.ceil(resultLimit * multiplier);
  const effectiveEf = efSearch || Math.max(numCandidates * 2, 50);

  // Phase 1: greedy descent through the upper layers using float32 distances
  let currentEntryPoint = this.nodes[this.entryPointId]!;
  const entryVector = this.getNodeVector(this.entryPointId);
  if (!entryVector) return [];
  let currentBest = {
    id: currentEntryPoint.id,
    distance: this.calculateDistance(normalizedQuery, entryVector)
  };

  for (let l = this.maxLevel; l > 0; l--) {
    const result = this.greedySearch(normalizedQuery, currentEntryPoint, l);
    if (result.distance < currentBest.distance) {
      currentBest = result;
      currentEntryPoint = this.nodes[currentBest.id]!;
    }
  }

  // Phase 2: search layer 0 with quantized distances
  const candidates = this.searchLayerQuantized(normalizedQuery, currentBest, 0, effectiveEf);

  // Phase 3: rescore the leading candidates with float32 for accurate ranking
  const rescoreCount = Math.min(candidates.length, numCandidates);
  const rescored = new Array<{ id: number; distance: number }>(rescoreCount);

  for (let i = 0; i < rescoreCount; i++) {
    const c = candidates[i];
    const nodeVector = this.getNodeVector(c.id);
    if (nodeVector) {
      rescored[i] = { id: c.id, distance: this.calculateDistance(normalizedQuery, nodeVector) };
    } else {
      rescored[i] = c; // Keep the layer-0 distance if the vector is unavailable
    }
  }

  // Sort by accurate distance
  rescored.sort((a, b) => a.distance - b.distance);

  // Return top k
  if (rescored.length > resultLimit) rescored.length = resultLimit;
  return rescored;
}
1711
+
1712
/**
 * Best-first search of a single graph layer that scores nodes with Int8
 * quantized distances where an encoded vector is available, falling back to
 * `calculateDistance` on the float32 vector otherwise.
 *
 * The seed is re-scored with the same Int8 distance function as its
 * neighbours (when its encoded vector exists) so that every value placed in
 * the heaps, and the termination bound derived from them, comes from one
 * distance function rather than a mix of the caller's float32 distance and
 * Int8 distances.
 *
 * @param query Query vector (already normalized by the caller if required)
 * @param nearest Seed node for the search; its `distance` is only used when
 *   no Int8 encoding is available for it
 * @param layer Graph layer whose neighbour lists are followed
 * @param ef Maximum number of results kept while searching
 * @returns Up to `ef` {id, distance} entries ordered by ascending distance
 */
private searchLayerQuantized(
  query: Float32Array,
  nearest: { id: number; distance: number },
  layer: number,
  ef: number
): Array<{ id: number; distance: number }> {
  // Quantize query once
  const int8Query = this.scalarQuantizer ? this.scalarQuantizer.quantize(query) : null;
  // The metric does not change during a search, so decide once which Int8 kernel applies
  const useCosine = this.metric === 'cosine';

  // Clear visited tracking
  this.clearVisited();

  // Ensure heaps are large enough, then clear and reuse
  this.ensureHeapCapacity(ef);
  this.candidatesHeap.clear();
  this.resultsHeap.clear();

  // Score the seed the same way neighbours are scored
  let seedDistance = nearest.distance;
  if (int8Query) {
    const seedInt8 = this.int8Vectors[nearest.id];
    if (seedInt8) {
      seedDistance = useCosine
        ? cosineDistanceInt8(int8Query, seedInt8)
        : l2SquaredInt8(int8Query, seedInt8);
    }
  }

  // Initialize with entry point
  this.markVisited(nearest.id);
  this.candidatesHeap.push(nearest.id, seedDistance);
  this.resultsHeap.push(nearest.id, seedDistance);

  let furthestResultDist = seedDistance;

  while (!this.candidatesHeap.isEmpty()) {
    // Read the priority before popping; pop() only returns the id
    const closestCandidateDist = this.candidatesHeap.peekValue();
    const closestCandidateId = this.candidatesHeap.pop();

    if (closestCandidateId === -1) continue;

    // TERMINATION: results are full and the closest open candidate is already
    // further away than the worst kept result
    if (this.resultsHeap.size() >= ef && closestCandidateDist > furthestResultDist) {
      break;
    }

    const node = this.nodes[closestCandidateId];
    if (!node) continue;

    const neighbors = node.neighbors[layer] || [];

    for (const neighborId of neighbors) {
      if (this.isVisited(neighborId)) continue;
      this.markVisited(neighborId);

      // Prefer the Int8 distance; fall back to float32 when either side has
      // no encoded vector
      let distance: number;
      const neighborInt8 = int8Query ? this.int8Vectors[neighborId] : undefined;
      if (int8Query && neighborInt8) {
        distance = useCosine
          ? cosineDistanceInt8(int8Query, neighborInt8)
          : l2SquaredInt8(int8Query, neighborInt8);
      } else {
        const neighborNode = this.nodes[neighborId];
        if (!neighborNode) continue;
        distance = this.calculateDistance(query, neighborNode.vector);
      }

      if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
        this.candidatesHeap.push(neighborId, distance);
        this.resultsHeap.push(neighborId, distance);

        // Keep at most ef results by evicting the current worst
        if (this.resultsHeap.size() > ef) {
          this.resultsHeap.pop();
        }
        furthestResultDist = this.resultsHeap.peekValue();
      }
    }
  }

  // Extract results from max-heap: it yields the furthest first, so fill the
  // output from the back to end up in ascending order
  const resultCount = this.resultsHeap.size();
  const results: Array<{ id: number; distance: number }> = new Array(resultCount);
  let idx = resultCount - 1;
  while (!this.resultsHeap.isEmpty()) {
    const dist = this.resultsHeap.peekValue();
    const id = this.resultsHeap.pop();
    results[idx--] = { id, distance: dist };
  }

  return results;
}
1808
+
1809
/**
 * Summarize the quantization state of the index.
 *
 * @returns `enabled` mirrors the quantization flag and `vectorCount` is the
 *   current node count. `memoryReduction` and `expectedSpeedup` are
 *   human-readable strings; when quantization is on, the memory figure
 *   compares 4 bytes per component against 1 byte per component.
 */
getQuantizationStats(): {
  enabled: boolean;
  vectorCount: number;
  memoryReduction: string;
  expectedSpeedup: string;
} {
  const vectorCount = this.nodeCount;

  // Without quantization there is nothing to compare, so report the baseline.
  if (!this.quantizationEnabled) {
    return {
      enabled: false,
      vectorCount,
      memoryReduction: '1x (no quantization)',
      expectedSpeedup: '1x (baseline)'
    };
  }

  const float32Bytes = vectorCount * this.dimension * 4;
  const int8Bytes = vectorCount * this.dimension;
  const ratio = (float32Bytes / int8Bytes).toFixed(1);
  const toMB = (bytes: number): string => (bytes / 1024 / 1024).toFixed(1);

  return {
    enabled: true,
    vectorCount,
    memoryReduction: `${ratio}x (${toMB(float32Bytes)}MB → ${toMB(int8Bytes)}MB)`,
    expectedSpeedup: '3-4x for distance calculations'
  };
}
1839
+ }