verso-db 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +252 -0
- package/dist/BinaryHeap.d.ts +25 -0
- package/dist/BinaryHeap.d.ts.map +1 -0
- package/dist/Collection.d.ts +156 -0
- package/dist/Collection.d.ts.map +1 -0
- package/dist/HNSWIndex.d.ts +357 -0
- package/dist/HNSWIndex.d.ts.map +1 -0
- package/dist/MaxBinaryHeap.d.ts +63 -0
- package/dist/MaxBinaryHeap.d.ts.map +1 -0
- package/dist/Storage.d.ts +54 -0
- package/dist/Storage.d.ts.map +1 -0
- package/dist/VectorDB.d.ts +44 -0
- package/dist/VectorDB.d.ts.map +1 -0
- package/dist/backends/DistanceBackend.d.ts +5 -0
- package/dist/backends/DistanceBackend.d.ts.map +1 -0
- package/dist/backends/JsDistanceBackend.d.ts +37 -0
- package/dist/backends/JsDistanceBackend.d.ts.map +1 -0
- package/dist/encoding/DeltaEncoder.d.ts +61 -0
- package/dist/encoding/DeltaEncoder.d.ts.map +1 -0
- package/dist/errors.d.ts +58 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/index.d.ts +64 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3732 -0
- package/dist/presets.d.ts +91 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/quantization/ScalarQuantizer.d.ts +114 -0
- package/dist/quantization/ScalarQuantizer.d.ts.map +1 -0
- package/dist/storage/BatchWriter.d.ts +104 -0
- package/dist/storage/BatchWriter.d.ts.map +1 -0
- package/dist/storage/BunStorageBackend.d.ts +58 -0
- package/dist/storage/BunStorageBackend.d.ts.map +1 -0
- package/dist/storage/MemoryBackend.d.ts +44 -0
- package/dist/storage/MemoryBackend.d.ts.map +1 -0
- package/dist/storage/OPFSBackend.d.ts +59 -0
- package/dist/storage/OPFSBackend.d.ts.map +1 -0
- package/dist/storage/StorageBackend.d.ts +66 -0
- package/dist/storage/StorageBackend.d.ts.map +1 -0
- package/dist/storage/WriteAheadLog.d.ts +111 -0
- package/dist/storage/WriteAheadLog.d.ts.map +1 -0
- package/dist/storage/createStorageBackend.d.ts +40 -0
- package/dist/storage/createStorageBackend.d.ts.map +1 -0
- package/dist/storage/index.d.ts +30 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/package.json +98 -0
- package/src/BinaryHeap.ts +131 -0
- package/src/Collection.ts +695 -0
- package/src/HNSWIndex.ts +1839 -0
- package/src/MaxBinaryHeap.ts +175 -0
- package/src/Storage.ts +435 -0
- package/src/VectorDB.ts +109 -0
- package/src/backends/DistanceBackend.ts +17 -0
- package/src/backends/JsDistanceBackend.ts +227 -0
- package/src/encoding/DeltaEncoder.ts +217 -0
- package/src/errors.ts +110 -0
- package/src/index.ts +138 -0
- package/src/presets.ts +229 -0
- package/src/quantization/ScalarQuantizer.ts +383 -0
- package/src/storage/BatchWriter.ts +336 -0
- package/src/storage/BunStorageBackend.ts +161 -0
- package/src/storage/MemoryBackend.ts +120 -0
- package/src/storage/OPFSBackend.ts +250 -0
- package/src/storage/StorageBackend.ts +74 -0
- package/src/storage/WriteAheadLog.ts +326 -0
- package/src/storage/createStorageBackend.ts +137 -0
- package/src/storage/index.ts +53 -0
package/src/HNSWIndex.ts
ADDED
|
@@ -0,0 +1,1839 @@
|
|
|
1
|
+
// Bun-native file operations - no fs import needed
|
|
2
|
+
import type { DistanceBackend } from './backends/DistanceBackend';
|
|
3
|
+
import { JsDistanceBackend, dotProductFast, l2SquaredFast } from './backends/JsDistanceBackend';
|
|
4
|
+
import { BinaryHeap } from './BinaryHeap';
|
|
5
|
+
import { MaxBinaryHeap } from './MaxBinaryHeap';
|
|
6
|
+
import { ScalarQuantizer, l2SquaredInt8, cosineDistanceInt8 } from './quantization/ScalarQuantizer';
|
|
7
|
+
import { deltaEncodeNeighbors, deltaDecodeNeighbors, deltaEncodedSize } from './encoding/DeltaEncoder';
|
|
8
|
+
/**
 * Names of the distance metrics an index can be configured with.
 */
export type DistanceMetric =
  | 'cosine'
  | 'euclidean'
  | 'dot_product';
|
|
9
|
+
|
|
10
|
+
/**
 * A single vertex of the graph held by the index.
 */
export interface Node {
  /** Numeric identifier of the node. */
  id: number;
  /** Level recorded for this node. */
  level: number;
  /** The node's vector data. */
  vector: Float32Array;
  /** Adjacency lists: neighbors[layer][neighbor_index] = neighbor_id. */
  neighbors: Array<number[]>;
}
|
|
16
|
+
|
|
17
|
+
export class HNSWIndex {
|
|
18
|
+
// ---- Graph parameters ----
private M: number; // Connection limit per node per level
private M0: number; // Connection limit for level 0 (the constructor sets this to M * 2)
private efConstruction: number; // Candidate list size used during construction
private levelMult: number; // Multiplier applied when drawing a random level
private maxLevel: number;
private entryPointId: number;

// ---- Node storage ----
// Nodes are kept in an array indexed by node id instead of a Map
private nodes: (Node | undefined)[];
private nodeCount = 0;
private dimension: number;
private metric: DistanceMetric;
private maxLayers: number;
private distanceBackend: DistanceBackend;

// Contiguous copy of every vector, laid out as [v0_d0, v0_d1, ..., v1_d0, v1_d1, ...],
// used by the batch distance calculation
private flatVectors: Float32Array;
private flatVectorsCapacity = 0;

// ---- Reusable search state ----
private searchHeap: BinaryHeap;
// Visited markers live in a typed array rather than a Set<number>
private visitedArray: Uint8Array;
private visitedArraySize: number;
private visitedGeneration = 0; // Bumped to "clear" the markers without refilling the array
// Heaps shared by every searchLayer call so they are not re-created per call
private candidatesHeap: BinaryHeap;
private resultsHeap: MaxBinaryHeap;
private selectionHeap: BinaryHeap; // Shared by selectNeighbors
private heapCapacity: number;

// ---- Distance computation ----
// True when stored vectors are pre-normalized (the constructor enables this for cosine)
private vectorsAreNormalized = false;
// Distance function chosen once in the constructor according to the metric
private distanceFn: (a: Float32Array, b: Float32Array) => number;

// ---- Int8 quantization support ----
private scalarQuantizer: ScalarQuantizer | null = null;
// Quantized vectors, also held in an array indexed by node id
private int8Vectors: (Int8Array | undefined)[] = [];
private quantizationEnabled = false;

// ---- Lazy loading support (v3+ format) ----
private lazyLoadEnabled = false;
private vectorOffsets: Map<number, number> = new Map(); // nodeId -> byte offset in file
private vectorBuffer: ArrayBuffer | null = null; // Cached buffer for lazy loading
private vectorsLoaded: Set<number> = new Set(); // Ids whose vectors have been loaded

// Query-sized scratch buffer allocated once so searches can reuse it
private queryNormBuffer: Float32Array;

// ---- Bulk construction support ----
// Per-node, per-level Sets that mirror the neighbor arrays so membership can be
// tested with Set.has() while constructionMode is on
private neighborSets: Map<number, Set<number>[]> = new Map();
private constructionMode = false;
|
|
74
|
+
|
|
75
|
+
/**
 * Create an empty index.
 *
 * @param dimension - Number of components per vector; sizes the flat vector store and the query buffer.
 * @param metric - Distance metric; determines which distance function is cached.
 * @param M - Connection limit per node per level (level 0 uses M * 2).
 * @param efConstruction - Candidate list size used during construction.
 * @param distanceBackend - Optional backend; a JsDistanceBackend is created when omitted.
 * @throws Error when `metric` is not one of the supported values.
 */
constructor(dimension: number, metric: DistanceMetric = 'cosine', M = 24, efConstruction = 200, distanceBackend?: DistanceBackend) {
  // Number of node slots (and flat-vector rows) allocated up front
  const startingSlots = 10000;

  // Graph parameters
  this.dimension = dimension;
  this.metric = metric;
  this.M = M;
  this.M0 = M * 2;
  this.efConstruction = efConstruction;
  this.levelMult = 1 / Math.log(M);
  this.maxLevel = -1;
  this.entryPointId = -1;

  // Node array and contiguous vector storage share the same starting capacity
  this.nodes = new Array(startingSlots);
  this.nodeCount = 0;
  this.flatVectorsCapacity = startingSlots;
  this.flatVectors = new Float32Array(startingSlots * dimension);
  this.maxLayers = 32; // Upper bound on the number of layers
  this.distanceBackend = distanceBackend ?? new JsDistanceBackend();

  // Scratch heap with an initial capacity of 1000
  this.searchHeap = new BinaryHeap(1000);

  // Visited markers; the array is grown on demand by markVisited
  this.visitedArraySize = 10000;
  this.visitedArray = new Uint8Array(this.visitedArraySize);
  this.visitedGeneration = 1;

  // Heaps reused by searchLayer; ensureHeapCapacity replaces them for larger ef values
  this.heapCapacity = Math.max(efConstruction * 2, 500);
  this.candidatesHeap = new BinaryHeap(this.heapCapacity);
  this.resultsHeap = new MaxBinaryHeap(this.heapCapacity);
  this.selectionHeap = new BinaryHeap(Math.max(M * 2, efConstruction));

  // Cosine distance is computed on pre-normalized vectors
  this.vectorsAreNormalized = (metric === 'cosine');

  // Query-sized buffer allocated once for reuse by searches
  this.queryNormBuffer = new Float32Array(dimension);

  // Pick the distance function once so later calls do not branch on the metric
  switch (metric) {
    case 'cosine':
      // With normalized vectors the cosine distance is 1 - dot(a, b);
      // values below 1e-10 (including small negatives) are reported as 0
      this.distanceFn = (a: Float32Array, b: Float32Array): number => {
        const gap = 1 - dotProductFast(a, b);
        return gap < 1e-10 ? 0 : gap;
      };
      break;
    case 'euclidean':
      this.distanceFn = (a: Float32Array, b: Float32Array): number => Math.sqrt(l2SquaredFast(a, b));
      break;
    case 'dot_product':
      // Negated so that a larger dot product yields a smaller distance
      this.distanceFn = (a: Float32Array, b: Float32Array): number => -dotProductFast(a, b);
      break;
    default:
      throw new Error(`Unsupported metric: ${metric}`);
  }
}
|
|
138
|
+
|
|
139
|
+
// ============================================
|
|
140
|
+
// OPTIMIZATION: Capacity and flat storage helpers
|
|
141
|
+
// ============================================
|
|
142
|
+
|
|
143
|
+
/**
 * Grow the node array and the flat vector store so each can hold at least
 * `minCapacity` entries. Each store at least doubles when it has to grow.
 */
private ensureCapacity(minCapacity: number): void {
  const nodeSlots = this.nodes.length;
  if (nodeSlots < minCapacity) {
    const grownNodes = new Array(Math.max(nodeSlots * 2, minCapacity));
    // Only the first nodeCount slots are carried over
    let slot = 0;
    while (slot < this.nodeCount) {
      grownNodes[slot] = this.nodes[slot];
      slot++;
    }
    this.nodes = grownNodes;
  }

  const vectorSlots = this.flatVectorsCapacity;
  if (vectorSlots < minCapacity) {
    const grownSlots = Math.max(vectorSlots * 2, minCapacity);
    const grownVectors = new Float32Array(grownSlots * this.dimension);
    grownVectors.set(this.flatVectors);
    this.flatVectors = grownVectors;
    this.flatVectorsCapacity = grownSlots;
  }
}
|
|
166
|
+
|
|
167
|
+
/**
 * Return the slice of flat storage that belongs to `nodeId`.
 * The result is a subarray view onto the shared buffer, not a copy.
 */
private getFlatVector(nodeId: number): Float32Array {
  const width = this.dimension;
  const start = nodeId * width;
  return this.flatVectors.subarray(start, start + width);
}
|
|
175
|
+
|
|
176
|
+
/**
 * Copy `vector` into flat storage at the position reserved for `nodeId`.
 */
private setFlatVector(nodeId: number, vector: Float32Array): void {
  this.flatVectors.set(vector, nodeId * this.dimension);
}
|
|
183
|
+
|
|
184
|
+
/**
 * Look up a node by id with a direct array access.
 *
 * @returns The node stored at that index, or undefined when the slot is empty.
 */
private getNode(id: number): Node | undefined {
  const found = this.nodes[id];
  return found;
}
|
|
190
|
+
|
|
191
|
+
/**
 * Store `node` under its own id: grows storage if needed, writes the node,
 * mirrors its vector into flat storage and raises nodeCount when the id is
 * at or beyond the current count.
 */
private setNode(node: Node): void {
  const slot = node.id;
  const requiredSlots = slot + 1;

  this.ensureCapacity(requiredSlots);
  this.nodes[slot] = node;
  this.setFlatVector(slot, node.vector);

  if (this.nodeCount <= slot) {
    this.nodeCount = requiredSlots;
  }
}
|
|
205
|
+
|
|
206
|
+
// Scratch arrays kept on the instance so batch distance calculation can reuse them
private batchNeighborIds: Array<number> = [];
private batchDistances: Array<number> = [];
|
|
209
|
+
|
|
210
|
+
/**
 * Compute the distance from `query` to every node listed in `neighborIds`,
 * reading the neighbor vectors directly out of flat storage.
 *
 * Results are written to `outDistances[i]` for each `neighborIds[i]`:
 * - cosine: 1 - dot, with values below 1e-10 reported as 0
 * - euclidean: square root of the summed squared differences
 * - any other metric (dot_product): the negated dot product
 *
 * Sums are kept in eight separate accumulators (8-wide unrolled loop); the
 * leftover elements are folded into the first accumulator.
 */
private calculateDistancesBatch(
  query: Float32Array,
  neighborIds: number[],
  outDistances: number[]
): void {
  const width = this.dimension;
  const store = this.flatVectors;
  const metric = this.metric;
  const unrolledEnd = width - 7;
  const total = neighborIds.length;

  for (let n = 0; n < total; n++) {
    const base = neighborIds[n] * width;

    let acc0 = 0, acc1 = 0, acc2 = 0, acc3 = 0;
    let acc4 = 0, acc5 = 0, acc6 = 0, acc7 = 0;
    let k = 0;

    if (metric === 'euclidean') {
      for (; k < unrolledEnd; k += 8) {
        const e0 = store[base + k] - query[k];
        const e1 = store[base + k + 1] - query[k + 1];
        const e2 = store[base + k + 2] - query[k + 2];
        const e3 = store[base + k + 3] - query[k + 3];
        const e4 = store[base + k + 4] - query[k + 4];
        const e5 = store[base + k + 5] - query[k + 5];
        const e6 = store[base + k + 6] - query[k + 6];
        const e7 = store[base + k + 7] - query[k + 7];
        acc0 += e0 * e0;
        acc1 += e1 * e1;
        acc2 += e2 * e2;
        acc3 += e3 * e3;
        acc4 += e4 * e4;
        acc5 += e5 * e5;
        acc6 += e6 * e6;
        acc7 += e7 * e7;
      }
      for (; k < width; k++) {
        const e = store[base + k] - query[k];
        acc0 += e * e;
      }

      outDistances[n] = Math.sqrt(acc0 + acc1 + acc2 + acc3 + acc4 + acc5 + acc6 + acc7);
      continue;
    }

    // Cosine and dot_product both start from the plain dot product
    for (; k < unrolledEnd; k += 8) {
      acc0 += store[base + k] * query[k];
      acc1 += store[base + k + 1] * query[k + 1];
      acc2 += store[base + k + 2] * query[k + 2];
      acc3 += store[base + k + 3] * query[k + 3];
      acc4 += store[base + k + 4] * query[k + 4];
      acc5 += store[base + k + 5] * query[k + 5];
      acc6 += store[base + k + 6] * query[k + 6];
      acc7 += store[base + k + 7] * query[k + 7];
    }
    for (; k < width; k++) {
      acc0 += store[base + k] * query[k];
    }

    const dot = acc0 + acc1 + acc2 + acc3 + acc4 + acc5 + acc6 + acc7;

    if (metric === 'cosine') {
      const gap = 1 - dot;
      outDistances[n] = gap < 1e-10 ? 0 : gap;
    } else {
      outDistances[n] = -dot;
    }
  }
}
|
|
309
|
+
|
|
310
|
+
/**
 * Report whether `id` carries the current generation's visited marker.
 * Ids at or beyond the end of the marker array are reported as not visited.
 */
private isVisited(id: number): boolean {
  return id < this.visitedArraySize && this.visitedArray[id] === this.visitedGeneration;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Stamp `id` with the current generation, enlarging the marker array first
 * when the id does not fit. Existing markers are copied into the larger array.
 */
private markVisited(id: number): void {
  const currentSize = this.visitedArraySize;
  if (id >= currentSize) {
    // At least double, and always leave headroom of 1000 beyond the requested id
    const grownSize = Math.max(currentSize * 2, id + 1000);
    const grown = new Uint8Array(grownSize);
    grown.set(this.visitedArray);
    this.visitedArray = grown;
    this.visitedArraySize = grownSize;
  }
  this.visitedArray[id] = this.visitedGeneration;
}
|
|
336
|
+
|
|
337
|
+
/**
 * Invalidate every visited marker by moving to the next generation.
 * Markers are stored in a Uint8Array, so once the generation would pass 250
 * the array is zeroed and the generation restarts at 1.
 */
private clearVisited(): void {
  const nextGeneration = this.visitedGeneration + 1;
  if (nextGeneration > 250) {
    this.visitedArray.fill(0);
    this.visitedGeneration = 1;
    return;
  }
  this.visitedGeneration = nextGeneration;
}
|
|
349
|
+
|
|
350
|
+
/**
 * Scale `vector` in place to unit length and return the same array.
 * A vector whose norm is not greater than zero is returned unchanged.
 *
 * The squared norm is accumulated in eight partial sums (8-wide unrolled
 * loop); leftover elements are folded into the first partial sum.
 */
private normalizeVector(vector: Float32Array): Float32Array {
  const size = vector.length;
  const unrolledEnd = size - 7;

  let q0 = 0, q1 = 0, q2 = 0, q3 = 0;
  let q4 = 0, q5 = 0, q6 = 0, q7 = 0;
  let p = 0;

  while (p < unrolledEnd) {
    q0 += vector[p] * vector[p];
    q1 += vector[p + 1] * vector[p + 1];
    q2 += vector[p + 2] * vector[p + 2];
    q3 += vector[p + 3] * vector[p + 3];
    q4 += vector[p + 4] * vector[p + 4];
    q5 += vector[p + 5] * vector[p + 5];
    q6 += vector[p + 6] * vector[p + 6];
    q7 += vector[p + 7] * vector[p + 7];
    p += 8;
  }
  while (p < size) {
    q0 += vector[p] * vector[p];
    p++;
  }

  const magnitude = Math.sqrt(q0 + q1 + q2 + q3 + q4 + q5 + q6 + q7);
  if (!(magnitude > 0)) {
    return vector;
  }

  const scale = 1 / magnitude;
  for (let j = 0; j < size; j++) {
    vector[j] *= scale;
  }
  return vector;
}
|
|
384
|
+
|
|
385
|
+
/**
 * Draw a random level: floor(-ln(r) * levelMult) with r taken from
 * Math.random(), capped at maxLayers - 1.
 */
private selectLevel(): number {
  const drawn = Math.floor(-Math.log(Math.random()) * this.levelMult);
  const highestAllowed = this.maxLayers - 1;
  return Math.min(drawn, highestAllowed);
}
|
|
390
|
+
|
|
391
|
+
/**
 * Distance between `a` and `b` under the index's metric.
 * Delegates to the distance function that the constructor selected.
 */
calculateDistance(a: Float32Array, b: Float32Array): number {
  const distance = this.distanceFn(a, b);
  return distance;
}
|
|
398
|
+
|
|
399
|
+
/**
 * Fetch the vector of `nodeId`.
 * When lazy loading is enabled and the vector is not yet marked as loaded,
 * loadVector is called before the vector is read.
 *
 * @returns The node's vector, or null when no node is stored under that id.
 */
private getNodeVector(nodeId: number): Float32Array | null {
  const node = this.nodes[nodeId];
  if (!node) {
    return null;
  }

  const needsLoad = this.lazyLoadEnabled && !this.vectorsLoaded.has(nodeId);
  if (needsLoad) {
    this.loadVector(nodeId);
  }

  return node.vector;
}
|
|
412
|
+
|
|
413
|
+
/**
 * Connection limit for a layer: M0 on layer 0, M on every other layer.
 */
private getLayerMaxConnections(layer: number): number {
  if (layer === 0) {
    return this.M0;
  }
  return this.M;
}
|
|
416
|
+
|
|
417
|
+
/**
 * Pick the neighbors for `currentId` on `layer` from `candidates`.
 *
 * Every candidate except `currentId` itself is pushed onto the shared
 * selection heap, then ids are popped until the layer's connection limit
 * is reached or the heap is empty.
 *
 * @returns The selected neighbor ids in pop order.
 */
private selectNeighbors(currentId: number, candidates: Array<{ id: number; distance: number }>, layer: number): number[] {
  const limit = this.getLayerMaxConnections(layer);

  this.selectionHeap.clear();
  for (let c = 0; c < candidates.length; c++) {
    const entry = candidates[c];
    if (entry.id === currentId) continue; // never link a node to itself
    this.selectionHeap.push(entry.id, entry.distance);
  }

  // Sized for the number of ids we expect to take; trimmed below if fewer arrive
  const picked = new Array<number>(Math.min(limit, this.selectionHeap.size()));
  let filled = 0;
  while (filled < limit && !this.selectionHeap.isEmpty()) {
    const nextId = this.selectionHeap.pop();
    if (nextId === -1) continue; // -1 is skipped rather than stored
    picked[filled] = nextId;
    filled += 1;
  }

  if (filled < picked.length) {
    picked.length = filled;
  }
  return picked;
}
|
|
451
|
+
|
|
452
|
+
/**
 * Link `fromId` and `toId` to each other on `level`, skipping a direction
 * that is already present. Does nothing when either node is missing.
 *
 * Outside construction mode duplicates are detected with Array.includes.
 * In construction mode a Set per node and level (created on first use from
 * the current neighbor list) is consulted and kept in step with the array.
 */
private addBidirectionalConnection(fromId: number, toId: number, level: number): void {
  const source = this.nodes[fromId];
  const target = this.nodes[toId];
  if (!(source && target)) return;

  // Make sure both nodes have a neighbor list for this level
  if (!source.neighbors[level]) {
    source.neighbors[level] = [];
  }
  if (!target.neighbors[level]) {
    target.neighbors[level] = [];
  }
  const sourceLinks = source.neighbors[level];
  const targetLinks = target.neighbors[level];

  if (!this.constructionMode) {
    // Single-insert path: linear scan of the neighbor lists
    if (!sourceLinks.includes(toId)) {
      sourceLinks.push(toId);
    }
    if (!targetLinks.includes(fromId)) {
      targetLinks.push(fromId);
    }
    return;
  }

  // Bulk-construction path: membership is tracked in parallel Sets
  let sourceSets = this.neighborSets.get(fromId);
  if (!sourceSets) {
    sourceSets = [];
    this.neighborSets.set(fromId, sourceSets);
  }
  if (!sourceSets[level]) {
    sourceSets[level] = new Set(sourceLinks);
  }

  let targetSets = this.neighborSets.get(toId);
  if (!targetSets) {
    targetSets = [];
    this.neighborSets.set(toId, targetSets);
  }
  if (!targetSets[level]) {
    targetSets[level] = new Set(targetLinks);
  }

  const sourceSeen = sourceSets[level];
  const targetSeen = targetSets[level];

  if (!sourceSeen.has(toId)) {
    sourceSeen.add(toId);
    sourceLinks.push(toId);
  }
  if (!targetSeen.has(fromId)) {
    targetSeen.add(fromId);
    targetLinks.push(fromId);
  }
}
|
|
506
|
+
|
|
507
|
+
/**
 * Make sure the shared candidate and result heaps can serve a search with
 * the given `ef`. The required capacity is max(ef * 2, 100); when it exceeds
 * the current capacity both heaps are replaced by new, larger ones.
 */
private ensureHeapCapacity(ef: number): void {
  const needed = Math.max(ef * 2, 100);
  if (!(needed > this.heapCapacity)) {
    return;
  }

  this.heapCapacity = needed;
  this.candidatesHeap = new BinaryHeap(needed);
  this.resultsHeap = new MaxBinaryHeap(needed);
}
|
|
519
|
+
|
|
520
|
+
/**
 * Best-first search of a single layer using two heaps.
 *
 * - candidatesHeap (min-heap) holds nodes still to be expanded, closest first.
 * - resultsHeap (max-heap) holds the best results found so far; once it holds
 *   more than `ef` entries the furthest one is popped.
 *
 * The loop ends when the candidate heap is empty, or when the result heap
 * already holds `ef` entries and the closest remaining candidate is farther
 * than the furthest kept result.
 *
 * @param query - Vector being searched for.
 * @param nearest - Entry point for this layer together with its distance to `query`.
 * @param layer - Layer whose neighbor lists are followed.
 * @param ef - Maximum number of results to keep.
 * @returns The kept results, written from the back of the array as the
 *   max-heap is drained.
 */
private searchLayer(
  query: Float32Array,
  nearest: { id: number; distance: number },
  layer: number,
  ef: number
): Array<{ id: number; distance: number }> {
  // Reset shared state: visited markers first, then the two heaps
  this.clearVisited();
  this.ensureHeapCapacity(ef);
  this.candidatesHeap.clear();
  this.resultsHeap.clear();

  // Seed both heaps with the entry point
  this.markVisited(nearest.id);
  this.candidatesHeap.push(nearest.id, nearest.distance);
  this.resultsHeap.push(nearest.id, nearest.distance);

  // Cached top of the result heap; refreshed whenever that heap changes
  let worstKept = nearest.distance;

  // Instance-level scratch arrays for the batch path
  const pendingIds = this.batchNeighborIds;
  const pendingDists = this.batchDistances;

  while (!this.candidatesHeap.isEmpty()) {
    const frontierDist = this.candidatesHeap.peekValue();
    const frontierId = this.candidatesHeap.pop();
    if (frontierId === -1) continue;

    // Nothing closer left to expand and the result set is already full
    if (this.resultsHeap.size() >= ef && frontierDist > worstKept) {
      break;
    }

    const expanded = this.nodes[frontierId];
    if (!expanded) continue;

    const adjacency = expanded.neighbors[layer] || [];

    if (this.lazyLoadEnabled) {
      // Lazy-loaded index: fetch each neighbor vector individually,
      // since it may not be present in flat storage
      for (let n = 0; n < adjacency.length; n++) {
        const neighborId = adjacency[n];
        if (this.isVisited(neighborId)) continue;
        this.markVisited(neighborId);

        const neighborVector = this.getNodeVector(neighborId);
        if (!neighborVector) continue;

        const dist = this.calculateDistance(query, neighborVector);

        const admit = this.resultsHeap.size() < ef || dist < worstKept;
        if (!admit) continue;

        this.candidatesHeap.push(neighborId, dist);
        this.resultsHeap.push(neighborId, dist);
        if (this.resultsHeap.size() > ef) {
          this.resultsHeap.pop(); // drop the furthest result
        }
        worstKept = this.resultsHeap.peekValue();
      }
      continue;
    }

    // Fully loaded index: gather the unvisited neighbors, then compute
    // all of their distances in one batch call
    let pending = 0;
    for (let n = 0; n < adjacency.length; n++) {
      const neighborId = adjacency[n];
      if (this.isVisited(neighborId)) continue;
      this.markVisited(neighborId);
      pendingIds[pending] = neighborId;
      pending++;
    }
    if (pending === 0) continue;

    if (pendingDists.length < pending) {
      this.batchDistances.length = pending;
    }

    // NOTE(review): slice() copies the id prefix on every expansion even though
    // the scratch array itself is reused; kept as-is to preserve behavior.
    this.calculateDistancesBatch(query, pendingIds.slice(0, pending), pendingDists);

    for (let n = 0; n < pending; n++) {
      const neighborId = pendingIds[n];
      const dist = pendingDists[n];

      const admit = this.resultsHeap.size() < ef || dist < worstKept;
      if (!admit) continue;

      this.candidatesHeap.push(neighborId, dist);
      this.resultsHeap.push(neighborId, dist);
      if (this.resultsHeap.size() > ef) {
        this.resultsHeap.pop(); // drop the furthest result
      }
      worstKept = this.resultsHeap.peekValue();
    }
  }

  // Drain the max-heap into a pre-sized array, filling it from the end
  // so no reverse() is needed afterwards
  const total = this.resultsHeap.size();
  const ordered: Array<{ id: number; distance: number }> = new Array(total);
  for (let slot = total - 1; !this.resultsHeap.isEmpty(); slot--) {
    const distance = this.resultsHeap.peekValue();
    const id = this.resultsHeap.pop();
    ordered[slot] = { id, distance };
  }

  return ordered;
}
|
|
653
|
+
|
|
654
|
+
/**
 * Greedy descent on one level: starting at `entryNode`, keep moving to any
 * unvisited neighbor that is closer to `query` until a full pass over the
 * neighbor list brings no improvement.
 *
 * If the entry node's vector cannot be obtained, its distance is treated as
 * Infinity.
 *
 * @returns The id of the closest node reached and its distance to `query`.
 */
private greedySearch(query: Float32Array, entryNode: Node, level: number): { id: number; distance: number } {
  this.clearVisited();

  // getNodeVector also triggers loading when lazy loading is enabled
  const startVector = this.getNodeVector(entryNode.id);
  let bestNode = entryNode;
  let bestDistance = startVector ? this.calculateDistance(query, startVector) : Infinity;
  this.markVisited(bestNode.id);

  let moved: boolean;
  do {
    moved = false;
    // The list is taken once per pass, even if bestNode changes part-way through
    const adjacency = bestNode.neighbors[level] || [];

    for (let n = 0; n < adjacency.length; n++) {
      const neighborId = adjacency[n];
      if (this.isVisited(neighborId)) continue;
      this.markVisited(neighborId);

      const neighborVector = this.getNodeVector(neighborId);
      if (!neighborVector) continue;

      const neighborNode = this.nodes[neighborId];
      if (!neighborNode) continue;

      const distance = this.calculateDistance(query, neighborVector);
      if (distance < bestDistance) {
        bestDistance = distance;
        bestNode = neighborNode;
        moved = true;
      }
    }
  } while (moved);

  return { id: bestNode.id, distance: bestDistance };
}
|
|
692
|
+
|
|
693
|
+
/**
 * Promise-returning variant of addPointSync(), kept for API compatibility.
 * The insertion itself is performed synchronously by addPointSync().
 * For bulk operations prefer addPointsBulk().
 *
 * @param id Identifier to store the vector under
 * @param vector Vector to insert
 * @param options Forwarded unchanged to addPointSync()
 */
async addPoint(id: number, vector: number[] | Float32Array, options?: { skipNormalization?: boolean }): Promise<void> {
  return this.addPointSync(id, vector, options);
}
|
|
700
|
+
|
|
701
|
+
/**
 * Insert a point into the index synchronously. addPoint() and
 * addPointsBulkSync() both delegate to this method.
 *
 * Copy policy for the input:
 * - plain arrays are always copied into a new Float32Array;
 * - a Float32Array is copied only when it is about to be normalized, because
 *   normalizeVector works in place;
 * - otherwise the caller's Float32Array is stored directly, so the caller
 *   must not modify it afterwards.
 *
 * @param id Identifier to store the vector under
 * @param vector Vector to insert; its length must equal the index dimension
 * @param options skipNormalization - set true if the vector is already
 *                unit-normalized, to skip the normalization step
 * @throws Error if the vector length does not match the index dimension
 */
addPointSync(id: number, vector: number[] | Float32Array, options?: { skipNormalization?: boolean }): void {
  // Validate before allocating so a bad input costs no copy
  if (vector.length !== this.dimension) {
    throw new Error(`Vector dimension ${vector.length} does not match expected ${this.dimension}`);
  }

  const mustNormalize = this.vectorsAreNormalized && !options?.skipNormalization;

  let floatVector: Float32Array;
  if (Array.isArray(vector) || mustNormalize) {
    floatVector = new Float32Array(vector);
  } else {
    // No normalization needed and input is already a Float32Array - use directly
    floatVector = vector;
  }

  // Pre-normalize so distance computation can assume normalized vectors
  if (mustNormalize) {
    floatVector = this.normalizeVector(floatVector);
  }

  // Create the new node with one empty neighbor list per layer it lives on
  const level = this.selectLevel();
  const neighbors = new Array<number[]>(level + 1);
  for (let i = 0; i <= level; i++) {
    neighbors[i] = [];
  }
  const newNode: Node = {
    id,
    level,
    vector: floatVector,
    neighbors,
  };

  this.setNode(newNode);

  // The first node becomes the entry point; nothing to connect to yet
  if (this.entryPointId === -1) {
    this.entryPointId = id;
    this.maxLevel = level;
    return;
  }

  // Measure against the entry point. Resolve its vector through getNodeVector,
  // as searchKNN and greedySearch do, so that a lazily loaded index does not
  // measure against an unloaded placeholder vector.
  let currentEntryPoint = this.nodes[this.entryPointId]!;
  const entryVector = this.getNodeVector(currentEntryPoint.id) ?? currentEntryPoint.vector;
  let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(floatVector, entryVector) };

  // Descend greedily through the layers above the insertion level
  for (let l = this.maxLevel; l > level; l--) {
    const result = this.greedySearch(floatVector, currentEntryPoint, l);
    if (result.distance < currentBest.distance) {
      currentBest = result;
      currentEntryPoint = this.nodes[currentBest.id]!;
    }
  }

  // Connect at each layer from the insertion level down to 0
  for (let l = Math.min(level, this.maxLevel); l >= 0; l--) {
    const searchResults = this.searchLayer(floatVector, currentBest, l, this.efConstruction);

    const selected = this.selectNeighbors(id, searchResults, l);
    for (const neighborId of selected) {
      this.addBidirectionalConnection(id, neighborId, l);
    }

    // The first search result seeds the search on the next layer down
    if (searchResults.length > 0) {
      currentBest = searchResults[0];
    }
  }

  // A node taller than the current graph becomes the new entry point
  if (level > this.maxLevel) {
    this.maxLevel = level;
    this.entryPointId = id;
  }
}
|
|
794
|
+
|
|
795
|
+
/**
 * Find the k nearest neighbors of `query`.
 *
 * Descends greedily from the entry point through layers maxLevel..1, then
 * runs searchLayer on layer 0 with the effective ef and returns the best k.
 *
 * @param query Query vector; its length must equal the index dimension
 * @param k Maximum number of results to return
 * @param efSearch Size of the layer-0 candidate list. When omitted (or 0) it
 *                 defaults to max(2k, 50). Values below k are raised to k,
 *                 since a candidate list shorter than k cannot yield k results.
 * @returns Up to k results ordered by ascending distance, ties broken by
 *          ascending id; empty if the index has no nodes or the entry vector
 *          is unavailable
 * @throws Error if the query length does not match the index dimension
 */
searchKNN(query: Float32Array, k: number, efSearch?: number): Array<{ id: number; distance: number }> {
  if (this.entryPointId === -1 || this.nodeCount === 0) {
    return [];
  }

  if (query.length !== this.dimension) {
    throw new Error(`Query dimension ${query.length} does not match expected ${this.dimension}`);
  }

  // Never search with a candidate list smaller than the number of results requested
  const requestedEf = efSearch || Math.max(k * 2, 50);
  const effectiveEf = requestedEf < k ? k : requestedEf;

  // When stored vectors are normalized, normalize the query as well.
  // The copy goes into a reusable buffer so the caller's array is untouched.
  let normalizedQuery = query;
  if (this.vectorsAreNormalized) {
    this.queryNormBuffer.set(query);
    normalizedQuery = this.normalizeVector(this.queryNormBuffer);
  }

  // Start from the entry point at the highest level
  let currentEntryPoint = this.nodes[this.entryPointId]!;
  const entryVector = this.getNodeVector(this.entryPointId);
  if (!entryVector) return [];
  let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(normalizedQuery, entryVector) };

  // Greedy descent from the top layer down to layer 1
  for (let l = this.maxLevel; l > 0; l--) {
    const result = this.greedySearch(normalizedQuery, currentEntryPoint, l);
    if (result.distance < currentBest.distance) {
      currentBest = result;
      currentEntryPoint = this.nodes[currentBest.id]!;
    }
  }

  // Detailed search on layer 0
  const candidates = this.searchLayer(normalizedQuery, currentBest, 0, effectiveEf);

  // Sort by distance, then by id for deterministic tie-breaking.
  // Sorting in place is fine: the array was created by searchLayer for us.
  candidates.sort((a, b) => {
    const diff = a.distance - b.distance;
    return diff !== 0 ? diff : a.id - b.id;
  });

  // Keep only the top k by truncating in place
  if (candidates.length > k) candidates.length = k;

  return candidates;
}
|
|
845
|
+
|
|
846
|
+
/**
 * Run searchKNN for every vector in `queries`.
 *
 * Queries are processed in order; the visited set is cleared before the
 * first query and again after each one.
 *
 * @param queries Array of query vectors
 * @param k Number of nearest neighbors to return per query
 * @param efSearch Search effort parameter, forwarded to searchKNN
 * @returns One result array per query, in input order; all empty when the
 *          index has no nodes
 * @throws Error if a query's length does not match the index dimension
 */
searchKNNBatch(
  queries: Float32Array[],
  k: number,
  efSearch?: number
): Array<Array<{ id: number; distance: number }>> {
  const total = queries.length;
  const indexIsEmpty = this.entryPointId === -1 || this.nodeCount === 0;

  if (indexIsEmpty) {
    // Nothing to search: one fresh empty array per query
    return Array.from({ length: total }, (): Array<{ id: number; distance: number }> => []);
  }

  const batched: Array<Array<{ id: number; distance: number }>> = [];

  this.clearVisited();

  let position = 0;
  while (position < total) {
    const current = queries[position];
    if (current.length !== this.dimension) {
      throw new Error(`Query dimension ${current.length} does not match expected ${this.dimension}`);
    }

    batched.push(this.searchKNN(current, k, efSearch));

    // Reset visited state before the next query
    this.clearVisited();
    position += 1;
  }

  return batched;
}
|
|
892
|
+
|
|
893
|
+
/**
 * Batch search over queries packed back-to-back in one Float32Array,
 * returning results in flat typed arrays.
 *
 * Query q occupies elements [q * dimension, (q + 1) * dimension) of `queries`
 * and its results occupy slots [q * k, (q + 1) * k) of the outputs. Slots
 * without a result hold id 0 and distance Infinity.
 *
 * @param queries Flat Float32Array containing all queries concatenated
 * @param numQueries Number of queries in the array
 * @param k Number of result slots per query
 * @param efSearch Search effort parameter, forwarded to searchKNN
 * @returns Flat `ids` and `distances` arrays of length numQueries * k
 */
searchKNNBatchFlat(
  queries: Float32Array,
  numQueries: number,
  k: number,
  efSearch?: number
): { ids: Uint32Array; distances: Float32Array } {
  const slotCount = numQueries * k;

  if (this.entryPointId === -1 || this.nodeCount === 0) {
    // Empty index: every slot is padding
    return {
      ids: new Uint32Array(slotCount),
      distances: new Float32Array(slotCount).fill(Infinity)
    };
  }

  const ids = new Uint32Array(slotCount);
  const distances = new Float32Array(slotCount);
  const width = this.dimension;

  for (let q = 0; q < numQueries; q++) {
    // View (not copy) of the q-th query inside the packed buffer
    const from = q * width;
    const hits = this.searchKNN(queries.subarray(from, from + width), k, efSearch);
    const found = hits.length;

    // Fill this query's k slots: real hits first, padding afterwards
    const base = q * k;
    for (let i = 0; i < k; i++) {
      const hit = i < found ? hits[i] : undefined;
      ids[base + i] = hit ? hit.id : 0;
      distances[base + i] = hit ? hit.distance : Infinity;
    }

    // Reset visited state before the next query
    this.clearVisited();
  }

  return { ids, distances };
}
|
|
945
|
+
|
|
946
|
+
// ============================================
|
|
947
|
+
// Convenience Methods
|
|
948
|
+
// ============================================
|
|
949
|
+
|
|
950
|
+
/**
 * Insert one vector under an automatically chosen ID and return that ID.
 * The ID is the value of `nodeCount` at the time of the call.
 *
 * @param vector Vector to add
 * @returns The ID the vector was stored under
 *
 * @example
 * ```typescript
 * const id = await index.add([0.1, 0.2, 0.3]);
 * console.log(`Added vector with ID: ${id}`);
 * ```
 */
async add(vector: number[] | Float32Array): Promise<number> {
  const assignedId = this.nodeCount;
  await this.addPoint(assignedId, vector);
  return assignedId;
}
|
|
968
|
+
|
|
969
|
+
/**
 * Convenience wrapper around searchKNN that also accepts plain arrays.
 *
 * @param vector Query vector; a plain array is converted to a Float32Array
 * @param k Number of results (default: 10)
 * @returns Array of {id, distance} results as produced by searchKNN
 *
 * @example
 * ```typescript
 * const results = index.query([0.1, 0.2, 0.3], 5);
 * results.forEach(r => console.log(`ID: ${r.id}, Distance: ${r.distance}`));
 * ```
 */
query(vector: number[] | Float32Array, k: number = 10): Array<{ id: number; distance: number }> {
  if (Array.isArray(vector)) {
    return this.searchKNN(new Float32Array(vector), k);
  }
  return this.searchKNN(vector, k);
}
|
|
986
|
+
|
|
987
|
+
/**
 * Insert several vectors under automatically chosen IDs.
 * IDs are consecutive, starting at the value of `nodeCount` when the call
 * begins, and the insertion goes through addPointsBulk().
 *
 * @param vectors Array of vectors to add
 * @returns The assigned IDs, in the same order as `vectors`
 *
 * @example
 * ```typescript
 * const ids = await index.addAll([[0.1, 0.2], [0.3, 0.4]]);
 * ```
 */
async addAll(vectors: Array<number[] | Float32Array>): Promise<number[]> {
  const firstId = this.nodeCount;

  const points = vectors.map((input, position) => {
    // Plain arrays are converted; Float32Arrays are passed through as-is
    const asFloat32 = input instanceof Float32Array ? input : new Float32Array(input);
    return { id: firstId + position, vector: asFloat32 };
  });

  await this.addPointsBulk(points);

  return points.map(({ id }) => id);
}
|
|
1008
|
+
|
|
1009
|
+
/**
 * Promise-returning wrapper around addPointsBulkSync().
 * The insertion itself is performed synchronously by addPointsBulkSync(),
 * which switches the index into construction mode for the duration of the
 * batch and clears the construction-time neighbor sets afterwards.
 *
 * @param points Array of {id, vector} to add
 * @param options Forwarded unchanged to addPointsBulkSync()
 * @example
 * ```typescript
 * await index.addPointsBulk([
 *   { id: 0, vector: new Float32Array([0.1, 0.2, ...]) },
 *   { id: 1, vector: new Float32Array([0.3, 0.4, ...]) },
 * ]);
 * ```
 */
async addPointsBulk(points: Array<{ id: number; vector: Float32Array }>, options?: { skipNormalization?: boolean }): Promise<void> {
  return this.addPointsBulkSync(points, options);
}
|
|
1026
|
+
|
|
1027
|
+
/**
 * Synchronous bulk insertion.
 * Turns on construction mode, inserts every point via addPointSync() in
 * input order, and always leaves construction mode again - even when an
 * insertion throws. An empty batch is a no-op.
 *
 * @param points Array of {id, vector} to add
 * @param options skipNormalization - set true if vectors are already unit-normalized
 */
addPointsBulkSync(points: Array<{ id: number; vector: Float32Array }>, options?: { skipNormalization?: boolean }): void {
  if (points.length > 0) {
    // Start from a clean construction cache
    this.constructionMode = true;
    this.neighborSets.clear();

    try {
      for (const point of points) {
        this.addPointSync(point.id, point.vector, options);
      }
    } finally {
      // Resets constructionMode and clears neighborSets, releasing their memory
      this.clearConstructionCache();
    }
  }
}
|
|
1049
|
+
|
|
1050
|
+
/**
 * Leave construction mode and drop the construction-time neighbor sets so
 * their memory can be reclaimed. Bulk insertion performs the same cleanup
 * when it finishes; call this manually only if needed.
 */
clearConstructionCache(): void {
  this.neighborSets.clear();
  this.constructionMode = false;
}
|
|
1059
|
+
|
|
1060
|
+
// Serialization format constants

/** File magic: the ASCII codes of 'H', 'N', 'S', 'W' (0x48, 0x4E, 0x53, 0x57) packed into one 32-bit value. */
private static readonly MAGIC = 0x484E5357;

/** Current format version; v3 adds the vector offset index used for lazy loading. */
private static readonly FORMAT_VERSION = 3;

/** Header size in bytes: 4 (magic) + 4 (version) + 28 (existing header) + 4 (vectorDataOffset). */
private static readonly HEADER_SIZE = 40;
|
|
1064
|
+
|
|
1065
|
+
/**
 * Collect the nodes stored in slots 0..nodeCount-1 of `nodes`, in slot
 * order, skipping slots that hold no node.
 */
private getNodesArray(): Node[] {
  const collected: Node[] = [];
  for (let slot = 0; slot < this.nodeCount; slot += 1) {
    const candidate = this.nodes[slot];
    if (!candidate) {
      continue;
    }
    collected.push(candidate);
  }
  return collected;
}
|
|
1076
|
+
|
|
1077
|
+
/**
 * Serialize the index into a single ArrayBuffer (format v3, little-endian).
 *
 * Layout, in order:
 * 1. Header (HEADER_SIZE bytes): magic, version, dimension, metric code
 *    (0 = cosine, 1 = euclidean, 2 = other), M, efConstruction, maxLevel,
 *    entryPointId, node count, vectorDataOffset.
 * 2. Node metadata: (id, level) per node.
 * 3. Neighbor metadata: (neighbor count, encoded byte size) per node and level.
 * 4. Delta-encoded neighbor lists, expressed as positions in the node table.
 * 5. Vector offset table: one offset per node, relative to vectorDataOffset.
 * 6. Vector data: `dimension` float32 values per node.
 *
 * All counts and sizes are derived from the nodes actually written, so the
 * header always agrees with the payload. Neighbor ids that do not refer to a
 * written node are dropped rather than redirected to another node. Vectors
 * are resolved through getNodeVector so that a lazily loaded index writes
 * real data instead of unloaded placeholders.
 *
 * @returns A new buffer containing the serialized index
 */
serialize(): ArrayBuffer {
  const nodesArray = this.getNodesArray();
  // Use the number of nodes actually present, not this.nodeCount, so every
  // size, table and header field matches what is written below
  const nodeCount = nodesArray.length;
  const dimension = this.dimension;

  // Map node id -> position in the node table (neighbors are stored as positions)
  const idToIndex = new Map<number, number>();
  for (let i = 0; i < nodeCount; i++) {
    idToIndex.set(nodesArray[i].id, i);
  }

  // Pre-encode all neighbor lists so the total size is known before allocating
  const neighborCounts: number[][] = [];
  const encodedNeighbors: Uint8Array[][] = [];
  let totalNeighborBytes = 0;
  let neighborMetadataBytes = 0;

  for (const node of nodesArray) {
    const levelCounts: number[] = [];
    const levelEncodings: Uint8Array[] = [];

    for (let l = 0; l <= node.level; l++) {
      const neighborIndices: number[] = [];
      for (const neighborId of node.neighbors[l]) {
        const position = idToIndex.get(neighborId);
        // Skip dangling ids: mapping them to a fallback position would
        // persist an edge to an unrelated node
        if (position !== undefined) neighborIndices.push(position);
      }

      levelCounts.push(neighborIndices.length);
      const encoded = deltaEncodeNeighbors(neighborIndices);
      levelEncodings.push(encoded);
      totalNeighborBytes += encoded.length;
    }

    neighborCounts.push(levelCounts);
    encodedNeighbors.push(levelEncodings);
    neighborMetadataBytes += (node.level + 1) * 8; // count + encoded size per level
  }

  // Everything before the vector section
  const graphSize =
    HNSWIndex.HEADER_SIZE +
    nodeCount * 8 + // id + level per node
    neighborMetadataBytes +
    totalNeighborBytes +
    nodeCount * 4; // vector offset table

  const vectorDataOffset = graphSize;
  const vectorDataSize = nodeCount * dimension * 4;

  const buffer = new ArrayBuffer(graphSize + vectorDataSize);
  const view = new DataView(buffer);
  const uint8Array = new Uint8Array(buffer);

  let offset = 0;

  // Header
  view.setUint32(offset, HNSWIndex.MAGIC, true); offset += 4;
  view.setUint32(offset, HNSWIndex.FORMAT_VERSION, true); offset += 4;
  view.setUint32(offset, dimension, true); offset += 4;
  const metricCode = this.metric === 'cosine' ? 0 : this.metric === 'euclidean' ? 1 : 2;
  view.setUint32(offset, metricCode, true); offset += 4;
  view.setUint32(offset, this.M, true); offset += 4;
  view.setUint32(offset, this.efConstruction, true); offset += 4;
  view.setUint32(offset, this.maxLevel, true); offset += 4;
  // An entryPointId of -1 is stored as its 32-bit two's-complement pattern;
  // deserialize reads this field back as a signed integer
  view.setUint32(offset, this.entryPointId, true); offset += 4;
  view.setUint32(offset, nodeCount, true); offset += 4;
  view.setUint32(offset, vectorDataOffset, true); offset += 4;

  // Node metadata (vectors are stored separately at the end)
  for (const node of nodesArray) {
    view.setUint32(offset, node.id, true); offset += 4;
    view.setUint32(offset, node.level, true); offset += 4;
  }

  // Neighbor metadata: how many neighbors and how many encoded bytes per level
  for (let i = 0; i < nodeCount; i++) {
    const levelCounts = neighborCounts[i];
    const levelEncodings = encodedNeighbors[i];
    for (let l = 0; l < levelEncodings.length; l++) {
      view.setUint32(offset, levelCounts[l], true); offset += 4;
      view.setUint32(offset, levelEncodings[l].length, true); offset += 4;
    }
  }

  // Encoded neighbor data
  for (const levelEncodings of encodedNeighbors) {
    for (const encoded of levelEncodings) {
      uint8Array.set(encoded, offset);
      offset += encoded.length;
    }
  }

  // Vector offset table (offsets are relative to vectorDataOffset)
  for (let i = 0; i < nodeCount; i++) {
    view.setUint32(offset, i * dimension * 4, true);
    offset += 4;
  }

  // Vector data. Written through the DataView because the section start is
  // not guaranteed to be 4-byte aligned (encoded neighbor lists have
  // arbitrary byte lengths).
  for (const node of nodesArray) {
    const vector = this.getNodeVector(node.id) ?? node.vector;
    for (let j = 0; j < dimension; j++) {
      view.setFloat32(offset, vector[j], true);
      offset += 4;
    }
  }

  return buffer;
}
|
|
1186
|
+
|
|
1187
|
+
/**
 * Deserialize an HNSW index from a buffer.
 *
 * Supported layouts (all little-endian):
 * - v3: graph data first, then a vector offset table and the vectors at the
 *   end of the buffer; supports lazy vector loading.
 * - v2: vectors stored inline with the node metadata, delta-encoded
 *   neighbor lists.
 * - v0/v1: vectors and neighbor counts inline per node, followed by raw
 *   neighbor positions. A buffer without the magic number is treated as v0.
 *
 * @param buffer The serialized index buffer
 * @param options Optional loading options
 *   - lazyLoadVectors: If true, don't load vectors immediately (v3+ only);
 *     the index keeps a reference to `buffer` and nodes start with
 *     placeholder vectors. Ignored for older formats.
 * @returns The reconstructed index
 * @throws RangeError if the buffer is too small to contain the header
 * @throws Error if the format version is newer than FORMAT_VERSION
 */
static deserialize(buffer: ArrayBuffer, options?: { lazyLoadVectors?: boolean }): HNSWIndex {
  const view = new DataView(buffer);
  const uint8Array = new Uint8Array(buffer);
  const lazyLoad = options?.lazyLoadVectors ?? false;

  // Seven uint32 fields are read for every format version
  const COMMON_HEADER_BYTES = 28;
  if (buffer.byteLength < COMMON_HEADER_BYTES) {
    throw new RangeError(
      `HNSW buffer too small: ${buffer.byteLength} bytes, need at least ${COMMON_HEADER_BYTES} for the header`
    );
  }

  // Sequential read cursor and small helpers that advance it
  let offset = 0;
  const readUint32 = (): number => {
    const value = view.getUint32(offset, true);
    offset += 4;
    return value;
  };
  const readInt32 = (): number => {
    const value = view.getInt32(offset, true);
    offset += 4;
    return value;
  };

  // A leading magic number marks the versioned formats (v1+)
  const hasMagic = view.getUint32(0, true) === HNSWIndex.MAGIC;
  let formatVersion = 0;
  if (hasMagic) {
    offset = 4; // Skip magic
    formatVersion = readUint32();

    if (formatVersion > HNSWIndex.FORMAT_VERSION) {
      throw new Error(`Unsupported HNSW format version: ${formatVersion}. Maximum supported: ${HNSWIndex.FORMAT_VERSION}`);
    }
  }

  // Now that the version is known, make sure the whole header is present
  const headerBytes = (hasMagic ? 8 : 0) + COMMON_HEADER_BYTES + (formatVersion >= 3 ? 4 : 0);
  if (buffer.byteLength < headerBytes) {
    throw new RangeError(
      `HNSW buffer too small: ${buffer.byteLength} bytes, need at least ${headerBytes} for the header`
    );
  }

  // Common header fields
  const dimension = readUint32();
  const metricCode = readUint32();
  const metric = metricCode === 0 ? 'cosine' : metricCode === 1 ? 'euclidean' : 'dot_product';
  const M = readUint32();
  const efConstruction = readUint32();
  const maxLevel = readInt32();
  const entryPointId = readInt32();
  const nodeCount = readUint32();

  // V3+ has vectorDataOffset in header
  const vectorDataOffset = formatVersion >= 3 ? readUint32() : 0;

  const index = new HNSWIndex(dimension, metric, M, efConstruction);
  index.maxLevel = maxLevel;
  index.entryPointId = entryPointId;

  // Position in the serialized node table -> node id
  const indexToId: number[] = new Array(nodeCount);

  // Read `dimension` float32 values starting at absolute byte position `start`
  const readVectorAt = (start: number): Float32Array => {
    const vector = new Float32Array(dimension);
    for (let j = 0; j < dimension; j++) {
      vector[j] = view.getFloat32(start + j * 4, true);
    }
    return vector;
  };

  // Read the (count, encodedSize) pairs for levels 0..level of one node
  const readNeighborMetadata = (level: number): Array<{ count: number; encodedSize: number }> => {
    const levelMeta: Array<{ count: number; encodedSize: number }> = [];
    for (let l = 0; l <= level; l++) {
      const count = readUint32();
      const encodedSize = readUint32();
      levelMeta.push({ count, encodedSize });
    }
    return levelMeta;
  };

  // Decode one node's delta-encoded neighbor lists and translate table
  // positions back to node ids (out-of-range positions fall back to id 0)
  const decodeNeighborLists = (levelMeta: Array<{ count: number; encodedSize: number }>): number[][] => {
    const neighbors = new Array<number[]>(levelMeta.length);
    for (let l = 0; l < levelMeta.length; l++) {
      const { count, encodedSize } = levelMeta[l];

      if (count === 0 || encodedSize === 0) {
        neighbors[l] = [];
      } else {
        const encodedSlice = uint8Array.subarray(offset, offset + encodedSize);
        const neighborIndices = deltaDecodeNeighbors(encodedSlice, count);
        neighbors[l] = neighborIndices.map(idx =>
          idx >= 0 && idx < indexToId.length ? indexToId[idx] : 0
        );
      }

      // Always step over the recorded bytes, including the case of an empty
      // list with a non-zero encoded size, so later reads stay aligned
      offset += encodedSize;
    }
    return neighbors;
  };

  if (formatVersion >= 3) {
    // V3 format: vectors at end, supports lazy loading

    // First pass: node metadata (no vectors here)
    const nodeMetadata: Array<{ id: number; level: number }> = [];
    for (let i = 0; i < nodeCount; i++) {
      const id = readUint32();
      const level = readUint32();
      indexToId[i] = id;
      nodeMetadata.push({ id, level });
    }

    // Second pass: neighbor metadata
    const neighborMetadata: Array<Array<{ count: number; encodedSize: number }>> = [];
    for (let i = 0; i < nodeCount; i++) {
      neighborMetadata.push(readNeighborMetadata(nodeMetadata[i].level));
    }

    // Third pass: neighbor data
    const nodeNeighbors: number[][][] = [];
    for (let i = 0; i < nodeCount; i++) {
      nodeNeighbors.push(decodeNeighborLists(neighborMetadata[i]));
    }

    // Vector offset table: stored relative to vectorDataOffset, kept absolute
    for (let i = 0; i < nodeCount; i++) {
      const relativeOffset = readUint32();
      index.vectorOffsets.set(nodeMetadata[i].id, vectorDataOffset + relativeOffset);
    }

    if (lazyLoad) {
      // Lazy loading: keep the buffer and create nodes with placeholder vectors
      index.lazyLoadEnabled = true;
      index.vectorBuffer = buffer;

      for (let i = 0; i < nodeCount; i++) {
        const { id, level } = nodeMetadata[i];
        const node: Node = {
          id,
          level,
          vector: new Float32Array(dimension), // Placeholder until loaded on demand
          neighbors: nodeNeighbors[i]
        };
        index.setNode(node);
      }
    } else {
      // Eager loading: read every vector now
      for (let i = 0; i < nodeCount; i++) {
        const { id, level } = nodeMetadata[i];
        const vector = readVectorAt(index.vectorOffsets.get(id)!);

        const node: Node = { id, level, vector, neighbors: nodeNeighbors[i] };
        index.setNode(node);
        index.vectorsLoaded.add(id);
      }
    }
  } else if (formatVersion >= 2) {
    // V2 format: delta-encoded neighbor lists, vectors inline
    const nodeMetadata: Array<{ id: number; level: number; vector: Float32Array }> = [];
    for (let i = 0; i < nodeCount; i++) {
      const id = readUint32();
      const level = readUint32();
      indexToId[i] = id;

      const vector = readVectorAt(offset);
      offset += dimension * 4;

      nodeMetadata.push({ id, level, vector });
    }

    const neighborMetadata: Array<Array<{ count: number; encodedSize: number }>> = [];
    for (let i = 0; i < nodeCount; i++) {
      neighborMetadata.push(readNeighborMetadata(nodeMetadata[i].level));
    }

    for (let i = 0; i < nodeCount; i++) {
      const { id, level, vector } = nodeMetadata[i];
      const neighbors = decodeNeighborLists(neighborMetadata[i]);

      const node: Node = { id, level, vector, neighbors };
      index.setNode(node);
    }
  } else {
    // V0/V1 format: raw neighbor positions
    // First pass: per node id, level, vector and the neighbor count per level
    for (let i = 0; i < nodeCount; i++) {
      const id = readUint32();
      const level = readUint32();
      indexToId[i] = id;

      const vector = readVectorAt(offset);
      offset += dimension * 4;

      const neighbors = new Array<number[]>(level + 1);
      for (let l = 0; l <= level; l++) {
        const neighborCount = readUint32();
        neighbors[l] = new Array(neighborCount);
      }
      const node: Node = { id, level, vector, neighbors };

      index.setNode(node);
    }

    // Second pass: fill the pre-sized neighbor lists, translating positions to ids
    for (const node of index.nodes.values()) {
      if (!node) continue;
      for (let l = 0; l <= node.level; l++) {
        for (let j = 0; j < node.neighbors[l].length; j++) {
          const neighborIndex = readInt32();
          if (neighborIndex >= 0 && neighborIndex < indexToId.length) {
            node.neighbors[l][j] = indexToId[neighborIndex];
          }
        }
      }
    }
  }

  return index;
}
|
|
1423
|
+
|
|
1424
|
+
/**
 * Fetch the vector for a node, decoding it from the attached vector buffer
 * the first time it is requested on a lazy-loaded index.
 *
 * @param nodeId Slot of the node in the internal node table
 * @returns The node's vector, or `null` when the node is missing, no vector
 *   buffer is attached, or no byte offset is recorded for the node
 */
loadVector(nodeId: number): Float32Array | null {
  const target = this.nodes[nodeId];
  if (!target) {
    return null;
  }

  // Eager indices, and nodes that were already decoded, return what they hold.
  const alreadyResident = !this.lazyLoadEnabled || this.vectorsLoaded.has(nodeId);
  if (alreadyResident) {
    return target.vector;
  }

  const backing = this.vectorBuffer;
  if (!backing) {
    return null;
  }

  const startByte = this.vectorOffsets.get(nodeId);
  if (startByte === undefined) {
    return null;
  }

  // Decode `dimension` little-endian float32 values starting at the recorded offset.
  const reader = new DataView(backing);
  const decoded = new Float32Array(this.dimension);
  let cursor = startByte;
  for (let component = 0; component < this.dimension; component++) {
    decoded[component] = reader.getFloat32(cursor, true);
    cursor += 4;
  }

  // Remember the decoded vector on the node and flag it as loaded.
  target.vector = decoded;
  this.vectorsLoaded.add(nodeId);

  // Keep the flat vector storage (used for batch distance calculations) in sync.
  this.setFlatVector(nodeId, decoded);

  return decoded;
}
|
|
1458
|
+
|
|
1459
|
+
/**
 * Eagerly load the vectors of the given nodes, e.g. to warm the cache
 * ahead of a batch of searches. Does nothing when lazy loading is off.
 *
 * @param nodeIds Node slots whose vectors should be loaded
 */
preloadVectors(nodeIds: number[]): void {
  if (this.lazyLoadEnabled) {
    for (let position = 0; position < nodeIds.length; position++) {
      this.loadVector(nodeIds[position]);
    }
  }
}
|
|
1470
|
+
|
|
1471
|
+
/**
 * Report whether this index loads vectors on demand.
 *
 * @returns The current value of the lazy-load flag
 */
isLazyLoadEnabled(): boolean {
  const { lazyLoadEnabled } = this;
  return lazyLoadEnabled;
}
|
|
1477
|
+
|
|
1478
|
+
/**
 * Summarize how many vectors are currently loaded.
 *
 * @returns `enabled` mirrors the lazy-load flag. With lazy loading off every
 *   node counts as loaded and the reduction is `'0%'`; otherwise the
 *   reduction is the share of nodes whose vector has not been loaded yet,
 *   formatted with one decimal place.
 */
getLazyLoadStats(): { enabled: boolean; totalNodes: number; loadedVectors: number; memoryReduction: string } {
  const totalNodes = this.nodeCount;
  const loadedVectors = this.vectorsLoaded.size;

  if (this.lazyLoadEnabled) {
    // Share of nodes still unloaded, as a percentage; an empty index reports plain '0'.
    let percentSaved = '0';
    if (totalNodes > 0) {
      const unloadedShare = 1 - loadedVectors / totalNodes;
      percentSaved = (unloadedShare * 100).toFixed(1);
    }
    return {
      enabled: true,
      totalNodes,
      loadedVectors,
      memoryReduction: `${percentSaved}%`
    };
  }

  return {
    enabled: false,
    totalNodes,
    loadedVectors: totalNodes,
    memoryReduction: '0%'
  };
}
|
|
1502
|
+
|
|
1503
|
+
/**
 * Serialize the index and write the resulting bytes to `filePath` via `Bun.write`.
 *
 * @param filePath Destination path of the binary file
 */
async saveToFile(filePath: string): Promise<void> {
  await Bun.write(filePath, this.serialize());
}
|
|
1508
|
+
|
|
1509
|
+
/**
 * Read a binary index file through `Bun.file` and rebuild the index from it.
 *
 * @param filePath Path of the file to read
 * @returns The index produced by `HNSWIndex.deserialize`
 */
static async loadFromFile(filePath: string): Promise<HNSWIndex> {
  const bytes = await Bun.file(filePath).arrayBuffer();
  return HNSWIndex.deserialize(bytes);
}
|
|
1515
|
+
|
|
1516
|
+
/**
 * Release the data held by this index so it can be garbage-collected.
 *
 * Besides the node table and the flat vector storage, this also resets the
 * state that refers to those nodes: the entry point, the lazy-load
 * bookkeeping and the Int8 quantized copies. Leaving them behind would keep
 * large arrays alive and make the statistics getters report stale values.
 */
destroy(): void {
  // Graph nodes and the contiguous float32 copy of their vectors.
  this.nodes = [];
  this.nodeCount = 0;
  this.flatVectors = new Float32Array(0);
  this.flatVectorsCapacity = 0;

  // No nodes means no valid entry point; -1 is the "empty" sentinel the search code checks.
  this.entryPointId = -1;

  // Lazy-load bookkeeping is keyed by node ids that no longer exist.
  this.vectorsLoaded.clear();
  this.vectorOffsets.clear();

  // Quantized copies of the vectors are useless without the nodes they mirror.
  this.int8Vectors = [];
  this.quantizationEnabled = false;

  // NOTE(review): `vectorBuffer` and `scalarQuantizer` are still referenced after this call.
  // Release them here as well once their declared nullability is confirmed.
}
|
|
1524
|
+
|
|
1525
|
+
/**
 * Estimate the memory held by the index, in bytes.
 *
 * The figure adds up vector payloads, neighbor ids, the flat vector storage
 * and fixed per-object allowances; the allowances are rough estimates, not
 * measurements.
 *
 * @returns Approximate byte count
 */
getMemoryUsage(): number {
  const NODE_HEADER_BYTES = 8; // id + level
  const BYTES_PER_COMPONENT = 4; // one float32 component
  const NEIGHBOR_TABLE_OVERHEAD = 24; // rough estimate for the per-node neighbors array
  const BYTES_PER_NEIGHBOR_ID = 4;
  const LEVEL_ARRAY_OVERHEAD = 16; // rough estimate per level list
  const NODE_SLOT_OVERHEAD = 8; // rough estimate per entry of the node table
  const BASE_OBJECT_OVERHEAD = 1024;

  let nodeBytes = 0;
  for (let slot = 0; slot < this.nodeCount; slot++) {
    const node = this.nodes[slot];
    if (node) {
      nodeBytes += NODE_HEADER_BYTES;
      nodeBytes += node.vector.length * BYTES_PER_COMPONENT;
      nodeBytes += NEIGHBOR_TABLE_OVERHEAD;

      // One list of neighbor ids per level the node participates in.
      for (const links of node.neighbors) {
        nodeBytes += links.length * BYTES_PER_NEIGHBOR_ID + LEVEL_ARRAY_OVERHEAD;
      }
    }
  }

  const flatStorageBytes = this.flatVectors.byteLength;
  const nodeTableBytes = this.nodeCount * NODE_SLOT_OVERHEAD;

  return nodeBytes + flatStorageBytes + nodeTableBytes + BASE_OBJECT_OVERHEAD;
}
|
|
1559
|
+
|
|
1560
|
+
/**
 * Collect every stored vector, keyed by node id, for brute-force search.
 *
 * The map holds references to the nodes' own vectors; nothing is copied.
 *
 * @returns Map from node id to that node's vector
 */
getAllVectors(): Map<number, Float32Array> {
  const vectorsById = new Map<number, Float32Array>();
  let slot = 0;
  while (slot < this.nodeCount) {
    const entry = this.nodes[slot];
    slot += 1;
    if (!entry) {
      continue;
    }
    vectorsById.set(entry.id, entry.vector);
  }
  return vectorsById;
}
|
|
1571
|
+
|
|
1572
|
+
// ============================================
|
|
1573
|
+
// Quantized Search (Int8 with automatic rescoring)
|
|
1574
|
+
// ============================================
|
|
1575
|
+
|
|
1576
|
+
/**
 * Turn on Int8 quantized search.
 *
 * Trains a `ScalarQuantizer` on the vectors currently in the index, stores
 * an Int8 copy of each vector indexed by node id, and flips the
 * quantization flag so `searchKNNQuantized` uses the quantized path.
 *
 * @throws Error when the index contains no nodes
 *
 * @example
 * ```typescript
 * // After adding vectors
 * index.enableQuantization();
 *
 * // Quantized search rescores its candidates with float32 distances
 * const results = index.searchKNNQuantized(query, 10);
 * ```
 */
enableQuantization(): void {
  const total = this.nodeCount;
  if (total === 0) {
    throw new Error('Cannot enable quantization on empty index. Add vectors first.');
  }

  // Gather the populated slots once; they feed both training and quantization.
  const liveNodes = [];
  for (let slot = 0; slot < total; slot++) {
    const entry = this.nodes[slot];
    if (entry) liveNodes.push(entry);
  }

  // Train the scalar (int8) quantizer on every stored vector.
  const quantizer = new ScalarQuantizer(this.dimension);
  this.scalarQuantizer = quantizer;
  quantizer.train(liveNodes.map(entry => entry.vector));

  // Quantized copies live in a plain array addressed by node id.
  this.int8Vectors = new Array(total);
  for (const entry of liveNodes) {
    this.int8Vectors[entry.id] = quantizer.quantize(entry.vector);
  }

  this.quantizationEnabled = true;
}
|
|
1621
|
+
|
|
1622
|
+
/**
 * Report whether Int8 quantization has been switched on for this index.
 *
 * @returns The current value of the quantization flag
 */
isQuantizationEnabled(): boolean {
  const { quantizationEnabled } = this;
  return quantizationEnabled;
}
|
|
1628
|
+
|
|
1629
|
+
/**
 * Approximate k-nearest-neighbour search that explores layer 0 with Int8
 * distances and then re-ranks the best candidates with float32 distances.
 *
 * Falls back to `searchKNN` when quantization has not been enabled.
 *
 * @param query Query vector
 * @param k Number of results to return; fractional values are floored and
 *   non-positive values yield an empty result
 * @param candidateMultiplier How many times `k` candidates are rescored
 *   (default: 3). A fractional product is rounded up and the pool never
 *   drops below `k`.
 * @param efSearch Search effort for layer 0; when omitted (or 0) it defaults
 *   to `max(2 * candidates, 50)`
 * @returns Up to `k` `{id, distance}` entries sorted by ascending distance
 *   (same format as searchKNN)
 */
searchKNNQuantized(
  query: Float32Array,
  k: number,
  candidateMultiplier: number = 3,
  efSearch?: number
): Array<{ id: number; distance: number }> {
  // Fallback to standard search if quantization not enabled
  if (!this.quantizationEnabled) {
    return this.searchKNN(query, k, efSearch);
  }

  if (this.entryPointId === -1 || this.nodeCount === 0) {
    return [];
  }

  // Array lengths are derived from k below, so it must be a positive integer.
  // (A negative, fractional or NaN k previously surfaced as "Invalid array length".)
  const wanted = Math.floor(k);
  if (!(wanted > 0)) {
    return [];
  }

  // Normalize query if needed - reuse buffer for efficiency
  let normalizedQuery = query;
  if (this.vectorsAreNormalized) {
    this.queryNormBuffer.set(query);
    normalizedQuery = this.normalizeVector(this.queryNormBuffer);
  }

  // Rescoring pool: a whole number of candidates, never smaller than the requested k.
  const numCandidates = Math.max(wanted, Math.ceil(wanted * candidateMultiplier) || 0);
  const effectiveEf = efSearch || Math.max(numCandidates * 2, 50);

  // Phase 1: greedy descent through the upper layers using float32 distances.
  let currentEntryPoint = this.nodes[this.entryPointId]!;
  const entryVector = this.getNodeVector(this.entryPointId);
  if (!entryVector) return [];
  let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(normalizedQuery, entryVector) };

  for (let l = this.maxLevel; l > 0; l--) {
    const result = this.greedySearch(normalizedQuery, currentEntryPoint, l);
    if (result.distance < currentBest.distance) {
      currentBest = result;
      currentEntryPoint = this.nodes[currentBest.id]!;
    }
  }

  // Phase 2: explore layer 0 with quantized distances.
  // NOTE(review): currentBest.distance is a float32 distance, yet it is compared against
  // Int8 distances inside searchLayerQuantized — confirm both are on a comparable scale.
  const candidates = this.searchLayerQuantized(normalizedQuery, currentBest, 0, effectiveEf);

  // Phase 3: rescore the closest candidates with float32 distances for accurate ranking.
  const rescoreCount = Math.min(candidates.length, numCandidates);
  const rescored = new Array<{ id: number; distance: number }>(rescoreCount);

  for (let i = 0; i < rescoreCount; i++) {
    const c = candidates[i];
    const nodeVector = this.getNodeVector(c.id);
    if (nodeVector) {
      rescored[i] = { id: c.id, distance: this.calculateDistance(normalizedQuery, nodeVector) };
    } else {
      rescored[i] = c; // Keep the quantized estimate if the vector is unavailable
    }
  }

  // Sort by accurate distance
  rescored.sort((a, b) => a.distance - b.distance);

  // Return top k
  if (rescored.length > wanted) rescored.length = wanted;
  return rescored;
}
|
|
1711
|
+
|
|
1712
|
+
/**
 * Best-first exploration of one graph layer that scores neighbors with Int8
 * distances whenever a quantized query and a quantized neighbor are both
 * available, and with the float32 distance otherwise.
 *
 * @param query Query vector (quantized once up front when a quantizer exists)
 * @param nearest Starting node and its distance to the query
 * @param layer Layer whose neighbor lists are followed
 * @param ef Maximum number of results kept while exploring
 * @returns Collected results, filled back-to-front as the results heap is
 *   drained (the heap is treated as a max-heap, so the array ends up in
 *   ascending distance order)
 */
private searchLayerQuantized(
  query: Float32Array,
  nearest: { id: number; distance: number },
  layer: number,
  ef: number
): Array<{ id: number; distance: number }> {
  // One quantization of the query serves every neighbor comparison below.
  const quantizedQuery = this.scalarQuantizer ? this.scalarQuantizer.quantize(query) : null;

  // Reset per-search scratch state: visited marks and both reusable heaps.
  this.clearVisited();
  this.ensureHeapCapacity(ef);
  this.candidatesHeap.clear();
  this.resultsHeap.clear();

  // Seed the exploration with the starting node.
  this.markVisited(nearest.id);
  this.candidatesHeap.push(nearest.id, nearest.distance);
  this.resultsHeap.push(nearest.id, nearest.distance);

  let worstKept = nearest.distance;

  while (!this.candidatesHeap.isEmpty()) {
    const frontierDistance = this.candidatesHeap.peekValue();
    const frontierId = this.candidatesHeap.pop();

    if (frontierId === -1) continue;

    // Stop once the result set is full and the closest open candidate is already worse.
    const resultsFull = this.resultsHeap.size() >= ef;
    if (resultsFull && frontierDistance > worstKept) {
      break;
    }

    const expanded = this.nodes[frontierId];
    if (!expanded) continue;

    const links = expanded.neighbors[layer] || [];

    for (let n = 0; n < links.length; n++) {
      const neighborId = links[n];
      if (this.isVisited(neighborId)) continue;
      this.markVisited(neighborId);

      const quantizedNeighbor = quantizedQuery ? this.int8Vectors[neighborId] : undefined;

      let distance: number;
      if (quantizedQuery && quantizedNeighbor) {
        // Int8 path: pick the kernel matching the configured metric.
        distance = this.metric === 'cosine'
          ? cosineDistanceInt8(quantizedQuery, quantizedNeighbor)
          : l2SquaredInt8(quantizedQuery, quantizedNeighbor);
      } else {
        // Float32 fallback when the query or this neighbor has no quantized form.
        const neighborNode = this.nodes[neighborId];
        if (!neighborNode) continue;
        distance = this.calculateDistance(query, neighborNode.vector);
      }

      const admitted = this.resultsHeap.size() < ef || distance < worstKept;
      if (!admitted) continue;

      this.candidatesHeap.push(neighborId, distance);
      this.resultsHeap.push(neighborId, distance);

      // Keep at most `ef` results, then refresh the cut-off distance.
      if (this.resultsHeap.size() > ef) {
        this.resultsHeap.pop();
      }
      worstKept = this.resultsHeap.peekValue();
    }
  }

  // Drain the results heap, writing from the last slot towards the first.
  const total = this.resultsHeap.size();
  const ordered: Array<{ id: number; distance: number }> = new Array(total);
  for (let slot = total - 1; !this.resultsHeap.isEmpty(); slot--) {
    const distance = this.resultsHeap.peekValue();
    const id = this.resultsHeap.pop();
    ordered[slot] = { id, distance };
  }

  return ordered;
}
|
|
1808
|
+
|
|
1809
|
+
/**
 * Describe the current quantization state.
 *
 * @returns `enabled` mirrors the quantization flag and `vectorCount` is the
 *   node count. When enabled, `memoryReduction` compares the float32 payload
 *   (4 bytes per component) with the Int8 payload (1 byte per component);
 *   when disabled both strings report the 1x baseline.
 */
getQuantizationStats(): {
  enabled: boolean;
  vectorCount: number;
  memoryReduction: string;
  expectedSpeedup: string;
} {
  const vectorCount = this.nodeCount;
  const float32Size = vectorCount * this.dimension * 4;

  if (this.quantizationEnabled) {
    const int8Size = vectorCount * this.dimension;
    // With no stored components the division is 0/0 and would print "NaNx";
    // the per-component ratio is 4 bytes to 1 byte regardless of how many vectors exist.
    const ratio = int8Size > 0 ? float32Size / int8Size : 4;
    const reduction = ratio.toFixed(1);
    return {
      enabled: true,
      vectorCount,
      memoryReduction: `${reduction}x (${(float32Size / 1024 / 1024).toFixed(1)}MB → ${(int8Size / 1024 / 1024).toFixed(1)}MB)`,
      expectedSpeedup: '3-4x for distance calculations'
    };
  }

  return {
    enabled: false,
    vectorCount,
    memoryReduction: '1x (no quantization)',
    expectedSpeedup: '1x (baseline)'
  };
}
|
|
1839
|
+
}
|