verso-db 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/LICENSE +21 -0
- package/README.md +252 -0
- package/dist/BinaryHeap.d.ts +25 -0
- package/dist/BinaryHeap.d.ts.map +1 -0
- package/dist/Collection.d.ts +156 -0
- package/dist/Collection.d.ts.map +1 -0
- package/dist/HNSWIndex.d.ts +357 -0
- package/dist/HNSWIndex.d.ts.map +1 -0
- package/dist/MaxBinaryHeap.d.ts +63 -0
- package/dist/MaxBinaryHeap.d.ts.map +1 -0
- package/dist/Storage.d.ts +54 -0
- package/dist/Storage.d.ts.map +1 -0
- package/dist/VectorDB.d.ts +44 -0
- package/dist/VectorDB.d.ts.map +1 -0
- package/dist/backends/DistanceBackend.d.ts +5 -0
- package/dist/backends/DistanceBackend.d.ts.map +1 -0
- package/dist/backends/JsDistanceBackend.d.ts +37 -0
- package/dist/backends/JsDistanceBackend.d.ts.map +1 -0
- package/dist/encoding/DeltaEncoder.d.ts +61 -0
- package/dist/encoding/DeltaEncoder.d.ts.map +1 -0
- package/dist/errors.d.ts +58 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/index.d.ts +64 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +3732 -0
- package/dist/presets.d.ts +91 -0
- package/dist/presets.d.ts.map +1 -0
- package/dist/quantization/ScalarQuantizer.d.ts +114 -0
- package/dist/quantization/ScalarQuantizer.d.ts.map +1 -0
- package/dist/storage/BatchWriter.d.ts +104 -0
- package/dist/storage/BatchWriter.d.ts.map +1 -0
- package/dist/storage/BunStorageBackend.d.ts +58 -0
- package/dist/storage/BunStorageBackend.d.ts.map +1 -0
- package/dist/storage/MemoryBackend.d.ts +44 -0
- package/dist/storage/MemoryBackend.d.ts.map +1 -0
- package/dist/storage/OPFSBackend.d.ts +59 -0
- package/dist/storage/OPFSBackend.d.ts.map +1 -0
- package/dist/storage/StorageBackend.d.ts +66 -0
- package/dist/storage/StorageBackend.d.ts.map +1 -0
- package/dist/storage/WriteAheadLog.d.ts +111 -0
- package/dist/storage/WriteAheadLog.d.ts.map +1 -0
- package/dist/storage/createStorageBackend.d.ts +40 -0
- package/dist/storage/createStorageBackend.d.ts.map +1 -0
- package/dist/storage/index.d.ts +30 -0
- package/dist/storage/index.d.ts.map +1 -0
- package/package.json +98 -0
- package/src/BinaryHeap.ts +131 -0
- package/src/Collection.ts +695 -0
- package/src/HNSWIndex.ts +1839 -0
- package/src/MaxBinaryHeap.ts +175 -0
- package/src/Storage.ts +435 -0
- package/src/VectorDB.ts +109 -0
- package/src/backends/DistanceBackend.ts +17 -0
- package/src/backends/JsDistanceBackend.ts +227 -0
- package/src/encoding/DeltaEncoder.ts +217 -0
- package/src/errors.ts +110 -0
- package/src/index.ts +138 -0
- package/src/presets.ts +229 -0
- package/src/quantization/ScalarQuantizer.ts +383 -0
- package/src/storage/BatchWriter.ts +336 -0
- package/src/storage/BunStorageBackend.ts +161 -0
- package/src/storage/MemoryBackend.ts +120 -0
- package/src/storage/OPFSBackend.ts +250 -0
- package/src/storage/StorageBackend.ts +74 -0
- package/src/storage/WriteAheadLog.ts +326 -0
- package/src/storage/createStorageBackend.ts +137 -0
- package/src/storage/index.ts +53 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,3732 @@
|
|
|
1
|
+
// src/backends/JsDistanceBackend.ts
|
|
2
|
+
/**
 * Plain-JavaScript distance kernels over a flat vector buffer.
 *
 * The vector with id `k` is read from `base[k * dim]` through
 * `base[k * dim + dim - 1]`; `query` is always indexed from 0.
 */
class JsDistanceBackend {
  /**
   * Stores in `out[i]` the squared L2 distance between `query` and the
   * vector whose id is `ids[i]`.
   */
  batchL2(base, dim, ids, query, out) {
    const count = ids.length;
    let slot = 0;
    while (slot < count) {
      out[slot] = this.l2SquaredUnrolled(base, ids[slot] * dim, query, dim);
      slot += 1;
    }
  }

  /**
   * Stores in `out[i]` the dot product of `query` with the vector whose id
   * is `ids[i]`.
   */
  batchDot(base, dim, ids, query, out) {
    const count = ids.length;
    let slot = 0;
    while (slot < count) {
      out[slot] = this.dotProductUnrolled(base, ids[slot] * dim, query, dim);
      slot += 1;
    }
  }

  /**
   * Dot product of `query` with the `dim` elements of `base` starting at
   * `offset`. Four independent accumulators are used for the unrolled part;
   * the leftover elements are folded into the first accumulator.
   */
  dotProductUnrolled(base, offset, query, dim) {
    const unrolledEnd = dim - 3;
    let lane0 = 0;
    let lane1 = 0;
    let lane2 = 0;
    let lane3 = 0;
    let k = 0;
    while (k < unrolledEnd) {
      lane0 += base[offset + k] * query[k];
      lane1 += base[offset + k + 1] * query[k + 1];
      lane2 += base[offset + k + 2] * query[k + 2];
      lane3 += base[offset + k + 3] * query[k + 3];
      k += 4;
    }
    while (k < dim) {
      lane0 += base[offset + k] * query[k];
      k += 1;
    }
    return lane0 + lane1 + lane2 + lane3;
  }

  /**
   * Squared L2 distance between `query` and the `dim` elements of `base`
   * starting at `offset`. Same four-accumulator layout as the dot product.
   */
  l2SquaredUnrolled(base, offset, query, dim) {
    const unrolledEnd = dim - 3;
    let lane0 = 0;
    let lane1 = 0;
    let lane2 = 0;
    let lane3 = 0;
    let k = 0;
    while (k < unrolledEnd) {
      const gap0 = base[offset + k] - query[k];
      const gap1 = base[offset + k + 1] - query[k + 1];
      const gap2 = base[offset + k + 2] - query[k + 2];
      const gap3 = base[offset + k + 3] - query[k + 3];
      lane0 += gap0 * gap0;
      lane1 += gap1 * gap1;
      lane2 += gap2 * gap2;
      lane3 += gap3 * gap3;
      k += 4;
    }
    while (k < dim) {
      const gap = base[offset + k] - query[k];
      lane0 += gap * gap;
      k += 1;
    }
    return lane0 + lane1 + lane2 + lane3;
  }
}
|
|
53
|
+
/**
 * Dot product of `a` and `b` over `a.length` elements.
 *
 * The main loop is unrolled by eight with one accumulator per lane; leftover
 * elements are added to the first lane. The lanes are summed left to right.
 */
function dotProductFast(a, b) {
  const n = a.length;
  const unrolledEnd = n - 7;
  let lane0 = 0;
  let lane1 = 0;
  let lane2 = 0;
  let lane3 = 0;
  let lane4 = 0;
  let lane5 = 0;
  let lane6 = 0;
  let lane7 = 0;
  let k = 0;
  while (k < unrolledEnd) {
    lane0 += a[k] * b[k];
    lane1 += a[k + 1] * b[k + 1];
    lane2 += a[k + 2] * b[k + 2];
    lane3 += a[k + 3] * b[k + 3];
    lane4 += a[k + 4] * b[k + 4];
    lane5 += a[k + 5] * b[k + 5];
    lane6 += a[k + 6] * b[k + 6];
    lane7 += a[k + 7] * b[k + 7];
    k += 8;
  }
  while (k < n) {
    lane0 += a[k] * b[k];
    k += 1;
  }
  return lane0 + lane1 + lane2 + lane3 + lane4 + lane5 + lane6 + lane7;
}
|
|
74
|
+
/**
 * Squared Euclidean distance between `a` and `b` over `a.length` elements.
 *
 * Unrolled by eight with one accumulator per lane; leftover elements are
 * folded into the first lane. No square root is taken.
 */
function l2SquaredFast(a, b) {
  const n = a.length;
  const unrolledEnd = n - 7;
  let lane0 = 0;
  let lane1 = 0;
  let lane2 = 0;
  let lane3 = 0;
  let lane4 = 0;
  let lane5 = 0;
  let lane6 = 0;
  let lane7 = 0;
  let k = 0;
  while (k < unrolledEnd) {
    const gap0 = a[k] - b[k];
    const gap1 = a[k + 1] - b[k + 1];
    const gap2 = a[k + 2] - b[k + 2];
    const gap3 = a[k + 3] - b[k + 3];
    const gap4 = a[k + 4] - b[k + 4];
    const gap5 = a[k + 5] - b[k + 5];
    const gap6 = a[k + 6] - b[k + 6];
    const gap7 = a[k + 7] - b[k + 7];
    lane0 += gap0 * gap0;
    lane1 += gap1 * gap1;
    lane2 += gap2 * gap2;
    lane3 += gap3 * gap3;
    lane4 += gap4 * gap4;
    lane5 += gap5 * gap5;
    lane6 += gap6 * gap6;
    lane7 += gap7 * gap7;
    k += 8;
  }
  while (k < n) {
    const gap = a[k] - b[k];
    lane0 += gap * gap;
    k += 1;
  }
  return lane0 + lane1 + lane2 + lane3 + lane4 + lane5 + lane6 + lane7;
}
|
|
104
|
+
/**
 * Scales `v` in place to unit Euclidean length and returns the norm it had
 * before scaling.
 *
 * When the norm is not greater than zero (zero vector, or NaN), `v` is left
 * untouched and that norm is returned as-is.
 */
function normalizeInPlace(v) {
  const n = v.length;
  const unrolledEnd = n - 7;
  let sq0 = 0;
  let sq1 = 0;
  let sq2 = 0;
  let sq3 = 0;
  let sq4 = 0;
  let sq5 = 0;
  let sq6 = 0;
  let sq7 = 0;
  let k = 0;
  while (k < unrolledEnd) {
    sq0 += v[k] * v[k];
    sq1 += v[k + 1] * v[k + 1];
    sq2 += v[k + 2] * v[k + 2];
    sq3 += v[k + 3] * v[k + 3];
    sq4 += v[k + 4] * v[k + 4];
    sq5 += v[k + 5] * v[k + 5];
    sq6 += v[k + 6] * v[k + 6];
    sq7 += v[k + 7] * v[k + 7];
    k += 8;
  }
  while (k < n) {
    sq0 += v[k] * v[k];
    k += 1;
  }
  const norm = Math.sqrt(sq0 + sq1 + sq2 + sq3 + sq4 + sq5 + sq6 + sq7);
  if (!(norm > 0)) {
    return norm;
  }
  // Multiply by the reciprocal once instead of dividing every element.
  const reciprocal = 1 / norm;
  for (let pos = 0; pos < n; pos += 1) {
    v[pos] *= reciprocal;
  }
  return norm;
}
|
|
132
|
+
/**
 * Cosine distance (`1 - cosine similarity`) between `a` and `b`.
 *
 * When both inputs are flagged as normalized the norms are skipped and the
 * result is `1 - dot`. Otherwise both squared norms are accumulated and the
 * dot product is divided by the product of the norms. A zero magnitude
 * yields 1. Any result below 1e-10 (including negative values) is returned
 * as 0.
 */
function cosineDistanceFast(a, b, aIsNormalized = false, bIsNormalized = false) {
  const dot = dotProductFast(a, b);
  if (aIsNormalized && bIsNormalized) {
    const unitDistance = 1 - dot;
    if (unitDistance < 0.0000000001) {
      return 0;
    }
    return unitDistance;
  }

  const n = a.length;
  const unrolledEnd = n - 7;
  let sqA0 = 0;
  let sqA1 = 0;
  let sqA2 = 0;
  let sqA3 = 0;
  let sqB0 = 0;
  let sqB1 = 0;
  let sqB2 = 0;
  let sqB3 = 0;
  let k = 0;
  // Eight elements per step, paired into four accumulators per vector.
  while (k < unrolledEnd) {
    sqA0 += a[k] * a[k] + a[k + 4] * a[k + 4];
    sqA1 += a[k + 1] * a[k + 1] + a[k + 5] * a[k + 5];
    sqA2 += a[k + 2] * a[k + 2] + a[k + 6] * a[k + 6];
    sqA3 += a[k + 3] * a[k + 3] + a[k + 7] * a[k + 7];
    sqB0 += b[k] * b[k] + b[k + 4] * b[k + 4];
    sqB1 += b[k + 1] * b[k + 1] + b[k + 5] * b[k + 5];
    sqB2 += b[k + 2] * b[k + 2] + b[k + 6] * b[k + 6];
    sqB3 += b[k + 3] * b[k + 3] + b[k + 7] * b[k + 7];
    k += 8;
  }
  while (k < n) {
    sqA0 += a[k] * a[k];
    sqB0 += b[k] * b[k];
    k += 1;
  }

  const squaredNormA = sqA0 + sqA1 + sqA2 + sqA3;
  const squaredNormB = sqB0 + sqB1 + sqB2 + sqB3;
  const magnitude = Math.sqrt(squaredNormA) * Math.sqrt(squaredNormB);
  if (magnitude === 0) {
    return 1;
  }
  const distance = 1 - dot / magnitude;
  if (distance < 0.0000000001) {
    return 0;
  }
  return distance;
}
|
|
165
|
+
|
|
166
|
+
// src/BinaryHeap.ts
|
|
167
|
+
/**
 * Fixed-capacity binary min-heap of (id, distance) pairs kept in two
 * parallel typed arrays. The entry with the smallest distance is at index 0.
 *
 * Ids are stored in a Uint32Array and distances in a Float32Array, so values
 * are coerced to those element types on insertion.
 */
class BinaryHeap {
  ids;
  dists;
  _size;
  capacity;
  lastPoppedValue = 0;

  /** Allocates storage for at most `capacity` entries. */
  constructor(capacity) {
    this.capacity = capacity;
    this.ids = new Uint32Array(capacity);
    this.dists = new Float32Array(capacity);
    this._size = 0;
  }

  /** Inserts an entry; when the heap is already full the call is ignored. */
  push(id, dist) {
    const slot = this._size;
    if (slot >= this.capacity) {
      return;
    }
    this.ids[slot] = id;
    this.dists[slot] = dist;
    this._size = slot + 1;
    this.heapifyUp(slot);
  }

  /**
   * Removes the entry with the smallest distance and returns its id, or -1
   * when the heap is empty. The removed distance is remembered and can be
   * read through `getLastPoppedValue()`.
   */
  pop() {
    if (this._size === 0) {
      return -1;
    }
    const topId = this.ids[0];
    this.lastPoppedValue = this.dists[0];
    const remaining = this._size - 1;
    this._size = remaining;
    if (remaining > 0) {
      // Move the last entry to the root and sink it into place.
      this.ids[0] = this.ids[remaining];
      this.dists[0] = this.dists[remaining];
      this.heapifyDown(0);
    }
    return topId;
  }

  /** Id at the root, or -1 when empty. */
  peek() {
    if (this._size > 0) {
      return this.ids[0];
    }
    return -1;
  }

  /** Distance at the root, or Infinity when empty. */
  peekValue() {
    if (this._size > 0) {
      return this.dists[0];
    }
    return Infinity;
  }

  /** Distance of the entry removed by the most recent successful `pop()`. */
  getLastPoppedValue() {
    return this.lastPoppedValue;
  }

  /** Number of entries currently stored. */
  size() {
    return this._size;
  }

  /** Drops all entries; the backing arrays are kept as they are. */
  clear() {
    this._size = 0;
  }

  isEmpty() {
    return this._size === 0;
  }

  getCapacity() {
    return this.capacity;
  }

  /**
   * Moves the entry at `index` towards the root until its parent's distance
   * is less than or equal to its own. Parents are shifted down into the gap
   * and the moving entry is written once at its final position.
   */
  heapifyUp(index) {
    const ids = this.ids;
    const dists = this.dists;
    const movingId = ids[index];
    const movingDist = dists[index];
    let hole = index;
    while (hole > 0) {
      const parent = (hole - 1) >> 1;
      const parentDist = dists[parent];
      if (parentDist <= movingDist) {
        break;
      }
      ids[hole] = ids[parent];
      dists[hole] = parentDist;
      hole = parent;
    }
    ids[hole] = movingId;
    dists[hole] = movingDist;
  }

  /**
   * Moves the entry at `index` towards the leaves while one of its children
   * has a strictly smaller distance; the smaller child is promoted, with the
   * left child winning ties between the two children.
   */
  heapifyDown(index) {
    const ids = this.ids;
    const dists = this.dists;
    const size = this._size;
    const movingId = ids[index];
    const movingDist = dists[index];
    let hole = index;
    for (;;) {
      const left = (hole << 1) + 1;
      const right = left + 1;
      let pick = hole;
      let pickDist = movingDist;
      if (left < size && dists[left] < pickDist) {
        pick = left;
        pickDist = dists[left];
      }
      if (right < size && dists[right] < pickDist) {
        pick = right;
        pickDist = dists[right];
      }
      if (pick === hole) {
        break;
      }
      ids[hole] = ids[pick];
      dists[hole] = pickDist;
      hole = pick;
    }
    ids[hole] = movingId;
    dists[hole] = movingDist;
  }
}
|
|
264
|
+
|
|
265
|
+
// src/MaxBinaryHeap.ts
|
|
266
|
+
/**
 * Fixed-capacity binary max-heap of (id, distance) pairs kept in two
 * parallel typed arrays. The entry with the largest distance is at index 0.
 *
 * Once full, a push only takes effect when the new distance is smaller than
 * the current maximum, which it then replaces — so the heap retains the
 * `capacity` smallest distances offered to it.
 */
class MaxBinaryHeap {
  ids;
  dists;
  _size;
  capacity;

  /** Allocates storage for at most `capacity` entries. */
  constructor(capacity) {
    this.capacity = capacity;
    this.ids = new Uint32Array(capacity);
    this.dists = new Float32Array(capacity);
    this._size = 0;
  }

  /**
   * Inserts an entry. When the heap is full, the root is overwritten if
   * `dist` is strictly smaller than the root's distance; otherwise the call
   * is ignored.
   */
  push(id, dist) {
    const slot = this._size;
    if (slot < this.capacity) {
      this.ids[slot] = id;
      this.dists[slot] = dist;
      this._size = slot + 1;
      this.heapifyUp(slot);
      return;
    }
    if (dist < this.dists[0]) {
      this.ids[0] = id;
      this.dists[0] = dist;
      this.heapifyDown(0);
    }
  }

  /** Removes the entry with the largest distance and returns its id, or -1 when empty. */
  pop() {
    if (this._size === 0) {
      return -1;
    }
    const topId = this.ids[0];
    const remaining = this._size - 1;
    this._size = remaining;
    if (remaining > 0) {
      // Move the last entry to the root and sink it into place.
      this.ids[0] = this.ids[remaining];
      this.dists[0] = this.dists[remaining];
      this.heapifyDown(0);
    }
    return topId;
  }

  /** Id at the root, or -1 when empty. */
  peek() {
    if (this._size > 0) {
      return this.ids[0];
    }
    return -1;
  }

  /** Distance at the root, or -Infinity when empty. */
  peekValue() {
    if (this._size > 0) {
      return this.dists[0];
    }
    return -Infinity;
  }

  /** Number of entries currently stored. */
  size() {
    return this._size;
  }

  isEmpty() {
    return this._size === 0;
  }

  /** Drops all entries; the backing arrays are kept as they are. */
  clear() {
    this._size = 0;
  }

  getCapacity() {
    return this.capacity;
  }

  /**
   * Moves the entry at `index` towards the root until its parent's distance
   * is greater than or equal to its own. Parents are shifted down into the
   * gap and the moving entry is written once at its final position.
   */
  heapifyUp(index) {
    const ids = this.ids;
    const dists = this.dists;
    const movingId = ids[index];
    const movingDist = dists[index];
    let hole = index;
    while (hole > 0) {
      const parent = (hole - 1) >> 1;
      const parentDist = dists[parent];
      if (parentDist >= movingDist) {
        break;
      }
      ids[hole] = ids[parent];
      dists[hole] = parentDist;
      hole = parent;
    }
    ids[hole] = movingId;
    dists[hole] = movingDist;
  }

  /**
   * Moves the entry at `index` towards the leaves while one of its children
   * has a strictly larger distance; the larger child is promoted, with the
   * left child winning ties between the two children.
   */
  heapifyDown(index) {
    const ids = this.ids;
    const dists = this.dists;
    const size = this._size;
    const movingId = ids[index];
    const movingDist = dists[index];
    let hole = index;
    for (;;) {
      const left = (hole << 1) + 1;
      const right = left + 1;
      let pick = hole;
      let pickDist = movingDist;
      if (left < size && dists[left] > pickDist) {
        pick = left;
        pickDist = dists[left];
      }
      if (right < size && dists[right] > pickDist) {
        pick = right;
        pickDist = dists[right];
      }
      if (pick === hole) {
        break;
      }
      ids[hole] = ids[pick];
      dists[hole] = pickDist;
      hole = pick;
    }
    ids[hole] = movingId;
    dists[hole] = movingDist;
  }
}
|
|
363
|
+
|
|
364
|
+
// src/quantization/ScalarQuantizer.ts
|
|
365
|
+
/**
 * Per-dimension scalar quantizer mapping float components to signed 8-bit
 * codes.
 *
 * Training records the minimum and maximum seen in every dimension. A
 * component `x` is then encoded as `round((x - min) * 255 / (max - min) - 128)`
 * clamped to [-128, 127]; a dimension with zero range uses a scale of 1.
 */
class ScalarQuantizer {
  dimension;
  params = null;
  trained = false;

  /** @param {number} dimension Number of components per vector. */
  constructor(dimension) {
    this.dimension = dimension;
  }

  /**
   * Learns per-dimension `min`, `max`, `scale` and `offset` from `vectors`.
   *
   * @param {ArrayLike<ArrayLike<number>>} vectors Training vectors; only the
   *   first `dimension` components of each are read.
   * @throws {Error} When `vectors` is empty.
   */
  train(vectors) {
    if (vectors.length === 0) {
      throw new Error("Cannot train quantizer with empty vector set");
    }
    const dim = this.dimension;
    const min = new Float32Array(dim).fill(Infinity);
    const max = new Float32Array(dim).fill(-Infinity);
    for (const vector of vectors) {
      for (let d = 0; d < dim; d++) {
        if (vector[d] < min[d]) {
          min[d] = vector[d];
        }
        if (vector[d] > max[d]) {
          max[d] = vector[d];
        }
      }
    }
    const scale = new Float32Array(dim);
    const offset = new Float32Array(dim);
    for (let d = 0; d < dim; d++) {
      const range = max[d] - min[d];
      // A constant dimension would divide by zero; fall back to scale 1.
      scale[d] = range === 0 ? 1 : 255 / range;
      offset[d] = min[d];
    }
    this.params = { min, max, scale, offset };
    this.trained = true;
  }

  /** @returns {boolean} Whether parameters have been trained or set. */
  isTrained() {
    return this.trained;
  }

  /** @returns {{min: Float32Array, max: Float32Array, scale: Float32Array, offset: Float32Array} | null} */
  getParams() {
    return this.params;
  }

  /** Installs externally supplied parameters and marks the quantizer trained. */
  setParams(params) {
    this.params = params;
    this.trained = true;
  }

  /**
   * Encodes one vector into an Int8Array of length `dimension`.
   *
   * Components outside the trained range saturate at -128 or 127.
   *
   * @throws {Error} When the quantizer has no parameters.
   */
  quantize(vector) {
    if (!this.params) {
      throw new Error("Quantizer not trained");
    }
    const dim = this.dimension;
    const result = new Int8Array(dim);
    const { scale, offset } = this.params;
    for (let d = 0; d < dim; d++) {
      const normalized = (vector[d] - offset[d]) * scale[d];
      // Shift the 0..255 range onto the signed byte range before clamping.
      result[d] = Math.max(-128, Math.min(127, Math.round(normalized - 128)));
    }
    return result;
  }

  /** Encodes every vector of `vectors`; returns an array of Int8Array. */
  quantizeBatch(vectors) {
    const result = new Array(vectors.length);
    for (let i = 0; i < vectors.length; i++) {
      result[i] = this.quantize(vectors[i]);
    }
    return result;
  }

  /**
   * Decodes an int8 code vector back to approximate floats
   * (`(code + 128) / scale + offset`).
   *
   * @throws {Error} When the quantizer has no parameters.
   */
  dequantize(vector) {
    if (!this.params) {
      throw new Error("Quantizer not trained");
    }
    const dim = this.dimension;
    const result = new Float32Array(dim);
    const { scale, offset } = this.params;
    for (let d = 0; d < dim; d++) {
      result[d] = (vector[d] + 128) / scale[d] + offset[d];
    }
    return result;
  }

  /**
   * Serializes the parameters to a little-endian buffer: an int32 dimension
   * followed by the `min`, `max`, `scale` and `offset` arrays as float32.
   *
   * @returns {ArrayBuffer} Buffer of `4 + 16 * dimension` bytes.
   * @throws {Error} When the quantizer has no parameters.
   */
  serialize() {
    if (!this.params) {
      throw new Error("Quantizer not trained");
    }
    const dim = this.dimension;
    const buffer = new ArrayBuffer(4 + 4 * dim * 4);
    const view = new DataView(buffer);
    view.setInt32(0, dim, true);
    let offset = 4;
    for (const arr of [this.params.min, this.params.max, this.params.scale, this.params.offset]) {
      for (let d = 0; d < dim; d++) {
        view.setFloat32(offset, arr[d], true);
        offset += 4;
      }
    }
    return buffer;
  }

  /**
   * Rebuilds a quantizer from the layout written by `serialize()`.
   *
   * Accepts an ArrayBuffer or any ArrayBuffer view (typed array, DataView,
   * Node Buffer); a view's own byte offset and length are honoured. The
   * header is validated against the available bytes before any allocation,
   * so a corrupt dimension cannot trigger oversized arrays.
   *
   * @param {ArrayBuffer | ArrayBufferView} buffer Serialized parameters.
   * @returns {ScalarQuantizer} A trained quantizer.
   * @throws {RangeError} When the buffer is too short for its header or for
   *   the dimension the header declares, or the dimension is negative.
   */
  static deserialize(buffer) {
    const view = ArrayBuffer.isView(buffer)
      ? new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength)
      : new DataView(buffer);
    if (view.byteLength < 4) {
      throw new RangeError("Quantizer buffer too small: missing dimension header");
    }
    const dim = view.getInt32(0, true);
    const expectedBytes = 4 + 4 * dim * 4;
    if (dim < 0 || view.byteLength < expectedBytes) {
      throw new RangeError(
        `Quantizer buffer invalid: dimension ${dim} needs ${expectedBytes} bytes, got ${view.byteLength}`
      );
    }
    const quantizer = new ScalarQuantizer(dim);
    const min = new Float32Array(dim);
    const max = new Float32Array(dim);
    const scale = new Float32Array(dim);
    const offsetArr = new Float32Array(dim);
    let offset = 4;
    for (const arr of [min, max, scale, offsetArr]) {
      for (let d = 0; d < dim; d++) {
        arr[d] = view.getFloat32(offset, true);
        offset += 4;
      }
    }
    quantizer.setParams({ min, max, scale, offset: offsetArr });
    return quantizer;
  }
}
|
|
480
|
+
/**
 * Dot product of two int8 code vectors over `a.length` elements.
 *
 * Unrolled by eight with one accumulator per lane; leftover elements are
 * added to the first lane.
 */
function dotProductInt8(a, b) {
  const n = a.length;
  const unrolledEnd = n - 7;
  let lane0 = 0;
  let lane1 = 0;
  let lane2 = 0;
  let lane3 = 0;
  let lane4 = 0;
  let lane5 = 0;
  let lane6 = 0;
  let lane7 = 0;
  let k = 0;
  while (k < unrolledEnd) {
    lane0 += a[k] * b[k];
    lane1 += a[k + 1] * b[k + 1];
    lane2 += a[k + 2] * b[k + 2];
    lane3 += a[k + 3] * b[k + 3];
    lane4 += a[k + 4] * b[k + 4];
    lane5 += a[k + 5] * b[k + 5];
    lane6 += a[k + 6] * b[k + 6];
    lane7 += a[k + 7] * b[k + 7];
    k += 8;
  }
  while (k < n) {
    lane0 += a[k] * b[k];
    k += 1;
  }
  return lane0 + lane1 + lane2 + lane3 + lane4 + lane5 + lane6 + lane7;
}
|
|
501
|
+
/**
 * Squared Euclidean distance between two int8 code vectors over `a.length`
 * elements.
 *
 * Unrolled by eight with one accumulator per lane; leftover elements are
 * folded into the first lane.
 */
function l2SquaredInt8(a, b) {
  const n = a.length;
  const unrolledEnd = n - 7;
  let lane0 = 0;
  let lane1 = 0;
  let lane2 = 0;
  let lane3 = 0;
  let lane4 = 0;
  let lane5 = 0;
  let lane6 = 0;
  let lane7 = 0;
  let k = 0;
  while (k < unrolledEnd) {
    const gap0 = a[k] - b[k];
    const gap1 = a[k + 1] - b[k + 1];
    const gap2 = a[k + 2] - b[k + 2];
    const gap3 = a[k + 3] - b[k + 3];
    const gap4 = a[k + 4] - b[k + 4];
    const gap5 = a[k + 5] - b[k + 5];
    const gap6 = a[k + 6] - b[k + 6];
    const gap7 = a[k + 7] - b[k + 7];
    lane0 += gap0 * gap0;
    lane1 += gap1 * gap1;
    lane2 += gap2 * gap2;
    lane3 += gap3 * gap3;
    lane4 += gap4 * gap4;
    lane5 += gap5 * gap5;
    lane6 += gap6 * gap6;
    lane7 += gap7 * gap7;
    k += 8;
  }
  while (k < n) {
    const gap = a[k] - b[k];
    lane0 += gap * gap;
    k += 1;
  }
  return lane0 + lane1 + lane2 + lane3 + lane4 + lane5 + lane6 + lane7;
}
|
|
531
|
+
/**
 * Cosine distance (`1 - cosine similarity`) between two int8 code vectors.
 *
 * The dot product and both squared norms are accumulated in one pass, eight
 * elements per step paired into four accumulators each. A zero magnitude
 * yields 1; any result below 1e-10 (including negatives) is returned as 0.
 */
function cosineDistanceInt8(a, b) {
  const n = a.length;
  const unrolledEnd = n - 7;
  let dotLane0 = 0;
  let dotLane1 = 0;
  let dotLane2 = 0;
  let dotLane3 = 0;
  let sqA0 = 0;
  let sqA1 = 0;
  let sqA2 = 0;
  let sqA3 = 0;
  let sqB0 = 0;
  let sqB1 = 0;
  let sqB2 = 0;
  let sqB3 = 0;
  let k = 0;
  while (k < unrolledEnd) {
    dotLane0 += a[k] * b[k] + a[k + 4] * b[k + 4];
    dotLane1 += a[k + 1] * b[k + 1] + a[k + 5] * b[k + 5];
    dotLane2 += a[k + 2] * b[k + 2] + a[k + 6] * b[k + 6];
    dotLane3 += a[k + 3] * b[k + 3] + a[k + 7] * b[k + 7];
    sqA0 += a[k] * a[k] + a[k + 4] * a[k + 4];
    sqA1 += a[k + 1] * a[k + 1] + a[k + 5] * a[k + 5];
    sqA2 += a[k + 2] * a[k + 2] + a[k + 6] * a[k + 6];
    sqA3 += a[k + 3] * a[k + 3] + a[k + 7] * a[k + 7];
    sqB0 += b[k] * b[k] + b[k + 4] * b[k + 4];
    sqB1 += b[k + 1] * b[k + 1] + b[k + 5] * b[k + 5];
    sqB2 += b[k + 2] * b[k + 2] + b[k + 6] * b[k + 6];
    sqB3 += b[k + 3] * b[k + 3] + b[k + 7] * b[k + 7];
    k += 8;
  }
  while (k < n) {
    dotLane0 += a[k] * b[k];
    sqA0 += a[k] * a[k];
    sqB0 += b[k] * b[k];
    k += 1;
  }
  const dot = dotLane0 + dotLane1 + dotLane2 + dotLane3;
  const squaredNormA = sqA0 + sqA1 + sqA2 + sqA3;
  const squaredNormB = sqB0 + sqB1 + sqB2 + sqB3;
  const magnitude = Math.sqrt(squaredNormA * squaredNormB);
  if (magnitude === 0) {
    return 1;
  }
  const distance = 1 - dot / magnitude;
  if (distance < 0.0000000001) {
    return 0;
  }
  return distance;
}
|
|
566
|
+
|
|
567
|
+
/**
 * Append-only store of int8-quantized vectors, optionally keeping the
 * original vectors alongside.
 *
 * The quantizer is trained on the first non-empty batch passed to
 * `addVectors`; later batches are encoded with those same parameters.
 */
class QuantizedVectorStore {
  quantizer;
  vectors;
  originalVectors;
  keepOriginals;

  /**
   * @param {number} dimension Components per vector, passed to the quantizer.
   * @param {boolean} [keepOriginals=true] Whether to retain references to the
   *   vectors as they were passed in.
   */
  constructor(dimension, keepOriginals = true) {
    this.quantizer = new ScalarQuantizer(dimension);
    this.vectors = [];
    this.originalVectors = keepOriginals ? [] : null;
    this.keepOriginals = keepOriginals;
  }

  /**
   * Quantizes and appends every vector of `vectors`.
   *
   * An empty batch is a no-op: it neither trains the quantizer (training on
   * an empty set throws) nor changes the store.
   *
   * @param {ArrayLike<ArrayLike<number>>} vectors Vectors to add.
   */
  addVectors(vectors) {
    if (vectors.length === 0) {
      return;
    }
    if (!this.quantizer.isTrained()) {
      this.quantizer.train(vectors);
    }
    for (const v of vectors) {
      this.vectors.push(this.quantizer.quantize(v));
      if (this.keepOriginals && this.originalVectors) {
        // Stores the caller's object itself, not a copy.
        this.originalVectors.push(v);
      }
    }
  }

  /** Quantized vector at `index` (undefined when out of range). */
  getQuantized(index) {
    return this.vectors[index];
  }

  /** Original vector at `index`, or null when originals are not kept. */
  getOriginal(index) {
    if (!this.originalVectors) {
      return null;
    }
    return this.originalVectors[index];
  }

  /** Number of stored vectors. */
  size() {
    return this.vectors.length;
  }

  /**
   * Approximate payload size: one unit per quantized component and four per
   * original component (originals are counted as 4-byte values).
   *
   * @returns {{quantized: number, original: number, total: number}}
   */
  memoryUsage() {
    const quantized = this.vectors.reduce((sum, v) => sum + v.length, 0);
    const original = this.originalVectors
      ? this.originalVectors.reduce((sum, v) => sum + v.length * 4, 0)
      : 0;
    return { quantized, original, total: quantized + original };
  }
}
|
|
606
|
+
|
|
607
|
+
// src/encoding/DeltaEncoder.ts
|
|
608
|
+
/**
 * Writes `value` (coerced to an unsigned 32-bit integer) into `buffer` at
 * `offset` as a base-128 varint: seven payload bits per byte, least
 * significant group first, high bit set on every byte except the last.
 *
 * @returns {number} Number of bytes written (1 to 5).
 */
function encodeVarint(value, buffer, offset) {
  let remaining = value >>> 0;
  let written = 0;
  for (; remaining >= 128; remaining >>>= 7) {
    buffer[offset + written] = (remaining & 127) | 128;
    written += 1;
  }
  // Final byte: fewer than 8 bits left, continuation bit clear.
  buffer[offset + written] = remaining;
  return written + 1;
}
|
|
619
|
+
/**
 * Reads one base-128 varint (unsigned 32-bit) from `buffer` starting at
 * `offset`.
 *
 * @param {ArrayLike<number>} buffer Bytes to read from.
 * @param {number} offset Index of the first byte of the varint.
 * @returns {[number, number]} The decoded value and the number of bytes
 *   consumed.
 * @throws {Error} "Varint too long" when the fifth byte still has its
 *   continuation bit set; "Unexpected end of buffer" when the data ends
 *   before a terminating byte is found.
 */
function decodeVarint(buffer, offset) {
  let result = 0;
  let shift = 0;
  let bytesRead = 0;
  while (offset + bytesRead < buffer.length) {
    const byte = buffer[offset + bytesRead];
    result |= (byte & 127) << shift;
    bytesRead++;
    if ((byte & 128) === 0) {
      // `|` works on signed 32-bit ints; reinterpret as unsigned.
      return [result >>> 0, bytesRead];
    }
    shift += 7;
    // A 32-bit value needs at most five 7-bit groups (shifts 0..28). A sixth
    // byte would be shifted by 35, which JavaScript reduces modulo 32 to 3
    // and would silently corrupt the low bits, so reject it here.
    if (shift >= 35) {
      throw new Error("Varint too long");
    }
  }
  throw new Error("Unexpected end of buffer");
}
|
|
637
|
+
/**
 * Number of bytes the base-128 varint encoding of `value` occupies, with
 * `value` coerced to an unsigned 32-bit integer first (so 1 to 5).
 */
function varintSize(value) {
  let byteCount = 1;
  // Every additional 7-bit group beyond the first costs one more byte.
  for (let remaining = value >>> 0; remaining >= 128; remaining >>>= 7) {
    byteCount += 1;
  }
  return byteCount;
}
|
|
646
|
+
/**
 * Delta-encodes a list of neighbor ids into a compact byte array.
 *
 * A sorted copy of the ids is encoded (the input is not modified, and the
 * original order is not recoverable): the smallest id is written as a
 * little-endian uint32, followed by the gap to each subsequent id as a
 * varint. An empty list encodes to an empty array.
 *
 * @returns {Uint8Array} The encoded bytes.
 */
function deltaEncodeNeighbors(neighbors) {
  if (neighbors.length === 0) {
    return new Uint8Array(0);
  }
  const ascending = neighbors.slice().sort((a, b) => a - b);

  // First pass: collect the gaps and the exact number of bytes they need.
  const gaps = [];
  let totalBytes = 4;
  for (let k = 1; k < ascending.length; k += 1) {
    const gap = ascending[k] - ascending[k - 1];
    gaps.push(gap);
    totalBytes += varintSize(gap);
  }

  // Second pass: header, then one varint per gap.
  const encoded = new Uint8Array(totalBytes);
  new DataView(encoded.buffer).setUint32(0, ascending[0], true);
  let cursor = 4;
  for (const gap of gaps) {
    cursor += encodeVarint(gap, encoded, cursor);
  }
  return encoded;
}
|
|
670
|
+
/**
 * Decodes `count` neighbor ids from a buffer laid out as a little-endian
 * uint32 followed by varint gaps; each id is the previous id plus its gap.
 *
 * @param {Uint8Array} buffer Encoded bytes (its byte offset within the
 *   underlying ArrayBuffer is respected).
 * @param {number} count Number of ids to decode.
 * @returns {number[]} Decoded ids in ascending order; empty when `count` is
 *   0 or the buffer is empty.
 */
function deltaDecodeNeighbors(buffer, count) {
  const nothingToDecode = count === 0 || buffer.length === 0;
  if (nothingToDecode) {
    return [];
  }
  const header = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
  const decoded = new Array(count);
  let previous = header.getUint32(0, true);
  decoded[0] = previous;
  let cursor = 4;
  for (let slot = 1; slot < count; slot += 1) {
    const [gap, consumed] = decodeVarint(buffer, cursor);
    previous += gap;
    decoded[slot] = previous;
    cursor += consumed;
  }
  return decoded;
}
|
|
685
|
+
/**
 * Number of bytes the delta encoding of `neighbors` occupies: 4 bytes for
 * the smallest id plus the varint size of each gap between consecutive ids
 * of the sorted list. An empty list takes 0 bytes. The input is not
 * modified.
 */
function deltaEncodedSize(neighbors) {
  if (neighbors.length === 0) {
    return 0;
  }
  const ascending = neighbors.slice().sort((a, b) => a - b);
  let totalBytes = 4;
  for (let k = 1; k < ascending.length; k += 1) {
    totalBytes += varintSize(ascending[k] - ascending[k - 1]);
  }
  return totalBytes;
}
|
|
699
|
+
|
|
700
|
+
// src/HNSWIndex.ts
|
|
701
|
+
class HNSWIndex {
|
|
702
|
+
M;
|
|
703
|
+
M0;
|
|
704
|
+
efConstruction;
|
|
705
|
+
levelMult;
|
|
706
|
+
maxLevel;
|
|
707
|
+
entryPointId;
|
|
708
|
+
nodes;
|
|
709
|
+
nodeCount = 0;
|
|
710
|
+
dimension;
|
|
711
|
+
metric;
|
|
712
|
+
maxLayers;
|
|
713
|
+
distanceBackend;
|
|
714
|
+
flatVectors;
|
|
715
|
+
flatVectorsCapacity = 0;
|
|
716
|
+
searchHeap;
|
|
717
|
+
visitedArray;
|
|
718
|
+
visitedArraySize;
|
|
719
|
+
visitedGeneration = 0;
|
|
720
|
+
candidatesHeap;
|
|
721
|
+
resultsHeap;
|
|
722
|
+
selectionHeap;
|
|
723
|
+
heapCapacity;
|
|
724
|
+
vectorsAreNormalized = false;
|
|
725
|
+
distanceFn;
|
|
726
|
+
scalarQuantizer = null;
|
|
727
|
+
int8Vectors = [];
|
|
728
|
+
quantizationEnabled = false;
|
|
729
|
+
lazyLoadEnabled = false;
|
|
730
|
+
vectorOffsets = new Map;
|
|
731
|
+
vectorBuffer = null;
|
|
732
|
+
vectorsLoaded = new Set;
|
|
733
|
+
queryNormBuffer;
|
|
734
|
+
neighborSets = new Map;
|
|
735
|
+
constructionMode = false;
|
|
736
|
+
constructor(dimension, metric = "cosine", M = 24, efConstruction = 200, distanceBackend) {
|
|
737
|
+
this.dimension = dimension;
|
|
738
|
+
this.metric = metric;
|
|
739
|
+
this.M = M;
|
|
740
|
+
this.M0 = M * 2;
|
|
741
|
+
this.efConstruction = efConstruction;
|
|
742
|
+
this.levelMult = 1 / Math.log(M);
|
|
743
|
+
this.maxLevel = -1;
|
|
744
|
+
this.entryPointId = -1;
|
|
745
|
+
const initialCapacity = 1e4;
|
|
746
|
+
this.nodes = new Array(initialCapacity);
|
|
747
|
+
this.nodeCount = 0;
|
|
748
|
+
this.flatVectorsCapacity = initialCapacity;
|
|
749
|
+
this.flatVectors = new Float32Array(initialCapacity * dimension);
|
|
750
|
+
this.maxLayers = 32;
|
|
751
|
+
this.distanceBackend = distanceBackend ?? new JsDistanceBackend;
|
|
752
|
+
this.searchHeap = new BinaryHeap(1000);
|
|
753
|
+
this.visitedArraySize = 1e4;
|
|
754
|
+
this.visitedArray = new Uint8Array(this.visitedArraySize);
|
|
755
|
+
this.visitedGeneration = 1;
|
|
756
|
+
this.heapCapacity = Math.max(efConstruction * 2, 500);
|
|
757
|
+
this.candidatesHeap = new BinaryHeap(this.heapCapacity);
|
|
758
|
+
this.resultsHeap = new MaxBinaryHeap(this.heapCapacity);
|
|
759
|
+
this.selectionHeap = new BinaryHeap(Math.max(M * 2, efConstruction));
|
|
760
|
+
this.vectorsAreNormalized = metric === "cosine";
|
|
761
|
+
this.queryNormBuffer = new Float32Array(dimension);
|
|
762
|
+
if (metric === "cosine") {
|
|
763
|
+
this.distanceFn = (a, b) => {
|
|
764
|
+
const dot = dotProductFast(a, b);
|
|
765
|
+
const distance = 1 - dot;
|
|
766
|
+
return distance < 0.0000000001 ? 0 : distance;
|
|
767
|
+
};
|
|
768
|
+
} else if (metric === "euclidean") {
|
|
769
|
+
this.distanceFn = (a, b) => {
|
|
770
|
+
return Math.sqrt(l2SquaredFast(a, b));
|
|
771
|
+
};
|
|
772
|
+
} else if (metric === "dot_product") {
|
|
773
|
+
this.distanceFn = (a, b) => {
|
|
774
|
+
return -dotProductFast(a, b);
|
|
775
|
+
};
|
|
776
|
+
} else {
|
|
777
|
+
throw new Error(`Unsupported metric: ${metric}`);
|
|
778
|
+
}
|
|
779
|
+
}
|
|
780
|
+
ensureCapacity(minCapacity) {
|
|
781
|
+
if (minCapacity > this.nodes.length) {
|
|
782
|
+
const newCapacity = Math.max(this.nodes.length * 2, minCapacity);
|
|
783
|
+
const newNodes = new Array(newCapacity);
|
|
784
|
+
for (let i = 0;i < this.nodeCount; i++) {
|
|
785
|
+
newNodes[i] = this.nodes[i];
|
|
786
|
+
}
|
|
787
|
+
this.nodes = newNodes;
|
|
788
|
+
}
|
|
789
|
+
if (minCapacity > this.flatVectorsCapacity) {
|
|
790
|
+
const newCapacity = Math.max(this.flatVectorsCapacity * 2, minCapacity);
|
|
791
|
+
const newFlatVectors = new Float32Array(newCapacity * this.dimension);
|
|
792
|
+
newFlatVectors.set(this.flatVectors);
|
|
793
|
+
this.flatVectors = newFlatVectors;
|
|
794
|
+
this.flatVectorsCapacity = newCapacity;
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
getFlatVector(nodeId) {
|
|
798
|
+
const offset = nodeId * this.dimension;
|
|
799
|
+
return this.flatVectors.subarray(offset, offset + this.dimension);
|
|
800
|
+
}
|
|
801
|
+
setFlatVector(nodeId, vector) {
|
|
802
|
+
const offset = nodeId * this.dimension;
|
|
803
|
+
this.flatVectors.set(vector, offset);
|
|
804
|
+
}
|
|
805
|
+
getNode(id) {
|
|
806
|
+
return this.nodes[id];
|
|
807
|
+
}
|
|
808
|
+
setNode(node) {
|
|
809
|
+
const id = node.id;
|
|
810
|
+
this.ensureCapacity(id + 1);
|
|
811
|
+
this.nodes[id] = node;
|
|
812
|
+
this.setFlatVector(id, node.vector);
|
|
813
|
+
if (id >= this.nodeCount) {
|
|
814
|
+
this.nodeCount = id + 1;
|
|
815
|
+
}
|
|
816
|
+
}
|
|
817
|
+
// Reusable scratch list: searchLayer collects the unvisited neighbor ids of
// the node being expanded here before computing their distances in one pass.
batchNeighborIds = [];
// Reusable scratch list receiving the distances for batchNeighborIds.
batchDistances = [];
|
|
819
|
+
calculateDistancesBatch(query, neighborIds, outDistances) {
|
|
820
|
+
const dim = this.dimension;
|
|
821
|
+
const flatVectors = this.flatVectors;
|
|
822
|
+
for (let i = 0;i < neighborIds.length; i++) {
|
|
823
|
+
const neighborId = neighborIds[i];
|
|
824
|
+
const offset = neighborId * dim;
|
|
825
|
+
if (this.metric === "cosine") {
|
|
826
|
+
let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
|
|
827
|
+
let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
|
|
828
|
+
let d = 0;
|
|
829
|
+
const limit8 = dim - 7;
|
|
830
|
+
for (;d < limit8; d += 8) {
|
|
831
|
+
sum0 += flatVectors[offset + d] * query[d];
|
|
832
|
+
sum1 += flatVectors[offset + d + 1] * query[d + 1];
|
|
833
|
+
sum2 += flatVectors[offset + d + 2] * query[d + 2];
|
|
834
|
+
sum3 += flatVectors[offset + d + 3] * query[d + 3];
|
|
835
|
+
sum4 += flatVectors[offset + d + 4] * query[d + 4];
|
|
836
|
+
sum5 += flatVectors[offset + d + 5] * query[d + 5];
|
|
837
|
+
sum6 += flatVectors[offset + d + 6] * query[d + 6];
|
|
838
|
+
sum7 += flatVectors[offset + d + 7] * query[d + 7];
|
|
839
|
+
}
|
|
840
|
+
for (;d < dim; d++) {
|
|
841
|
+
sum0 += flatVectors[offset + d] * query[d];
|
|
842
|
+
}
|
|
843
|
+
const dot = sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7;
|
|
844
|
+
const dist = 1 - dot;
|
|
845
|
+
outDistances[i] = dist < 0.0000000001 ? 0 : dist;
|
|
846
|
+
} else if (this.metric === "euclidean") {
|
|
847
|
+
let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
|
|
848
|
+
let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
|
|
849
|
+
let d = 0;
|
|
850
|
+
const limit8 = dim - 7;
|
|
851
|
+
for (;d < limit8; d += 8) {
|
|
852
|
+
const d0 = flatVectors[offset + d] - query[d];
|
|
853
|
+
const d1 = flatVectors[offset + d + 1] - query[d + 1];
|
|
854
|
+
const d2 = flatVectors[offset + d + 2] - query[d + 2];
|
|
855
|
+
const d3 = flatVectors[offset + d + 3] - query[d + 3];
|
|
856
|
+
const d4 = flatVectors[offset + d + 4] - query[d + 4];
|
|
857
|
+
const d5 = flatVectors[offset + d + 5] - query[d + 5];
|
|
858
|
+
const d6 = flatVectors[offset + d + 6] - query[d + 6];
|
|
859
|
+
const d7 = flatVectors[offset + d + 7] - query[d + 7];
|
|
860
|
+
sum0 += d0 * d0;
|
|
861
|
+
sum1 += d1 * d1;
|
|
862
|
+
sum2 += d2 * d2;
|
|
863
|
+
sum3 += d3 * d3;
|
|
864
|
+
sum4 += d4 * d4;
|
|
865
|
+
sum5 += d5 * d5;
|
|
866
|
+
sum6 += d6 * d6;
|
|
867
|
+
sum7 += d7 * d7;
|
|
868
|
+
}
|
|
869
|
+
for (;d < dim; d++) {
|
|
870
|
+
const diff = flatVectors[offset + d] - query[d];
|
|
871
|
+
sum0 += diff * diff;
|
|
872
|
+
}
|
|
873
|
+
outDistances[i] = Math.sqrt(sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7);
|
|
874
|
+
} else {
|
|
875
|
+
let sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
|
|
876
|
+
let sum4 = 0, sum5 = 0, sum6 = 0, sum7 = 0;
|
|
877
|
+
let d = 0;
|
|
878
|
+
const limit8 = dim - 7;
|
|
879
|
+
for (;d < limit8; d += 8) {
|
|
880
|
+
sum0 += flatVectors[offset + d] * query[d];
|
|
881
|
+
sum1 += flatVectors[offset + d + 1] * query[d + 1];
|
|
882
|
+
sum2 += flatVectors[offset + d + 2] * query[d + 2];
|
|
883
|
+
sum3 += flatVectors[offset + d + 3] * query[d + 3];
|
|
884
|
+
sum4 += flatVectors[offset + d + 4] * query[d + 4];
|
|
885
|
+
sum5 += flatVectors[offset + d + 5] * query[d + 5];
|
|
886
|
+
sum6 += flatVectors[offset + d + 6] * query[d + 6];
|
|
887
|
+
sum7 += flatVectors[offset + d + 7] * query[d + 7];
|
|
888
|
+
}
|
|
889
|
+
for (;d < dim; d++) {
|
|
890
|
+
sum0 += flatVectors[offset + d] * query[d];
|
|
891
|
+
}
|
|
892
|
+
outDistances[i] = -(sum0 + sum1 + sum2 + sum3 + sum4 + sum5 + sum6 + sum7);
|
|
893
|
+
}
|
|
894
|
+
}
|
|
895
|
+
}
|
|
896
|
+
isVisited(id) {
|
|
897
|
+
if (id >= this.visitedArraySize) {
|
|
898
|
+
return false;
|
|
899
|
+
}
|
|
900
|
+
return this.visitedArray[id] === this.visitedGeneration;
|
|
901
|
+
}
|
|
902
|
+
markVisited(id) {
|
|
903
|
+
if (id >= this.visitedArraySize) {
|
|
904
|
+
const newSize = Math.max(this.visitedArraySize * 2, id + 1000);
|
|
905
|
+
const newArray = new Uint8Array(newSize);
|
|
906
|
+
newArray.set(this.visitedArray);
|
|
907
|
+
this.visitedArray = newArray;
|
|
908
|
+
this.visitedArraySize = newSize;
|
|
909
|
+
}
|
|
910
|
+
this.visitedArray[id] = this.visitedGeneration;
|
|
911
|
+
}
|
|
912
|
+
clearVisited() {
|
|
913
|
+
this.visitedGeneration++;
|
|
914
|
+
if (this.visitedGeneration > 250) {
|
|
915
|
+
this.visitedArray.fill(0);
|
|
916
|
+
this.visitedGeneration = 1;
|
|
917
|
+
}
|
|
918
|
+
}
|
|
919
|
+
normalizeVector(vector) {
|
|
920
|
+
const len = vector.length;
|
|
921
|
+
let s0 = 0, s1 = 0, s2 = 0, s3 = 0;
|
|
922
|
+
let s4 = 0, s5 = 0, s6 = 0, s7 = 0;
|
|
923
|
+
let i = 0;
|
|
924
|
+
const limit8 = len - 7;
|
|
925
|
+
for (;i < limit8; i += 8) {
|
|
926
|
+
s0 += vector[i] * vector[i];
|
|
927
|
+
s1 += vector[i + 1] * vector[i + 1];
|
|
928
|
+
s2 += vector[i + 2] * vector[i + 2];
|
|
929
|
+
s3 += vector[i + 3] * vector[i + 3];
|
|
930
|
+
s4 += vector[i + 4] * vector[i + 4];
|
|
931
|
+
s5 += vector[i + 5] * vector[i + 5];
|
|
932
|
+
s6 += vector[i + 6] * vector[i + 6];
|
|
933
|
+
s7 += vector[i + 7] * vector[i + 7];
|
|
934
|
+
}
|
|
935
|
+
for (;i < len; i++) {
|
|
936
|
+
s0 += vector[i] * vector[i];
|
|
937
|
+
}
|
|
938
|
+
const normSq = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7;
|
|
939
|
+
const norm = Math.sqrt(normSq);
|
|
940
|
+
if (norm > 0) {
|
|
941
|
+
const invNorm = 1 / norm;
|
|
942
|
+
for (let j = 0;j < len; j++) {
|
|
943
|
+
vector[j] *= invNorm;
|
|
944
|
+
}
|
|
945
|
+
}
|
|
946
|
+
return vector;
|
|
947
|
+
}
|
|
948
|
+
selectLevel() {
|
|
949
|
+
const r = Math.random();
|
|
950
|
+
const level = Math.floor(-Math.log(r) * this.levelMult);
|
|
951
|
+
return Math.min(level, this.maxLayers - 1);
|
|
952
|
+
}
|
|
953
|
+
calculateDistance(a, b) {
|
|
954
|
+
return this.distanceFn(a, b);
|
|
955
|
+
}
|
|
956
|
+
getNodeVector(nodeId) {
|
|
957
|
+
const node = this.nodes[nodeId];
|
|
958
|
+
if (!node)
|
|
959
|
+
return null;
|
|
960
|
+
if (this.lazyLoadEnabled && !this.vectorsLoaded.has(nodeId)) {
|
|
961
|
+
this.loadVector(nodeId);
|
|
962
|
+
}
|
|
963
|
+
return node.vector;
|
|
964
|
+
}
|
|
965
|
+
getLayerMaxConnections(layer) {
|
|
966
|
+
return layer === 0 ? this.M0 : this.M;
|
|
967
|
+
}
|
|
968
|
+
selectNeighbors(currentId, candidates, layer) {
|
|
969
|
+
const maxConnections = this.getLayerMaxConnections(layer);
|
|
970
|
+
this.selectionHeap.clear();
|
|
971
|
+
for (const candidate of candidates) {
|
|
972
|
+
if (candidate.id !== currentId) {
|
|
973
|
+
this.selectionHeap.push(candidate.id, candidate.distance);
|
|
974
|
+
}
|
|
975
|
+
}
|
|
976
|
+
const selected = new Array(Math.min(maxConnections, this.selectionHeap.size()));
|
|
977
|
+
let idx = 0;
|
|
978
|
+
while (idx < maxConnections && !this.selectionHeap.isEmpty()) {
|
|
979
|
+
const id = this.selectionHeap.pop();
|
|
980
|
+
if (id !== -1) {
|
|
981
|
+
selected[idx++] = id;
|
|
982
|
+
}
|
|
983
|
+
}
|
|
984
|
+
if (idx < selected.length) {
|
|
985
|
+
selected.length = idx;
|
|
986
|
+
}
|
|
987
|
+
return selected;
|
|
988
|
+
}
|
|
989
|
+
addBidirectionalConnection(fromId, toId, level) {
|
|
990
|
+
const fromNode = this.nodes[fromId];
|
|
991
|
+
const toNode = this.nodes[toId];
|
|
992
|
+
if (!fromNode || !toNode)
|
|
993
|
+
return;
|
|
994
|
+
if (!fromNode.neighbors[level]) {
|
|
995
|
+
fromNode.neighbors[level] = [];
|
|
996
|
+
}
|
|
997
|
+
if (!toNode.neighbors[level]) {
|
|
998
|
+
toNode.neighbors[level] = [];
|
|
999
|
+
}
|
|
1000
|
+
if (this.constructionMode) {
|
|
1001
|
+
let fromSets = this.neighborSets.get(fromId);
|
|
1002
|
+
if (!fromSets) {
|
|
1003
|
+
fromSets = [];
|
|
1004
|
+
this.neighborSets.set(fromId, fromSets);
|
|
1005
|
+
}
|
|
1006
|
+
if (!fromSets[level]) {
|
|
1007
|
+
fromSets[level] = new Set(fromNode.neighbors[level]);
|
|
1008
|
+
}
|
|
1009
|
+
let toSets = this.neighborSets.get(toId);
|
|
1010
|
+
if (!toSets) {
|
|
1011
|
+
toSets = [];
|
|
1012
|
+
this.neighborSets.set(toId, toSets);
|
|
1013
|
+
}
|
|
1014
|
+
if (!toSets[level]) {
|
|
1015
|
+
toSets[level] = new Set(toNode.neighbors[level]);
|
|
1016
|
+
}
|
|
1017
|
+
if (!fromSets[level].has(toId)) {
|
|
1018
|
+
fromSets[level].add(toId);
|
|
1019
|
+
fromNode.neighbors[level].push(toId);
|
|
1020
|
+
}
|
|
1021
|
+
if (!toSets[level].has(fromId)) {
|
|
1022
|
+
toSets[level].add(fromId);
|
|
1023
|
+
toNode.neighbors[level].push(fromId);
|
|
1024
|
+
}
|
|
1025
|
+
} else {
|
|
1026
|
+
if (!fromNode.neighbors[level].includes(toId)) {
|
|
1027
|
+
fromNode.neighbors[level].push(toId);
|
|
1028
|
+
}
|
|
1029
|
+
if (!toNode.neighbors[level].includes(fromId)) {
|
|
1030
|
+
toNode.neighbors[level].push(fromId);
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
1033
|
+
}
|
|
1034
|
+
ensureHeapCapacity(ef) {
|
|
1035
|
+
const requiredCapacity = Math.max(ef * 2, 100);
|
|
1036
|
+
if (requiredCapacity > this.heapCapacity) {
|
|
1037
|
+
this.heapCapacity = requiredCapacity;
|
|
1038
|
+
this.candidatesHeap = new BinaryHeap(this.heapCapacity);
|
|
1039
|
+
this.resultsHeap = new MaxBinaryHeap(this.heapCapacity);
|
|
1040
|
+
}
|
|
1041
|
+
}
|
|
1042
|
+
searchLayer(query, nearest, layer, ef) {
|
|
1043
|
+
this.clearVisited();
|
|
1044
|
+
this.ensureHeapCapacity(ef);
|
|
1045
|
+
this.candidatesHeap.clear();
|
|
1046
|
+
this.resultsHeap.clear();
|
|
1047
|
+
this.markVisited(nearest.id);
|
|
1048
|
+
this.candidatesHeap.push(nearest.id, nearest.distance);
|
|
1049
|
+
this.resultsHeap.push(nearest.id, nearest.distance);
|
|
1050
|
+
let furthestResultDist = nearest.distance;
|
|
1051
|
+
const batchIds = this.batchNeighborIds;
|
|
1052
|
+
const batchDists = this.batchDistances;
|
|
1053
|
+
while (!this.candidatesHeap.isEmpty()) {
|
|
1054
|
+
const closestCandidateDist = this.candidatesHeap.peekValue();
|
|
1055
|
+
const closestCandidateId = this.candidatesHeap.pop();
|
|
1056
|
+
if (closestCandidateId === -1)
|
|
1057
|
+
continue;
|
|
1058
|
+
if (this.resultsHeap.size() >= ef && closestCandidateDist > furthestResultDist) {
|
|
1059
|
+
break;
|
|
1060
|
+
}
|
|
1061
|
+
const node = this.nodes[closestCandidateId];
|
|
1062
|
+
if (!node)
|
|
1063
|
+
continue;
|
|
1064
|
+
const neighbors = node.neighbors[layer] || [];
|
|
1065
|
+
if (!this.lazyLoadEnabled) {
|
|
1066
|
+
let batchCount = 0;
|
|
1067
|
+
for (let i = 0;i < neighbors.length; i++) {
|
|
1068
|
+
const neighborId = neighbors[i];
|
|
1069
|
+
if (!this.isVisited(neighborId)) {
|
|
1070
|
+
this.markVisited(neighborId);
|
|
1071
|
+
batchIds[batchCount] = neighborId;
|
|
1072
|
+
batchCount++;
|
|
1073
|
+
}
|
|
1074
|
+
}
|
|
1075
|
+
if (batchCount > 0) {
|
|
1076
|
+
if (batchDists.length < batchCount) {
|
|
1077
|
+
this.batchDistances.length = batchCount;
|
|
1078
|
+
}
|
|
1079
|
+
this.calculateDistancesBatch(query, batchIds.slice(0, batchCount), batchDists);
|
|
1080
|
+
for (let i = 0;i < batchCount; i++) {
|
|
1081
|
+
const neighborId = batchIds[i];
|
|
1082
|
+
const distance = batchDists[i];
|
|
1083
|
+
if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
|
|
1084
|
+
this.candidatesHeap.push(neighborId, distance);
|
|
1085
|
+
this.resultsHeap.push(neighborId, distance);
|
|
1086
|
+
if (this.resultsHeap.size() > ef) {
|
|
1087
|
+
this.resultsHeap.pop();
|
|
1088
|
+
}
|
|
1089
|
+
furthestResultDist = this.resultsHeap.peekValue();
|
|
1090
|
+
}
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
} else {
|
|
1094
|
+
for (const neighborId of neighbors) {
|
|
1095
|
+
if (this.isVisited(neighborId))
|
|
1096
|
+
continue;
|
|
1097
|
+
this.markVisited(neighborId);
|
|
1098
|
+
const neighborVector = this.getNodeVector(neighborId);
|
|
1099
|
+
if (!neighborVector)
|
|
1100
|
+
continue;
|
|
1101
|
+
const distance = this.calculateDistance(query, neighborVector);
|
|
1102
|
+
if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
|
|
1103
|
+
this.candidatesHeap.push(neighborId, distance);
|
|
1104
|
+
this.resultsHeap.push(neighborId, distance);
|
|
1105
|
+
if (this.resultsHeap.size() > ef) {
|
|
1106
|
+
this.resultsHeap.pop();
|
|
1107
|
+
}
|
|
1108
|
+
furthestResultDist = this.resultsHeap.peekValue();
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
const resultCount = this.resultsHeap.size();
|
|
1114
|
+
const results = new Array(resultCount);
|
|
1115
|
+
let idx = resultCount - 1;
|
|
1116
|
+
while (!this.resultsHeap.isEmpty()) {
|
|
1117
|
+
const dist = this.resultsHeap.peekValue();
|
|
1118
|
+
const id = this.resultsHeap.pop();
|
|
1119
|
+
results[idx--] = { id, distance: dist };
|
|
1120
|
+
}
|
|
1121
|
+
return results;
|
|
1122
|
+
}
|
|
1123
|
+
greedySearch(query, entryNode, level) {
|
|
1124
|
+
this.clearVisited();
|
|
1125
|
+
let currentNode = entryNode;
|
|
1126
|
+
const entryVector = this.getNodeVector(entryNode.id);
|
|
1127
|
+
let currentDistance = entryVector ? this.calculateDistance(query, entryVector) : Infinity;
|
|
1128
|
+
this.markVisited(currentNode.id);
|
|
1129
|
+
let improved = true;
|
|
1130
|
+
while (improved) {
|
|
1131
|
+
improved = false;
|
|
1132
|
+
const neighbors = currentNode.neighbors[level] || [];
|
|
1133
|
+
for (const neighborId of neighbors) {
|
|
1134
|
+
if (this.isVisited(neighborId))
|
|
1135
|
+
continue;
|
|
1136
|
+
this.markVisited(neighborId);
|
|
1137
|
+
const neighborVector = this.getNodeVector(neighborId);
|
|
1138
|
+
if (!neighborVector)
|
|
1139
|
+
continue;
|
|
1140
|
+
const neighborNode = this.nodes[neighborId];
|
|
1141
|
+
if (!neighborNode)
|
|
1142
|
+
continue;
|
|
1143
|
+
const distance = this.calculateDistance(query, neighborVector);
|
|
1144
|
+
if (distance < currentDistance) {
|
|
1145
|
+
currentDistance = distance;
|
|
1146
|
+
currentNode = neighborNode;
|
|
1147
|
+
improved = true;
|
|
1148
|
+
}
|
|
1149
|
+
}
|
|
1150
|
+
}
|
|
1151
|
+
return { id: currentNode.id, distance: currentDistance };
|
|
1152
|
+
}
|
|
1153
|
+
async addPoint(id, vector, options) {
|
|
1154
|
+
this.addPointSync(id, vector, options);
|
|
1155
|
+
}
|
|
1156
|
+
addPointSync(id, vector, options) {
|
|
1157
|
+
let floatVector;
|
|
1158
|
+
if (Array.isArray(vector)) {
|
|
1159
|
+
floatVector = new Float32Array(vector);
|
|
1160
|
+
} else if (this.vectorsAreNormalized && !options?.skipNormalization) {
|
|
1161
|
+
floatVector = new Float32Array(vector);
|
|
1162
|
+
} else {
|
|
1163
|
+
floatVector = vector;
|
|
1164
|
+
}
|
|
1165
|
+
if (floatVector.length !== this.dimension) {
|
|
1166
|
+
throw new Error(`Vector dimension ${floatVector.length} does not match expected ${this.dimension}`);
|
|
1167
|
+
}
|
|
1168
|
+
if (this.vectorsAreNormalized && !options?.skipNormalization) {
|
|
1169
|
+
floatVector = this.normalizeVector(floatVector);
|
|
1170
|
+
}
|
|
1171
|
+
const level = this.selectLevel();
|
|
1172
|
+
const neighbors = new Array(level + 1);
|
|
1173
|
+
for (let i = 0;i <= level; i++) {
|
|
1174
|
+
neighbors[i] = [];
|
|
1175
|
+
}
|
|
1176
|
+
const newNode = {
|
|
1177
|
+
id,
|
|
1178
|
+
level,
|
|
1179
|
+
vector: floatVector,
|
|
1180
|
+
neighbors
|
|
1181
|
+
};
|
|
1182
|
+
this.setNode(newNode);
|
|
1183
|
+
if (this.entryPointId === -1) {
|
|
1184
|
+
this.entryPointId = id;
|
|
1185
|
+
this.maxLevel = level;
|
|
1186
|
+
return;
|
|
1187
|
+
}
|
|
1188
|
+
let currentEntryPoint = this.nodes[this.entryPointId];
|
|
1189
|
+
let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(floatVector, currentEntryPoint.vector) };
|
|
1190
|
+
for (let l = this.maxLevel;l > level; l--) {
|
|
1191
|
+
const result = this.greedySearch(floatVector, currentEntryPoint, l);
|
|
1192
|
+
if (result.distance < currentBest.distance) {
|
|
1193
|
+
currentBest = result;
|
|
1194
|
+
currentEntryPoint = this.nodes[currentBest.id];
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1197
|
+
for (let l = Math.min(level, this.maxLevel);l >= 0; l--) {
|
|
1198
|
+
const searchResults = this.searchLayer(floatVector, currentBest, l, this.efConstruction);
|
|
1199
|
+
const neighbors2 = this.selectNeighbors(id, searchResults, l);
|
|
1200
|
+
for (const neighborId of neighbors2) {
|
|
1201
|
+
this.addBidirectionalConnection(id, neighborId, l);
|
|
1202
|
+
}
|
|
1203
|
+
if (searchResults.length > 0) {
|
|
1204
|
+
currentBest = searchResults[0];
|
|
1205
|
+
}
|
|
1206
|
+
}
|
|
1207
|
+
if (level > this.maxLevel) {
|
|
1208
|
+
this.maxLevel = level;
|
|
1209
|
+
this.entryPointId = id;
|
|
1210
|
+
}
|
|
1211
|
+
}
|
|
1212
|
+
searchKNN(query, k, efSearch) {
|
|
1213
|
+
if (this.entryPointId === -1 || this.nodeCount === 0) {
|
|
1214
|
+
return [];
|
|
1215
|
+
}
|
|
1216
|
+
if (query.length !== this.dimension) {
|
|
1217
|
+
throw new Error(`Query dimension ${query.length} does not match expected ${this.dimension}`);
|
|
1218
|
+
}
|
|
1219
|
+
const effectiveEf = efSearch || Math.max(k * 2, 50);
|
|
1220
|
+
let normalizedQuery = query;
|
|
1221
|
+
if (this.vectorsAreNormalized) {
|
|
1222
|
+
this.queryNormBuffer.set(query);
|
|
1223
|
+
normalizedQuery = this.normalizeVector(this.queryNormBuffer);
|
|
1224
|
+
}
|
|
1225
|
+
let currentEntryPoint = this.nodes[this.entryPointId];
|
|
1226
|
+
const entryVector = this.getNodeVector(this.entryPointId);
|
|
1227
|
+
if (!entryVector)
|
|
1228
|
+
return [];
|
|
1229
|
+
let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(normalizedQuery, entryVector) };
|
|
1230
|
+
for (let l = this.maxLevel;l > 0; l--) {
|
|
1231
|
+
const result = this.greedySearch(normalizedQuery, currentEntryPoint, l);
|
|
1232
|
+
if (result.distance < currentBest.distance) {
|
|
1233
|
+
currentBest = result;
|
|
1234
|
+
currentEntryPoint = this.nodes[currentBest.id];
|
|
1235
|
+
}
|
|
1236
|
+
}
|
|
1237
|
+
const candidates = this.searchLayer(normalizedQuery, currentBest, 0, effectiveEf);
|
|
1238
|
+
candidates.sort((a, b) => {
|
|
1239
|
+
const diff = a.distance - b.distance;
|
|
1240
|
+
return diff !== 0 ? diff : a.id - b.id;
|
|
1241
|
+
});
|
|
1242
|
+
if (candidates.length > k)
|
|
1243
|
+
candidates.length = k;
|
|
1244
|
+
return candidates;
|
|
1245
|
+
}
|
|
1246
|
+
searchKNNBatch(queries, k, efSearch) {
|
|
1247
|
+
const numQueries = queries.length;
|
|
1248
|
+
if (this.entryPointId === -1 || this.nodeCount === 0) {
|
|
1249
|
+
const emptyResults = new Array(numQueries);
|
|
1250
|
+
for (let i = 0;i < numQueries; i++) {
|
|
1251
|
+
emptyResults[i] = [];
|
|
1252
|
+
}
|
|
1253
|
+
return emptyResults;
|
|
1254
|
+
}
|
|
1255
|
+
const results = new Array(numQueries);
|
|
1256
|
+
this.clearVisited();
|
|
1257
|
+
for (let i = 0;i < numQueries; i++) {
|
|
1258
|
+
const query = queries[i];
|
|
1259
|
+
if (query.length !== this.dimension) {
|
|
1260
|
+
throw new Error(`Query dimension ${query.length} does not match expected ${this.dimension}`);
|
|
1261
|
+
}
|
|
1262
|
+
results[i] = this.searchKNN(query, k, efSearch);
|
|
1263
|
+
this.clearVisited();
|
|
1264
|
+
}
|
|
1265
|
+
return results;
|
|
1266
|
+
}
|
|
1267
|
+
searchKNNBatchFlat(queries, numQueries, k, efSearch) {
|
|
1268
|
+
if (this.entryPointId === -1 || this.nodeCount === 0) {
|
|
1269
|
+
return {
|
|
1270
|
+
ids: new Uint32Array(numQueries * k),
|
|
1271
|
+
distances: new Float32Array(numQueries * k).fill(Infinity)
|
|
1272
|
+
};
|
|
1273
|
+
}
|
|
1274
|
+
const ids = new Uint32Array(numQueries * k);
|
|
1275
|
+
const distances = new Float32Array(numQueries * k);
|
|
1276
|
+
for (let q = 0;q < numQueries; q++) {
|
|
1277
|
+
const queryStart = q * this.dimension;
|
|
1278
|
+
const query = queries.subarray(queryStart, queryStart + this.dimension);
|
|
1279
|
+
const results = this.searchKNN(query, k, efSearch);
|
|
1280
|
+
const resultStart = q * k;
|
|
1281
|
+
for (let i = 0;i < k; i++) {
|
|
1282
|
+
if (i < results.length) {
|
|
1283
|
+
ids[resultStart + i] = results[i].id;
|
|
1284
|
+
distances[resultStart + i] = results[i].distance;
|
|
1285
|
+
} else {
|
|
1286
|
+
ids[resultStart + i] = 0;
|
|
1287
|
+
distances[resultStart + i] = Infinity;
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
this.clearVisited();
|
|
1291
|
+
}
|
|
1292
|
+
return { ids, distances };
|
|
1293
|
+
}
|
|
1294
|
+
async add(vector) {
|
|
1295
|
+
const id = this.nodeCount;
|
|
1296
|
+
await this.addPoint(id, vector);
|
|
1297
|
+
return id;
|
|
1298
|
+
}
|
|
1299
|
+
query(vector, k = 10) {
|
|
1300
|
+
const floatVector = Array.isArray(vector) ? new Float32Array(vector) : vector;
|
|
1301
|
+
return this.searchKNN(floatVector, k);
|
|
1302
|
+
}
|
|
1303
|
+
async addAll(vectors) {
|
|
1304
|
+
const points = vectors.map((vector, i) => ({
|
|
1305
|
+
id: this.nodeCount + i,
|
|
1306
|
+
vector: vector instanceof Float32Array ? vector : new Float32Array(vector)
|
|
1307
|
+
}));
|
|
1308
|
+
await this.addPointsBulk(points);
|
|
1309
|
+
return points.map((p) => p.id);
|
|
1310
|
+
}
|
|
1311
|
+
async addPointsBulk(points, options) {
|
|
1312
|
+
this.addPointsBulkSync(points, options);
|
|
1313
|
+
}
|
|
1314
|
+
addPointsBulkSync(points, options) {
|
|
1315
|
+
if (points.length === 0)
|
|
1316
|
+
return;
|
|
1317
|
+
this.constructionMode = true;
|
|
1318
|
+
this.neighborSets.clear();
|
|
1319
|
+
try {
|
|
1320
|
+
for (const { id, vector } of points) {
|
|
1321
|
+
this.addPointSync(id, vector, options);
|
|
1322
|
+
}
|
|
1323
|
+
} finally {
|
|
1324
|
+
this.constructionMode = false;
|
|
1325
|
+
this.neighborSets.clear();
|
|
1326
|
+
}
|
|
1327
|
+
}
|
|
1328
|
+
clearConstructionCache() {
|
|
1329
|
+
this.constructionMode = false;
|
|
1330
|
+
this.neighborSets.clear();
|
|
1331
|
+
}
|
|
1332
|
+
static MAGIC = 1213092695;
|
|
1333
|
+
static FORMAT_VERSION = 3;
|
|
1334
|
+
static HEADER_SIZE = 40;
|
|
1335
|
+
getNodesArray() {
|
|
1336
|
+
const result = [];
|
|
1337
|
+
for (let i = 0;i < this.nodeCount; i++) {
|
|
1338
|
+
const node = this.nodes[i];
|
|
1339
|
+
if (node)
|
|
1340
|
+
result.push(node);
|
|
1341
|
+
}
|
|
1342
|
+
return result;
|
|
1343
|
+
}
|
|
1344
|
+
serialize() {
|
|
1345
|
+
const nodeCount = this.nodeCount;
|
|
1346
|
+
const nodesArray = this.getNodesArray();
|
|
1347
|
+
const idToIndex = new Map;
|
|
1348
|
+
for (let i = 0;i < nodesArray.length; i++) {
|
|
1349
|
+
idToIndex.set(nodesArray[i].id, i);
|
|
1350
|
+
}
|
|
1351
|
+
const encodedNeighbors = [];
|
|
1352
|
+
let totalNeighborBytes = 0;
|
|
1353
|
+
for (const node of nodesArray) {
|
|
1354
|
+
const nodeEncodings = [];
|
|
1355
|
+
for (let l = 0;l <= node.level; l++) {
|
|
1356
|
+
const neighborIndices = node.neighbors[l].map((id) => idToIndex.get(id) ?? 0);
|
|
1357
|
+
const encoded = deltaEncodeNeighbors(neighborIndices);
|
|
1358
|
+
nodeEncodings.push(encoded);
|
|
1359
|
+
totalNeighborBytes += encoded.length;
|
|
1360
|
+
}
|
|
1361
|
+
encodedNeighbors.push(nodeEncodings);
|
|
1362
|
+
}
|
|
1363
|
+
let graphSize = HNSWIndex.HEADER_SIZE;
|
|
1364
|
+
graphSize += nodeCount * 8;
|
|
1365
|
+
for (const node of nodesArray) {
|
|
1366
|
+
graphSize += (node.level + 1) * 8;
|
|
1367
|
+
}
|
|
1368
|
+
graphSize += totalNeighborBytes;
|
|
1369
|
+
graphSize += nodeCount * 4;
|
|
1370
|
+
const vectorDataOffset = graphSize;
|
|
1371
|
+
const vectorDataSize = nodeCount * this.dimension * 4;
|
|
1372
|
+
const totalSize = graphSize + vectorDataSize;
|
|
1373
|
+
const buffer = new ArrayBuffer(totalSize);
|
|
1374
|
+
const view = new DataView(buffer);
|
|
1375
|
+
const uint8Array = new Uint8Array(buffer);
|
|
1376
|
+
let offset = 0;
|
|
1377
|
+
view.setUint32(offset, HNSWIndex.MAGIC, true);
|
|
1378
|
+
offset += 4;
|
|
1379
|
+
view.setUint32(offset, HNSWIndex.FORMAT_VERSION, true);
|
|
1380
|
+
offset += 4;
|
|
1381
|
+
view.setUint32(offset, this.dimension, true);
|
|
1382
|
+
offset += 4;
|
|
1383
|
+
const metricCode = this.metric === "cosine" ? 0 : this.metric === "euclidean" ? 1 : 2;
|
|
1384
|
+
view.setUint32(offset, metricCode, true);
|
|
1385
|
+
offset += 4;
|
|
1386
|
+
view.setUint32(offset, this.M, true);
|
|
1387
|
+
offset += 4;
|
|
1388
|
+
view.setUint32(offset, this.efConstruction, true);
|
|
1389
|
+
offset += 4;
|
|
1390
|
+
view.setUint32(offset, this.maxLevel, true);
|
|
1391
|
+
offset += 4;
|
|
1392
|
+
view.setUint32(offset, this.entryPointId, true);
|
|
1393
|
+
offset += 4;
|
|
1394
|
+
view.setUint32(offset, nodeCount, true);
|
|
1395
|
+
offset += 4;
|
|
1396
|
+
view.setUint32(offset, vectorDataOffset, true);
|
|
1397
|
+
offset += 4;
|
|
1398
|
+
for (let i = 0;i < nodesArray.length; i++) {
|
|
1399
|
+
const node = nodesArray[i];
|
|
1400
|
+
view.setUint32(offset, node.id, true);
|
|
1401
|
+
offset += 4;
|
|
1402
|
+
view.setUint32(offset, node.level, true);
|
|
1403
|
+
offset += 4;
|
|
1404
|
+
}
|
|
1405
|
+
for (let i = 0;i < nodesArray.length; i++) {
|
|
1406
|
+
const node = nodesArray[i];
|
|
1407
|
+
const nodeEncodings = encodedNeighbors[i];
|
|
1408
|
+
for (let l = 0;l <= node.level; l++) {
|
|
1409
|
+
view.setUint32(offset, node.neighbors[l].length, true);
|
|
1410
|
+
offset += 4;
|
|
1411
|
+
view.setUint32(offset, nodeEncodings[l].length, true);
|
|
1412
|
+
offset += 4;
|
|
1413
|
+
}
|
|
1414
|
+
}
|
|
1415
|
+
for (let i = 0;i < nodesArray.length; i++) {
|
|
1416
|
+
const nodeEncodings = encodedNeighbors[i];
|
|
1417
|
+
for (const encoded of nodeEncodings) {
|
|
1418
|
+
uint8Array.set(encoded, offset);
|
|
1419
|
+
offset += encoded.length;
|
|
1420
|
+
}
|
|
1421
|
+
}
|
|
1422
|
+
for (let i = 0;i < nodesArray.length; i++) {
|
|
1423
|
+
view.setUint32(offset, i * this.dimension * 4, true);
|
|
1424
|
+
offset += 4;
|
|
1425
|
+
}
|
|
1426
|
+
for (let i = 0;i < nodesArray.length; i++) {
|
|
1427
|
+
const node = nodesArray[i];
|
|
1428
|
+
for (let j = 0;j < this.dimension; j++) {
|
|
1429
|
+
view.setFloat32(offset, node.vector[j], true);
|
|
1430
|
+
offset += 4;
|
|
1431
|
+
}
|
|
1432
|
+
}
|
|
1433
|
+
return buffer;
|
|
1434
|
+
}
|
|
1435
|
+
static deserialize(buffer, options) {
|
|
1436
|
+
const view = new DataView(buffer);
|
|
1437
|
+
const uint8Array = new Uint8Array(buffer);
|
|
1438
|
+
const lazyLoad = options?.lazyLoadVectors ?? false;
|
|
1439
|
+
let offset = 0;
|
|
1440
|
+
const possibleMagic = view.getUint32(0, true);
|
|
1441
|
+
let formatVersion = 0;
|
|
1442
|
+
if (possibleMagic === HNSWIndex.MAGIC) {
|
|
1443
|
+
offset += 4;
|
|
1444
|
+
formatVersion = view.getUint32(offset, true);
|
|
1445
|
+
offset += 4;
|
|
1446
|
+
if (formatVersion > HNSWIndex.FORMAT_VERSION) {
|
|
1447
|
+
throw new Error(`Unsupported HNSW format version: ${formatVersion}. Maximum supported: ${HNSWIndex.FORMAT_VERSION}`);
|
|
1448
|
+
}
|
|
1449
|
+
} else {
|
|
1450
|
+
formatVersion = 0;
|
|
1451
|
+
offset = 0;
|
|
1452
|
+
}
|
|
1453
|
+
const dimension = view.getUint32(offset, true);
|
|
1454
|
+
offset += 4;
|
|
1455
|
+
const metricCode = view.getUint32(offset, true);
|
|
1456
|
+
const metric = metricCode === 0 ? "cosine" : metricCode === 1 ? "euclidean" : "dot_product";
|
|
1457
|
+
offset += 4;
|
|
1458
|
+
const M = view.getUint32(offset, true);
|
|
1459
|
+
offset += 4;
|
|
1460
|
+
const efConstruction = view.getUint32(offset, true);
|
|
1461
|
+
offset += 4;
|
|
1462
|
+
const maxLevel = view.getInt32(offset, true);
|
|
1463
|
+
offset += 4;
|
|
1464
|
+
const entryPointId = view.getInt32(offset, true);
|
|
1465
|
+
offset += 4;
|
|
1466
|
+
const nodeCount = view.getUint32(offset, true);
|
|
1467
|
+
offset += 4;
|
|
1468
|
+
let vectorDataOffset = 0;
|
|
1469
|
+
if (formatVersion >= 3) {
|
|
1470
|
+
vectorDataOffset = view.getUint32(offset, true);
|
|
1471
|
+
offset += 4;
|
|
1472
|
+
}
|
|
1473
|
+
const index = new HNSWIndex(dimension, metric, M, efConstruction);
|
|
1474
|
+
index.maxLevel = maxLevel;
|
|
1475
|
+
index.entryPointId = entryPointId;
|
|
1476
|
+
const indexToId = new Array(nodeCount);
|
|
1477
|
+
if (formatVersion >= 3) {
|
|
1478
|
+
const nodeMetadata = [];
|
|
1479
|
+
const neighborMetadata = [];
|
|
1480
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1481
|
+
const id = view.getUint32(offset, true);
|
|
1482
|
+
offset += 4;
|
|
1483
|
+
const level = view.getUint32(offset, true);
|
|
1484
|
+
offset += 4;
|
|
1485
|
+
indexToId[i] = id;
|
|
1486
|
+
nodeMetadata.push({ id, level });
|
|
1487
|
+
}
|
|
1488
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1489
|
+
const level = nodeMetadata[i].level;
|
|
1490
|
+
const levelMeta = [];
|
|
1491
|
+
for (let l = 0;l <= level; l++) {
|
|
1492
|
+
const count = view.getUint32(offset, true);
|
|
1493
|
+
offset += 4;
|
|
1494
|
+
const encodedSize = view.getUint32(offset, true);
|
|
1495
|
+
offset += 4;
|
|
1496
|
+
levelMeta.push({ count, encodedSize });
|
|
1497
|
+
}
|
|
1498
|
+
neighborMetadata.push(levelMeta);
|
|
1499
|
+
}
|
|
1500
|
+
const nodeNeighbors = [];
|
|
1501
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1502
|
+
const level = nodeMetadata[i].level;
|
|
1503
|
+
const neighbors = new Array(level + 1);
|
|
1504
|
+
for (let l = 0;l <= level; l++) {
|
|
1505
|
+
const { count, encodedSize } = neighborMetadata[i][l];
|
|
1506
|
+
if (count === 0 || encodedSize === 0) {
|
|
1507
|
+
neighbors[l] = [];
|
|
1508
|
+
} else {
|
|
1509
|
+
const encodedSlice = uint8Array.subarray(offset, offset + encodedSize);
|
|
1510
|
+
const neighborIndices = deltaDecodeNeighbors(encodedSlice, count);
|
|
1511
|
+
neighbors[l] = neighborIndices.map((idx) => idx >= 0 && idx < indexToId.length ? indexToId[idx] : 0);
|
|
1512
|
+
offset += encodedSize;
|
|
1513
|
+
}
|
|
1514
|
+
}
|
|
1515
|
+
nodeNeighbors.push(neighbors);
|
|
1516
|
+
}
|
|
1517
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1518
|
+
const relativeOffset = view.getUint32(offset, true);
|
|
1519
|
+
offset += 4;
|
|
1520
|
+
const id = nodeMetadata[i].id;
|
|
1521
|
+
index.vectorOffsets.set(id, vectorDataOffset + relativeOffset);
|
|
1522
|
+
}
|
|
1523
|
+
if (lazyLoad) {
|
|
1524
|
+
index.lazyLoadEnabled = true;
|
|
1525
|
+
index.vectorBuffer = buffer;
|
|
1526
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1527
|
+
const { id, level } = nodeMetadata[i];
|
|
1528
|
+
const node = {
|
|
1529
|
+
id,
|
|
1530
|
+
level,
|
|
1531
|
+
vector: new Float32Array(dimension),
|
|
1532
|
+
neighbors: nodeNeighbors[i]
|
|
1533
|
+
};
|
|
1534
|
+
index.setNode(node);
|
|
1535
|
+
}
|
|
1536
|
+
} else {
|
|
1537
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1538
|
+
const { id, level } = nodeMetadata[i];
|
|
1539
|
+
const vectorOffset = index.vectorOffsets.get(id);
|
|
1540
|
+
const vector = new Float32Array(dimension);
|
|
1541
|
+
for (let j = 0;j < dimension; j++) {
|
|
1542
|
+
vector[j] = view.getFloat32(vectorOffset + j * 4, true);
|
|
1543
|
+
}
|
|
1544
|
+
const node = { id, level, vector, neighbors: nodeNeighbors[i] };
|
|
1545
|
+
index.setNode(node);
|
|
1546
|
+
index.vectorsLoaded.add(id);
|
|
1547
|
+
}
|
|
1548
|
+
}
|
|
1549
|
+
} else if (formatVersion >= 2) {
|
|
1550
|
+
const nodeMetadata = [];
|
|
1551
|
+
const neighborMetadata = [];
|
|
1552
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1553
|
+
const id = view.getUint32(offset, true);
|
|
1554
|
+
offset += 4;
|
|
1555
|
+
const level = view.getUint32(offset, true);
|
|
1556
|
+
offset += 4;
|
|
1557
|
+
indexToId[i] = id;
|
|
1558
|
+
const vector = new Float32Array(dimension);
|
|
1559
|
+
for (let j = 0;j < dimension; j++) {
|
|
1560
|
+
vector[j] = view.getFloat32(offset, true);
|
|
1561
|
+
offset += 4;
|
|
1562
|
+
}
|
|
1563
|
+
nodeMetadata.push({ id, level, vector });
|
|
1564
|
+
}
|
|
1565
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1566
|
+
const level = nodeMetadata[i].level;
|
|
1567
|
+
const levelMeta = [];
|
|
1568
|
+
for (let l = 0;l <= level; l++) {
|
|
1569
|
+
const count = view.getUint32(offset, true);
|
|
1570
|
+
offset += 4;
|
|
1571
|
+
const encodedSize = view.getUint32(offset, true);
|
|
1572
|
+
offset += 4;
|
|
1573
|
+
levelMeta.push({ count, encodedSize });
|
|
1574
|
+
}
|
|
1575
|
+
neighborMetadata.push(levelMeta);
|
|
1576
|
+
}
|
|
1577
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1578
|
+
const { id, level, vector } = nodeMetadata[i];
|
|
1579
|
+
const neighbors = new Array(level + 1);
|
|
1580
|
+
for (let l = 0;l <= level; l++) {
|
|
1581
|
+
const { count, encodedSize } = neighborMetadata[i][l];
|
|
1582
|
+
if (count === 0 || encodedSize === 0) {
|
|
1583
|
+
neighbors[l] = [];
|
|
1584
|
+
} else {
|
|
1585
|
+
const encodedSlice = uint8Array.subarray(offset, offset + encodedSize);
|
|
1586
|
+
const neighborIndices = deltaDecodeNeighbors(encodedSlice, count);
|
|
1587
|
+
neighbors[l] = neighborIndices.map((idx) => idx >= 0 && idx < indexToId.length ? indexToId[idx] : 0);
|
|
1588
|
+
offset += encodedSize;
|
|
1589
|
+
}
|
|
1590
|
+
}
|
|
1591
|
+
const node = { id, level, vector, neighbors };
|
|
1592
|
+
index.setNode(node);
|
|
1593
|
+
}
|
|
1594
|
+
} else {
|
|
1595
|
+
for (let i = 0;i < nodeCount; i++) {
|
|
1596
|
+
const id = view.getUint32(offset, true);
|
|
1597
|
+
offset += 4;
|
|
1598
|
+
const level = view.getUint32(offset, true);
|
|
1599
|
+
offset += 4;
|
|
1600
|
+
indexToId[i] = id;
|
|
1601
|
+
const vector = new Float32Array(dimension);
|
|
1602
|
+
for (let j = 0;j < dimension; j++) {
|
|
1603
|
+
vector[j] = view.getFloat32(offset, true);
|
|
1604
|
+
offset += 4;
|
|
1605
|
+
}
|
|
1606
|
+
const neighbors = new Array(level + 1);
|
|
1607
|
+
for (let l = 0;l <= level; l++) {
|
|
1608
|
+
const neighborCount = view.getUint32(offset, true);
|
|
1609
|
+
offset += 4;
|
|
1610
|
+
neighbors[l] = new Array(neighborCount);
|
|
1611
|
+
}
|
|
1612
|
+
const node = { id, level, vector, neighbors };
|
|
1613
|
+
index.setNode(node);
|
|
1614
|
+
}
|
|
1615
|
+
for (const node of index.nodes.values()) {
|
|
1616
|
+
if (!node)
|
|
1617
|
+
continue;
|
|
1618
|
+
for (let l = 0;l <= node.level; l++) {
|
|
1619
|
+
for (let j = 0;j < node.neighbors[l].length; j++) {
|
|
1620
|
+
const neighborIndex = view.getInt32(offset, true);
|
|
1621
|
+
offset += 4;
|
|
1622
|
+
if (neighborIndex >= 0 && neighborIndex < indexToId.length) {
|
|
1623
|
+
node.neighbors[l][j] = indexToId[neighborIndex];
|
|
1624
|
+
}
|
|
1625
|
+
}
|
|
1626
|
+
}
|
|
1627
|
+
}
|
|
1628
|
+
}
|
|
1629
|
+
return index;
|
|
1630
|
+
}
|
|
1631
|
+
loadVector(nodeId) {
|
|
1632
|
+
const node = this.nodes[nodeId];
|
|
1633
|
+
if (!node)
|
|
1634
|
+
return null;
|
|
1635
|
+
if (!this.lazyLoadEnabled || this.vectorsLoaded.has(nodeId)) {
|
|
1636
|
+
return node.vector;
|
|
1637
|
+
}
|
|
1638
|
+
if (!this.vectorBuffer)
|
|
1639
|
+
return null;
|
|
1640
|
+
const vectorOffset = this.vectorOffsets.get(nodeId);
|
|
1641
|
+
if (vectorOffset === undefined)
|
|
1642
|
+
return null;
|
|
1643
|
+
const view = new DataView(this.vectorBuffer);
|
|
1644
|
+
const vector = new Float32Array(this.dimension);
|
|
1645
|
+
for (let j = 0;j < this.dimension; j++) {
|
|
1646
|
+
vector[j] = view.getFloat32(vectorOffset + j * 4, true);
|
|
1647
|
+
}
|
|
1648
|
+
node.vector = vector;
|
|
1649
|
+
this.vectorsLoaded.add(nodeId);
|
|
1650
|
+
this.setFlatVector(nodeId, vector);
|
|
1651
|
+
return vector;
|
|
1652
|
+
}
|
|
1653
|
+
preloadVectors(nodeIds) {
|
|
1654
|
+
if (!this.lazyLoadEnabled)
|
|
1655
|
+
return;
|
|
1656
|
+
for (const nodeId of nodeIds) {
|
|
1657
|
+
this.loadVector(nodeId);
|
|
1658
|
+
}
|
|
1659
|
+
}
|
|
1660
|
+
isLazyLoadEnabled() {
|
|
1661
|
+
return this.lazyLoadEnabled;
|
|
1662
|
+
}
|
|
1663
|
+
getLazyLoadStats() {
|
|
1664
|
+
const totalNodes = this.nodeCount;
|
|
1665
|
+
const loadedVectors = this.vectorsLoaded.size;
|
|
1666
|
+
if (!this.lazyLoadEnabled) {
|
|
1667
|
+
return {
|
|
1668
|
+
enabled: false,
|
|
1669
|
+
totalNodes,
|
|
1670
|
+
loadedVectors: totalNodes,
|
|
1671
|
+
memoryReduction: "0%"
|
|
1672
|
+
};
|
|
1673
|
+
}
|
|
1674
|
+
const reduction = totalNodes > 0 ? ((1 - loadedVectors / totalNodes) * 100).toFixed(1) : "0";
|
|
1675
|
+
return {
|
|
1676
|
+
enabled: true,
|
|
1677
|
+
totalNodes,
|
|
1678
|
+
loadedVectors,
|
|
1679
|
+
memoryReduction: `${reduction}%`
|
|
1680
|
+
};
|
|
1681
|
+
}
|
|
1682
|
+
async saveToFile(filePath) {
|
|
1683
|
+
const buffer = this.serialize();
|
|
1684
|
+
await Bun.write(filePath, buffer);
|
|
1685
|
+
}
|
|
1686
|
+
static async loadFromFile(filePath) {
|
|
1687
|
+
const file = Bun.file(filePath);
|
|
1688
|
+
const buffer = await file.arrayBuffer();
|
|
1689
|
+
return HNSWIndex.deserialize(buffer);
|
|
1690
|
+
}
|
|
1691
|
+
destroy() {
|
|
1692
|
+
this.nodes = [];
|
|
1693
|
+
this.nodeCount = 0;
|
|
1694
|
+
this.flatVectors = new Float32Array(0);
|
|
1695
|
+
this.flatVectorsCapacity = 0;
|
|
1696
|
+
}
|
|
1697
|
+
getMemoryUsage() {
|
|
1698
|
+
let totalBytes = 0;
|
|
1699
|
+
for (let i = 0;i < this.nodeCount; i++) {
|
|
1700
|
+
const node = this.nodes[i];
|
|
1701
|
+
if (!node)
|
|
1702
|
+
continue;
|
|
1703
|
+
totalBytes += 8;
|
|
1704
|
+
totalBytes += node.vector.length * 4;
|
|
1705
|
+
totalBytes += 24;
|
|
1706
|
+
for (const neighborList of node.neighbors) {
|
|
1707
|
+
totalBytes += neighborList.length * 4;
|
|
1708
|
+
totalBytes += 16;
|
|
1709
|
+
}
|
|
1710
|
+
}
|
|
1711
|
+
totalBytes += this.flatVectors.byteLength;
|
|
1712
|
+
totalBytes += this.nodeCount * 8;
|
|
1713
|
+
totalBytes += 1024;
|
|
1714
|
+
return totalBytes;
|
|
1715
|
+
}
|
|
1716
|
+
getAllVectors() {
|
|
1717
|
+
const result = new Map;
|
|
1718
|
+
for (let i = 0;i < this.nodeCount; i++) {
|
|
1719
|
+
const node = this.nodes[i];
|
|
1720
|
+
if (node)
|
|
1721
|
+
result.set(node.id, node.vector);
|
|
1722
|
+
}
|
|
1723
|
+
return result;
|
|
1724
|
+
}
|
|
1725
|
+
enableQuantization() {
|
|
1726
|
+
if (this.nodeCount === 0) {
|
|
1727
|
+
throw new Error("Cannot enable quantization on empty index. Add vectors first.");
|
|
1728
|
+
}
|
|
1729
|
+
const vectors = [];
|
|
1730
|
+
for (let i = 0;i < this.nodeCount; i++) {
|
|
1731
|
+
const node = this.nodes[i];
|
|
1732
|
+
if (node)
|
|
1733
|
+
vectors.push(node.vector);
|
|
1734
|
+
}
|
|
1735
|
+
this.scalarQuantizer = new ScalarQuantizer(this.dimension);
|
|
1736
|
+
this.scalarQuantizer.train(vectors);
|
|
1737
|
+
this.int8Vectors = new Array(this.nodeCount);
|
|
1738
|
+
for (let i = 0;i < this.nodeCount; i++) {
|
|
1739
|
+
const node = this.nodes[i];
|
|
1740
|
+
if (node) {
|
|
1741
|
+
this.int8Vectors[node.id] = this.scalarQuantizer.quantize(node.vector);
|
|
1742
|
+
}
|
|
1743
|
+
}
|
|
1744
|
+
this.quantizationEnabled = true;
|
|
1745
|
+
}
|
|
1746
|
+
isQuantizationEnabled() {
|
|
1747
|
+
return this.quantizationEnabled;
|
|
1748
|
+
}
|
|
1749
|
+
searchKNNQuantized(query, k, candidateMultiplier = 3, efSearch) {
|
|
1750
|
+
if (!this.quantizationEnabled) {
|
|
1751
|
+
return this.searchKNN(query, k, efSearch);
|
|
1752
|
+
}
|
|
1753
|
+
if (this.entryPointId === -1 || this.nodeCount === 0) {
|
|
1754
|
+
return [];
|
|
1755
|
+
}
|
|
1756
|
+
let normalizedQuery = query;
|
|
1757
|
+
if (this.vectorsAreNormalized) {
|
|
1758
|
+
this.queryNormBuffer.set(query);
|
|
1759
|
+
normalizedQuery = this.normalizeVector(this.queryNormBuffer);
|
|
1760
|
+
}
|
|
1761
|
+
const numCandidates = k * candidateMultiplier;
|
|
1762
|
+
const effectiveEf = efSearch || Math.max(numCandidates * 2, 50);
|
|
1763
|
+
let currentEntryPoint = this.nodes[this.entryPointId];
|
|
1764
|
+
const entryVector = this.getNodeVector(this.entryPointId);
|
|
1765
|
+
if (!entryVector)
|
|
1766
|
+
return [];
|
|
1767
|
+
let currentBest = { id: currentEntryPoint.id, distance: this.calculateDistance(normalizedQuery, entryVector) };
|
|
1768
|
+
for (let l = this.maxLevel;l > 0; l--) {
|
|
1769
|
+
const result = this.greedySearch(normalizedQuery, currentEntryPoint, l);
|
|
1770
|
+
if (result.distance < currentBest.distance) {
|
|
1771
|
+
currentBest = result;
|
|
1772
|
+
currentEntryPoint = this.nodes[currentBest.id];
|
|
1773
|
+
}
|
|
1774
|
+
}
|
|
1775
|
+
const candidates = this.searchLayerQuantized(normalizedQuery, currentBest, 0, effectiveEf);
|
|
1776
|
+
const rescoreCount = Math.min(candidates.length, numCandidates);
|
|
1777
|
+
const rescored = new Array(rescoreCount);
|
|
1778
|
+
for (let i = 0;i < rescoreCount; i++) {
|
|
1779
|
+
const c = candidates[i];
|
|
1780
|
+
const nodeVector = this.getNodeVector(c.id);
|
|
1781
|
+
if (nodeVector) {
|
|
1782
|
+
rescored[i] = { id: c.id, distance: this.calculateDistance(normalizedQuery, nodeVector) };
|
|
1783
|
+
} else {
|
|
1784
|
+
rescored[i] = c;
|
|
1785
|
+
}
|
|
1786
|
+
}
|
|
1787
|
+
rescored.sort((a, b) => a.distance - b.distance);
|
|
1788
|
+
if (rescored.length > k)
|
|
1789
|
+
rescored.length = k;
|
|
1790
|
+
return rescored;
|
|
1791
|
+
}
|
|
1792
|
+
searchLayerQuantized(query, nearest, layer, ef) {
|
|
1793
|
+
const int8Query = this.scalarQuantizer ? this.scalarQuantizer.quantize(query) : null;
|
|
1794
|
+
this.clearVisited();
|
|
1795
|
+
this.ensureHeapCapacity(ef);
|
|
1796
|
+
this.candidatesHeap.clear();
|
|
1797
|
+
this.resultsHeap.clear();
|
|
1798
|
+
this.markVisited(nearest.id);
|
|
1799
|
+
this.candidatesHeap.push(nearest.id, nearest.distance);
|
|
1800
|
+
this.resultsHeap.push(nearest.id, nearest.distance);
|
|
1801
|
+
let furthestResultDist = nearest.distance;
|
|
1802
|
+
while (!this.candidatesHeap.isEmpty()) {
|
|
1803
|
+
const closestCandidateDist = this.candidatesHeap.peekValue();
|
|
1804
|
+
const closestCandidateId = this.candidatesHeap.pop();
|
|
1805
|
+
if (closestCandidateId === -1)
|
|
1806
|
+
continue;
|
|
1807
|
+
if (this.resultsHeap.size() >= ef && closestCandidateDist > furthestResultDist) {
|
|
1808
|
+
break;
|
|
1809
|
+
}
|
|
1810
|
+
const node = this.nodes[closestCandidateId];
|
|
1811
|
+
if (!node)
|
|
1812
|
+
continue;
|
|
1813
|
+
const neighbors = node.neighbors[layer] || [];
|
|
1814
|
+
for (const neighborId of neighbors) {
|
|
1815
|
+
if (this.isVisited(neighborId))
|
|
1816
|
+
continue;
|
|
1817
|
+
this.markVisited(neighborId);
|
|
1818
|
+
let distance;
|
|
1819
|
+
if (int8Query) {
|
|
1820
|
+
const neighborInt8 = this.int8Vectors[neighborId];
|
|
1821
|
+
if (neighborInt8) {
|
|
1822
|
+
if (this.metric === "cosine") {
|
|
1823
|
+
distance = cosineDistanceInt8(int8Query, neighborInt8);
|
|
1824
|
+
} else {
|
|
1825
|
+
distance = l2SquaredInt8(int8Query, neighborInt8);
|
|
1826
|
+
}
|
|
1827
|
+
} else {
|
|
1828
|
+
const neighborNode = this.nodes[neighborId];
|
|
1829
|
+
if (!neighborNode)
|
|
1830
|
+
continue;
|
|
1831
|
+
distance = this.calculateDistance(query, neighborNode.vector);
|
|
1832
|
+
}
|
|
1833
|
+
} else {
|
|
1834
|
+
const neighborNode = this.nodes[neighborId];
|
|
1835
|
+
if (!neighborNode)
|
|
1836
|
+
continue;
|
|
1837
|
+
distance = this.calculateDistance(query, neighborNode.vector);
|
|
1838
|
+
}
|
|
1839
|
+
if (this.resultsHeap.size() < ef || distance < furthestResultDist) {
|
|
1840
|
+
this.candidatesHeap.push(neighborId, distance);
|
|
1841
|
+
this.resultsHeap.push(neighborId, distance);
|
|
1842
|
+
if (this.resultsHeap.size() > ef) {
|
|
1843
|
+
this.resultsHeap.pop();
|
|
1844
|
+
}
|
|
1845
|
+
furthestResultDist = this.resultsHeap.peekValue();
|
|
1846
|
+
}
|
|
1847
|
+
}
|
|
1848
|
+
}
|
|
1849
|
+
const resultCount = this.resultsHeap.size();
|
|
1850
|
+
const results = new Array(resultCount);
|
|
1851
|
+
let idx = resultCount - 1;
|
|
1852
|
+
while (!this.resultsHeap.isEmpty()) {
|
|
1853
|
+
const dist = this.resultsHeap.peekValue();
|
|
1854
|
+
const id = this.resultsHeap.pop();
|
|
1855
|
+
results[idx--] = { id, distance: dist };
|
|
1856
|
+
}
|
|
1857
|
+
return results;
|
|
1858
|
+
}
|
|
1859
|
+
getQuantizationStats() {
|
|
1860
|
+
const vectorCount = this.nodeCount;
|
|
1861
|
+
const float32Size = vectorCount * this.dimension * 4;
|
|
1862
|
+
if (this.quantizationEnabled) {
|
|
1863
|
+
const int8Size = vectorCount * this.dimension;
|
|
1864
|
+
const reduction = (float32Size / int8Size).toFixed(1);
|
|
1865
|
+
return {
|
|
1866
|
+
enabled: true,
|
|
1867
|
+
vectorCount,
|
|
1868
|
+
memoryReduction: `${reduction}x (${(float32Size / 1024 / 1024).toFixed(1)}MB → ${(int8Size / 1024 / 1024).toFixed(1)}MB)`,
|
|
1869
|
+
expectedSpeedup: "3-4x for distance calculations"
|
|
1870
|
+
};
|
|
1871
|
+
}
|
|
1872
|
+
return {
|
|
1873
|
+
enabled: false,
|
|
1874
|
+
vectorCount,
|
|
1875
|
+
memoryReduction: "1x (no quantization)",
|
|
1876
|
+
expectedSpeedup: "1x (baseline)"
|
|
1877
|
+
};
|
|
1878
|
+
}
|
|
1879
|
+
}
|
|
1880
|
+
|
|
1881
|
+
// src/Collection.ts
|
|
1882
|
+
var {mkdir} = (() => ({}));
|
|
1883
|
+
|
|
1884
|
+
// node:path
|
|
1885
|
+
/**
 * Guards path helpers against non-string input.
 *
 * @param {*} path - Value to check.
 * @returns {void}
 * @throws {TypeError} When `path` is not a string.
 */
function assertPath(path) {
  if (typeof path === "string") {
    return;
  }
  throw new TypeError(`Path must be a string. Received ${JSON.stringify(path)}`);
}
|
|
1889
|
+
/**
 * Collapses "." segments, ".." segments and repeated slashes in a POSIX path
 * string. Leading and trailing slashes are not part of the result.
 *
 * @param {string} path - Path to normalize.
 * @param {boolean} allowAboveRoot - When true, ".." segments that cannot be
 *   resolved against an earlier segment are kept; otherwise they are dropped.
 * @returns {string} The normalized segments joined by "/".
 */
function normalizeStringPosix(path, allowAboveRoot) {
  const SLASH = 47;
  const DOT = 46;
  let res = "";
  let lastSegmentLength = 0;
  let lastSlash = -1;
  let dots = 0;
  let code;
  for (let i = 0; i <= path.length; ++i) {
    if (i < path.length) {
      code = path.charCodeAt(i);
    } else if (code === SLASH) {
      break;
    } else {
      // Pretend the path ends with a slash so the last segment gets flushed.
      code = SLASH;
    }
    if (code !== SLASH) {
      // Count dots only while the segment consists solely of dots.
      dots = code === DOT && dots !== -1 ? dots + 1 : -1;
      continue;
    }
    const isEmptyOrCurrentDir = lastSlash === i - 1 || dots === 1;
    if (!isEmptyOrCurrentDir) {
      if (dots === 2) {
        const endsWithDotDot =
          res.length >= 2 &&
          lastSegmentLength === 2 &&
          res.charCodeAt(res.length - 1) === DOT &&
          res.charCodeAt(res.length - 2) === DOT;
        if (!endsWithDotDot) {
          if (res.length > 2) {
            const lastSlashIndex = res.lastIndexOf("/");
            if (lastSlashIndex !== res.length - 1) {
              // Drop the previous segment.
              if (lastSlashIndex === -1) {
                res = "";
                lastSegmentLength = 0;
              } else {
                res = res.slice(0, lastSlashIndex);
                lastSegmentLength = res.length - 1 - res.lastIndexOf("/");
              }
              lastSlash = i;
              dots = 0;
              continue;
            }
          } else if (res.length === 2 || res.length === 1) {
            res = "";
            lastSegmentLength = 0;
            lastSlash = i;
            dots = 0;
            continue;
          }
        }
        if (allowAboveRoot) {
          res = res.length > 0 ? res + "/.." : "..";
          lastSegmentLength = 2;
        }
      } else {
        const segment = path.slice(lastSlash + 1, i);
        res = res.length > 0 ? res + "/" + segment : segment;
        lastSegmentLength = i - lastSlash - 1;
      }
    }
    lastSlash = i;
    dots = 0;
  }
  return res;
}
|
|
1940
|
+
/**
 * Builds a path string from the parts of a path object.
 *
 * @param {string} sep - Separator placed between directory and base.
 * @param {{dir?: string, root?: string, base?: string, name?: string, ext?: string}} pathObject
 *   `dir` wins over `root`; `base` wins over `name` + `ext`.
 * @returns {string} The base alone when there is no directory, directory + base
 *   when the directory equals `root`, otherwise directory + sep + base.
 */
function _format(sep, pathObject) {
  const directory = pathObject.dir || pathObject.root;
  const fileName = pathObject.base || (pathObject.name || "") + (pathObject.ext || "");
  if (!directory) {
    return fileName;
  }
  return directory === pathObject.root
    ? directory + fileName
    : directory + sep + fileName;
}
|
|
1948
|
+
/**
 * Resolves a sequence of path segments into an absolute POSIX path, processing
 * the arguments from right to left until an absolute segment is found and
 * falling back to `process.cwd()` when none of them is absolute.
 *
 * @param {...string} segments - Path segments; empty strings are ignored.
 * @returns {string} The normalized path ("/" or "." when nothing remains).
 * @throws {TypeError} When a segment is not a string.
 */
function resolve(...segments) {
  let resolvedPath = "";
  let resolvedAbsolute = false;
  // Index -1 stands for the current working directory.
  for (let i = segments.length - 1; i >= -1 && !resolvedAbsolute; i--) {
    const path = i >= 0 ? segments[i] : process.cwd();
    assertPath(path);
    if (path.length === 0) {
      continue;
    }
    resolvedPath = `${path}/${resolvedPath}`;
    resolvedAbsolute = path.charCodeAt(0) === 47;
  }
  resolvedPath = normalizeStringPosix(resolvedPath, !resolvedAbsolute);
  if (resolvedAbsolute) {
    return resolvedPath.length > 0 ? "/" + resolvedPath : "/";
  }
  return resolvedPath.length > 0 ? resolvedPath : ".";
}
|
|
1973
|
+
/**
 * Normalizes a POSIX path, keeping a leading slash and a trailing slash when
 * the input had them.
 *
 * @param {string} path - Path to normalize.
 * @returns {string} The normalized path; "." for an empty input or an empty
 *   relative result.
 * @throws {TypeError} When `path` is not a string.
 */
function normalize(path) {
  assertPath(path);
  if (path.length === 0) {
    return ".";
  }
  const rooted = path.charCodeAt(0) === 47;
  const endsWithSlash = path.charCodeAt(path.length - 1) === 47;
  let normalized = normalizeStringPosix(path, !rooted);
  if (normalized.length === 0 && !rooted) {
    normalized = ".";
  }
  if (normalized.length > 0 && endsWithSlash) {
    normalized += "/";
  }
  return rooted ? "/" + normalized : normalized;
}
|
|
1985
|
+
/**
 * Tells whether a POSIX path starts with "/".
 *
 * @param {string} path - Path to test.
 * @returns {boolean} True for a non-empty path whose first character is "/".
 * @throws {TypeError} When `path` is not a string.
 */
function isAbsolute(path) {
  assertPath(path);
  if (path.length === 0) {
    return false;
  }
  return path.charCodeAt(0) === 47;
}
|
|
1988
|
+
/**
 * Joins path segments with "/" and normalizes the result.
 *
 * @param {...string} parts - Segments to join; empty strings are ignored.
 * @returns {string} The normalized joined path, or "." when there is nothing to join.
 * @throws {TypeError} When a segment is not a string.
 */
function join(...parts) {
  if (parts.length === 0) {
    return ".";
  }
  let joined;
  for (const part of parts) {
    assertPath(part);
    if (part.length > 0) {
      joined = joined === undefined ? part : joined + "/" + part;
    }
  }
  return joined === undefined ? "." : normalize(joined);
}
|
|
2004
|
+
/**
 * Computes the relative POSIX path that leads from `from` to `to`. Both
 * arguments are resolved to absolute paths first.
 *
 * @param {string} from - Starting path.
 * @param {string} to - Destination path.
 * @returns {string} The relative path; "" when both resolve to the same path.
 * @throws {TypeError} When either argument is not a string.
 */
function relative(from, to) {
  assertPath(from);
  assertPath(to);
  if (from === to) {
    return "";
  }
  const source = resolve(from);
  const target = resolve(to);
  if (source === target) {
    return "";
  }
  const SLASH = 47;
  // Skip the leading slashes of both paths.
  let sourceStart = 1;
  while (sourceStart < source.length && source.charCodeAt(sourceStart) === SLASH) {
    ++sourceStart;
  }
  const sourceEnd = source.length;
  const sourceLen = sourceEnd - sourceStart;
  let targetStart = 1;
  while (targetStart < target.length && target.charCodeAt(targetStart) === SLASH) {
    ++targetStart;
  }
  const targetLen = target.length - targetStart;
  const shared = sourceLen < targetLen ? sourceLen : targetLen;
  // Find the longest common prefix and remember its last separator.
  let lastCommonSep = -1;
  let i = 0;
  for (; i < shared; ++i) {
    const sourceCode = source.charCodeAt(sourceStart + i);
    if (sourceCode !== target.charCodeAt(targetStart + i)) {
      break;
    }
    if (sourceCode === SLASH) {
      lastCommonSep = i;
    }
  }
  if (i === shared) {
    // One path is a prefix of the other (up to the shorter length).
    if (targetLen > shared) {
      if (target.charCodeAt(targetStart + i) === SLASH) {
        return target.slice(targetStart + i + 1);
      }
      if (i === 0) {
        return target.slice(targetStart + i);
      }
    } else if (sourceLen > shared) {
      if (source.charCodeAt(sourceStart + i) === SLASH) {
        lastCommonSep = i;
      } else if (i === 0) {
        lastCommonSep = 0;
      }
    }
  }
  // One ".." for every source segment below the common prefix.
  let ups = "";
  for (let j = sourceStart + lastCommonSep + 1; j <= sourceEnd; ++j) {
    if (j === sourceEnd || source.charCodeAt(j) === SLASH) {
      ups += ups.length === 0 ? ".." : "/..";
    }
  }
  if (ups.length > 0) {
    return ups + target.slice(targetStart + lastCommonSep);
  }
  let tailStart = targetStart + lastCommonSep;
  if (target.charCodeAt(tailStart) === SLASH) {
    ++tailStart;
  }
  return target.slice(tailStart);
}
|
|
2054
|
+
/**
 * Identity function: returns its argument unchanged.
 *
 * @param {*} path - Any value.
 * @returns {*} The same value.
 */
function _makeLong(path) {
  const unchanged = path;
  return unchanged;
}
|
|
2057
|
+
/**
 * Returns the directory part of a POSIX path, ignoring trailing slashes.
 *
 * @param {string} path - Path to inspect.
 * @returns {string} The directory; "." when there is none, "/" for entries
 *   directly below the root, "//" when the path starts with two slashes.
 * @throws {TypeError} When `path` is not a string.
 */
function dirname(path) {
  assertPath(path);
  if (path.length === 0) {
    return ".";
  }
  const SLASH = 47;
  const rooted = path.charCodeAt(0) === SLASH;
  let cut = -1;
  let onlyTrailingSlashes = true;
  // Scan backwards for the first slash that follows a non-slash character.
  for (let i = path.length - 1; i >= 1 && cut === -1; --i) {
    if (path.charCodeAt(i) !== SLASH) {
      onlyTrailingSlashes = false;
    } else if (!onlyTrailingSlashes) {
      cut = i;
    }
  }
  if (cut === -1) {
    return rooted ? "/" : ".";
  }
  if (rooted && cut === 1) {
    return "//";
  }
  return path.slice(0, cut);
}
|
|
2075
|
+
/**
 * Returns the last segment of a POSIX path, optionally without a given suffix.
 *
 * @param {string} path - Path to inspect.
 * @param {string} [ext] - Suffix to strip from the result when the last segment ends with it.
 * @returns {string} The last segment, ignoring trailing slashes; "" when there
 *   is none or when `ext` equals the whole path.
 * @throws {TypeError} When `ext` is given but is not a string, or `path` is not a string.
 */
function basename(path, ext) {
  if (ext !== undefined && typeof ext !== "string") {
    throw new TypeError('"ext" argument must be a string');
  }
  assertPath(path);
  const SLASH = 47;
  let start = 0;
  let end = -1;
  let matchedSlash = true;
  const suffixApplies = ext !== undefined && ext.length > 0 && ext.length <= path.length;
  if (!suffixApplies) {
    for (let i = path.length - 1; i >= 0; --i) {
      if (path.charCodeAt(i) === SLASH) {
        if (!matchedSlash) {
          start = i + 1;
          break;
        }
      } else if (end === -1) {
        // First non-slash character from the right marks the end of the segment.
        matchedSlash = false;
        end = i + 1;
      }
    }
    return end === -1 ? "" : path.slice(start, end);
  }
  if (ext.length === path.length && ext === path) {
    return "";
  }
  let extIdx = ext.length - 1;
  let firstNonSlashEnd = -1;
  for (let i = path.length - 1; i >= 0; --i) {
    const code = path.charCodeAt(i);
    if (code === SLASH) {
      if (!matchedSlash) {
        start = i + 1;
        break;
      }
      continue;
    }
    if (firstNonSlashEnd === -1) {
      matchedSlash = false;
      firstNonSlashEnd = i + 1;
    }
    if (extIdx >= 0) {
      if (code === ext.charCodeAt(extIdx)) {
        // The whole suffix matched: the name ends where the suffix begins.
        if (--extIdx === -1) {
          end = i;
        }
      } else {
        // Suffix mismatch: stop comparing and keep the full segment.
        extIdx = -1;
        end = firstNonSlashEnd;
      }
    }
  }
  if (start === end) {
    end = firstNonSlashEnd;
  } else if (end === -1) {
    end = path.length;
  }
  return path.slice(start, end);
}
|
|
2121
|
+
/**
 * Returns the extension of the last segment of a POSIX path, from its last "."
 * to its end.
 *
 * @param {string} path - Path to inspect.
 * @returns {string} The extension including the dot, or "" when the last
 *   segment has no dot, starts with its only dot, or is "..".
 * @throws {TypeError} When `path` is not a string.
 */
function extname(path) {
  assertPath(path);
  const SLASH = 47;
  const DOT = 46;
  let startDot = -1;
  let startPart = 0;
  let end = -1;
  let matchedSlash = true;
  // 0: no character seen before the dot yet, -1: a non-dot character was seen,
  // 1: another dot was seen.
  let preDotState = 0;
  for (let i = path.length - 1; i >= 0; --i) {
    const code = path.charCodeAt(i);
    if (code === SLASH) {
      if (!matchedSlash) {
        startPart = i + 1;
        break;
      }
      continue;
    }
    if (end === -1) {
      matchedSlash = false;
      end = i + 1;
    }
    if (code === DOT) {
      if (startDot === -1) {
        startDot = i;
      } else if (preDotState !== 1) {
        preDotState = 1;
      }
    } else if (startDot !== -1) {
      preDotState = -1;
    }
  }
  const segmentIsDotDot =
    preDotState === 1 && startDot === end - 1 && startDot === startPart + 1;
  if (startDot === -1 || end === -1 || preDotState === 0 || segmentIsDotDot) {
    return "";
  }
  return path.slice(startDot, end);
}
|
|
2147
|
+
/**
 * Builds a POSIX path string from a path object by delegating to `_format`
 * with "/" as separator.
 *
 * @param {object} pathObject - Object with `dir`, `root`, `base`, `name`, `ext` parts.
 * @returns {string} The formatted path.
 * @throws {TypeError} When `pathObject` is null or not an object.
 */
function format(pathObject) {
  const isObject = typeof pathObject === "object" && pathObject !== null;
  if (!isObject) {
    throw new TypeError(`The "pathObject" argument must be of type Object. Received type ${typeof pathObject}`);
  }
  return _format("/", pathObject);
}
|
|
2152
|
+
/**
 * Splits a POSIX path into root, directory, base name, name and extension.
 *
 * @param {string} path - Path to split.
 * @returns {{root: string, dir: string, base: string, ext: string, name: string}}
 *   All parts are "" for an empty path.
 * @throws {TypeError} When `path` is not a string.
 */
function parse(path) {
  assertPath(path);
  const ret = { root: "", dir: "", base: "", ext: "", name: "" };
  if (path.length === 0) {
    return ret;
  }
  const SLASH = 47;
  const DOT = 46;
  const rooted = path.charCodeAt(0) === SLASH;
  let start = 0;
  if (rooted) {
    ret.root = "/";
    start = 1;
  }
  let startDot = -1;
  let startPart = 0;
  let end = -1;
  let matchedSlash = true;
  // 0: no character seen before the dot yet, -1: a non-dot character was seen,
  // 1: another dot was seen.
  let preDotState = 0;
  for (let i = path.length - 1; i >= start; --i) {
    const code = path.charCodeAt(i);
    if (code === SLASH) {
      if (!matchedSlash) {
        startPart = i + 1;
        break;
      }
      continue;
    }
    if (end === -1) {
      matchedSlash = false;
      end = i + 1;
    }
    if (code === DOT) {
      if (startDot === -1) {
        startDot = i;
      } else if (preDotState !== 1) {
        preDotState = 1;
      }
    } else if (startDot !== -1) {
      preDotState = -1;
    }
  }
  // For a rooted path without further slashes the base starts after the root.
  const baseStart = startPart === 0 && rooted ? 1 : startPart;
  const segmentIsDotDot =
    preDotState === 1 && startDot === end - 1 && startDot === startPart + 1;
  const hasNoExtension =
    startDot === -1 || end === -1 || preDotState === 0 || segmentIsDotDot;
  if (hasNoExtension) {
    if (end !== -1) {
      ret.base = ret.name = path.slice(baseStart, end);
    }
  } else {
    ret.name = path.slice(baseStart, startDot);
    ret.base = path.slice(baseStart, end);
    ret.ext = path.slice(startDot, end);
  }
  if (startPart > 0) {
    ret.dir = path.slice(0, startPart - 1);
  } else if (rooted) {
    ret.dir = "/";
  }
  return ret;
}
|
|
2200
|
+
// POSIX path constants and the namespace object bundling the helpers above.
var sep = "/";
var delimiter = ":";
var posix = {
  resolve,
  normalize,
  isAbsolute,
  join,
  relative,
  _makeLong,
  dirname,
  basename,
  extname,
  format,
  parse,
  sep,
  delimiter,
  win32: null,
  posix: null
};
// The namespace refers to itself through its `posix` property.
posix.posix = posix;
|
|
2203
|
+
|
|
2204
|
+
// src/Collection.ts
|
|
2205
|
+
class Collection {
|
|
2206
|
+
name;
|
|
2207
|
+
dimension;
|
|
2208
|
+
metric;
|
|
2209
|
+
M;
|
|
2210
|
+
efConstruction;
|
|
2211
|
+
indexPath;
|
|
2212
|
+
metaPath;
|
|
2213
|
+
deletedPath;
|
|
2214
|
+
hnsw;
|
|
2215
|
+
idMap;
|
|
2216
|
+
idReverseMap;
|
|
2217
|
+
metadata;
|
|
2218
|
+
deletedIds;
|
|
2219
|
+
constructor(name, config, collectionPath) {
|
|
2220
|
+
this.name = name;
|
|
2221
|
+
this.dimension = config.dimension;
|
|
2222
|
+
this.metric = config.metric || "cosine";
|
|
2223
|
+
this.M = config.M || 16;
|
|
2224
|
+
this.efConstruction = config.efConstruction || 200;
|
|
2225
|
+
this.indexPath = join(collectionPath, `${name}.hnsw`);
|
|
2226
|
+
this.metaPath = join(collectionPath, `${name}.meta`);
|
|
2227
|
+
this.deletedPath = join(collectionPath, `${name}.deleted`);
|
|
2228
|
+
this.hnsw = new HNSWIndex(config.dimension, this.metric, this.M, this.efConstruction);
|
|
2229
|
+
this.idMap = new Map;
|
|
2230
|
+
this.idReverseMap = new Map;
|
|
2231
|
+
this.metadata = new Map;
|
|
2232
|
+
this.deletedIds = new Set;
|
|
2233
|
+
}
|
|
2234
|
+
async init() {
|
|
2235
|
+
await this.loadFromDisk();
|
|
2236
|
+
}
|
|
2237
|
+
async loadFromDisk() {
|
|
2238
|
+
const indexFile = Bun.file(this.indexPath);
|
|
2239
|
+
if (await indexFile.exists()) {
|
|
2240
|
+
try {
|
|
2241
|
+
this.hnsw = await HNSWIndex.loadFromFile(this.indexPath);
|
|
2242
|
+
} catch (e) {
|
|
2243
|
+
console.warn(`Failed to load HNSW index from ${this.indexPath}:`, e);
|
|
2244
|
+
}
|
|
2245
|
+
}
|
|
2246
|
+
const metaFile = Bun.file(this.metaPath);
|
|
2247
|
+
if (await metaFile.exists()) {
|
|
2248
|
+
try {
|
|
2249
|
+
const metaContent = await metaFile.text();
|
|
2250
|
+
const lines = metaContent.split(`
|
|
2251
|
+
`);
|
|
2252
|
+
for (let i = 0;i < lines.length; i++) {
|
|
2253
|
+
const line = lines[i];
|
|
2254
|
+
if (line.length === 0 || line.trim().length === 0)
|
|
2255
|
+
continue;
|
|
2256
|
+
try {
|
|
2257
|
+
const parts = line.split("\t");
|
|
2258
|
+
const id = parts[0];
|
|
2259
|
+
const internalId = parseInt(parts[1], 10);
|
|
2260
|
+
if (!isNaN(internalId)) {
|
|
2261
|
+
this.idMap.set(id, internalId);
|
|
2262
|
+
this.idReverseMap.set(internalId, id);
|
|
2263
|
+
if (parts.length > 2) {
|
|
2264
|
+
const metaStr = parts.slice(2).join("\t");
|
|
2265
|
+
const metadata = JSON.parse(metaStr);
|
|
2266
|
+
this.metadata.set(internalId, metadata);
|
|
2267
|
+
}
|
|
2268
|
+
}
|
|
2269
|
+
} catch (e) {
|
|
2270
|
+
console.warn(`Skipping malformed line in metadata file: ${line}`);
|
|
2271
|
+
}
|
|
2272
|
+
}
|
|
2273
|
+
} catch (e) {}
|
|
2274
|
+
}
|
|
2275
|
+
const deletedFile = Bun.file(this.deletedPath);
|
|
2276
|
+
if (await deletedFile.exists()) {
|
|
2277
|
+
try {
|
|
2278
|
+
const deletedContent = await deletedFile.text();
|
|
2279
|
+
const deletedArray = JSON.parse(deletedContent);
|
|
2280
|
+
if (Array.isArray(deletedArray)) {
|
|
2281
|
+
this.deletedIds = new Set(deletedArray);
|
|
2282
|
+
}
|
|
2283
|
+
} catch (e) {}
|
|
2284
|
+
}
|
|
2285
|
+
}
|
|
2286
|
+
async add(config) {
|
|
2287
|
+
if (config.vectors.length !== config.ids.length) {
|
|
2288
|
+
throw new Error("Number of vectors must match number of IDs");
|
|
2289
|
+
}
|
|
2290
|
+
if (config.metadata && config.metadata.length !== config.ids.length) {
|
|
2291
|
+
throw new Error("Number of metadata entries must match number of IDs");
|
|
2292
|
+
}
|
|
2293
|
+
const duplicates = [];
|
|
2294
|
+
for (const id of config.ids) {
|
|
2295
|
+
if (this.idMap.has(id)) {
|
|
2296
|
+
duplicates.push(id);
|
|
2297
|
+
}
|
|
2298
|
+
}
|
|
2299
|
+
if (duplicates.length > 0) {
|
|
2300
|
+
throw new Error(`Cannot add: IDs already exist: ${duplicates.slice(0, 5).join(", ")}${duplicates.length > 5 ? ` and ${duplicates.length - 5} more` : ""}. ` + `Use upsert() to update existing vectors.`);
|
|
2301
|
+
}
|
|
2302
|
+
const points = new Array(config.ids.length);
|
|
2303
|
+
const startId = this.idMap.size;
|
|
2304
|
+
for (let i = 0;i < config.ids.length; i++) {
|
|
2305
|
+
const id = config.ids[i];
|
|
2306
|
+
const vector = config.vectors[i];
|
|
2307
|
+
const metadata = config.metadata ? config.metadata[i] : undefined;
|
|
2308
|
+
if (vector.length !== this.dimension) {
|
|
2309
|
+
throw new Error(`Vector at index ${i} has dimension ${vector.length}, expected ${this.dimension}`);
|
|
2310
|
+
}
|
|
2311
|
+
const numericId = startId + i;
|
|
2312
|
+
this.idMap.set(id, numericId);
|
|
2313
|
+
this.idReverseMap.set(numericId, id);
|
|
2314
|
+
if (metadata) {
|
|
2315
|
+
this.metadata.set(numericId, metadata);
|
|
2316
|
+
}
|
|
2317
|
+
points[i] = { id: numericId, vector: new Float32Array(vector) };
|
|
2318
|
+
}
|
|
2319
|
+
await this.hnsw.addPointsBulk(points);
|
|
2320
|
+
}
|
|
2321
|
+
async query(config) {
|
|
2322
|
+
const { queryVector, k, filter, efSearch } = config;
|
|
2323
|
+
if (queryVector.length !== this.dimension) {
|
|
2324
|
+
throw new Error(`Query vector has dimension ${queryVector.length}, expected ${this.dimension}`);
|
|
2325
|
+
}
|
|
2326
|
+
const ef = efSearch || Math.max(k * 2, 50);
|
|
2327
|
+
const results = this.hnsw.searchKNN(new Float32Array(queryVector), k * 2, ef);
|
|
2328
|
+
let filteredResults = results;
|
|
2329
|
+
if (filter) {
|
|
2330
|
+
let hasFilter = false;
|
|
2331
|
+
for (const _ in filter) {
|
|
2332
|
+
hasFilter = true;
|
|
2333
|
+
break;
|
|
2334
|
+
}
|
|
2335
|
+
if (hasFilter) {
|
|
2336
|
+
filteredResults = results.filter((result) => {
|
|
2337
|
+
const metadata2 = this.metadata.get(result.id) || {};
|
|
2338
|
+
return this.matchesFilter(metadata2, filter);
|
|
2339
|
+
});
|
|
2340
|
+
}
|
|
2341
|
+
}
|
|
2342
|
+
const seenIds = new Set;
|
|
2343
|
+
const deduplicatedResults = [];
|
|
2344
|
+
for (const result of filteredResults) {
|
|
2345
|
+
if (this.deletedIds.has(result.id))
|
|
2346
|
+
continue;
|
|
2347
|
+
if (!seenIds.has(result.id)) {
|
|
2348
|
+
seenIds.add(result.id);
|
|
2349
|
+
deduplicatedResults.push(result);
|
|
2350
|
+
}
|
|
2351
|
+
}
|
|
2352
|
+
deduplicatedResults.sort((a, b) => a.distance - b.distance);
|
|
2353
|
+
const resultCount = Math.min(deduplicatedResults.length, k);
|
|
2354
|
+
const ids = new Array(resultCount);
|
|
2355
|
+
const distances = new Array(resultCount);
|
|
2356
|
+
const metadata = new Array(resultCount);
|
|
2357
|
+
let outIdx = 0;
|
|
2358
|
+
for (let i = 0;i < resultCount; i++) {
|
|
2359
|
+
const result = deduplicatedResults[i];
|
|
2360
|
+
const id = this.idReverseMap.get(result.id);
|
|
2361
|
+
if (id) {
|
|
2362
|
+
ids[outIdx] = id;
|
|
2363
|
+
distances[outIdx] = result.distance;
|
|
2364
|
+
metadata[outIdx] = this.metadata.get(result.id) || {};
|
|
2365
|
+
outIdx++;
|
|
2366
|
+
}
|
|
2367
|
+
}
|
|
2368
|
+
if (outIdx < resultCount) {
|
|
2369
|
+
ids.length = outIdx;
|
|
2370
|
+
distances.length = outIdx;
|
|
2371
|
+
metadata.length = outIdx;
|
|
2372
|
+
}
|
|
2373
|
+
return { ids, distances, metadata };
|
|
2374
|
+
}
|
|
2375
|
+
async queryBatch(configs) {
|
|
2376
|
+
if (configs.length === 0)
|
|
2377
|
+
return [];
|
|
2378
|
+
for (let i = 0;i < configs.length; i++) {
|
|
2379
|
+
if (configs[i].queryVector.length !== this.dimension) {
|
|
2380
|
+
throw new Error(`Query ${i} has dimension ${configs[i].queryVector.length}, expected ${this.dimension}`);
|
|
2381
|
+
}
|
|
2382
|
+
}
|
|
2383
|
+
const queries = new Array(configs.length);
|
|
2384
|
+
let k = 0;
|
|
2385
|
+
let efSearch = 0;
|
|
2386
|
+
for (let i = 0;i < configs.length; i++) {
|
|
2387
|
+
const c = configs[i];
|
|
2388
|
+
queries[i] = new Float32Array(c.queryVector);
|
|
2389
|
+
if (c.k > k)
|
|
2390
|
+
k = c.k;
|
|
2391
|
+
const ef = c.efSearch || Math.max(c.k * 2, 50);
|
|
2392
|
+
if (ef > efSearch)
|
|
2393
|
+
efSearch = ef;
|
|
2394
|
+
}
|
|
2395
|
+
const batchResults = this.hnsw.searchKNNBatch(queries, k * 2, efSearch);
|
|
2396
|
+
const results = [];
|
|
2397
|
+
for (let q = 0;q < configs.length; q++) {
|
|
2398
|
+
const config = configs[q];
|
|
2399
|
+
const rawResults = batchResults[q];
|
|
2400
|
+
let filteredResults = rawResults;
|
|
2401
|
+
if (config.filter) {
|
|
2402
|
+
let hasFilter = false;
|
|
2403
|
+
for (const _ in config.filter) {
|
|
2404
|
+
hasFilter = true;
|
|
2405
|
+
break;
|
|
2406
|
+
}
|
|
2407
|
+
if (hasFilter) {
|
|
2408
|
+
filteredResults = rawResults.filter((result) => {
|
|
2409
|
+
const metadata2 = this.metadata.get(result.id) || {};
|
|
2410
|
+
return this.matchesFilter(metadata2, config.filter);
|
|
2411
|
+
});
|
|
2412
|
+
}
|
|
2413
|
+
}
|
|
2414
|
+
const seenIds = new Set;
|
|
2415
|
+
const deduplicatedResults = [];
|
|
2416
|
+
for (const result of filteredResults) {
|
|
2417
|
+
if (this.deletedIds.has(result.id))
|
|
2418
|
+
continue;
|
|
2419
|
+
if (!seenIds.has(result.id)) {
|
|
2420
|
+
seenIds.add(result.id);
|
|
2421
|
+
deduplicatedResults.push(result);
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
deduplicatedResults.sort((a, b) => a.distance - b.distance);
|
|
2425
|
+
const resultCount = Math.min(deduplicatedResults.length, config.k);
|
|
2426
|
+
const ids = new Array(resultCount);
|
|
2427
|
+
const distances = new Array(resultCount);
|
|
2428
|
+
const metadata = new Array(resultCount);
|
|
2429
|
+
let outIdx = 0;
|
|
2430
|
+
for (let i = 0;i < resultCount; i++) {
|
|
2431
|
+
const result = deduplicatedResults[i];
|
|
2432
|
+
const id = this.idReverseMap.get(result.id);
|
|
2433
|
+
if (id) {
|
|
2434
|
+
ids[outIdx] = id;
|
|
2435
|
+
distances[outIdx] = result.distance;
|
|
2436
|
+
metadata[outIdx] = this.metadata.get(result.id) || {};
|
|
2437
|
+
outIdx++;
|
|
2438
|
+
}
|
|
2439
|
+
}
|
|
2440
|
+
if (outIdx < resultCount) {
|
|
2441
|
+
ids.length = outIdx;
|
|
2442
|
+
distances.length = outIdx;
|
|
2443
|
+
metadata.length = outIdx;
|
|
2444
|
+
}
|
|
2445
|
+
results.push({ ids, distances, metadata });
|
|
2446
|
+
}
|
|
2447
|
+
return results;
|
|
2448
|
+
}
|
|
2449
|
+
async queryBruteForce(config) {
|
|
2450
|
+
const { queryVector, k, filter } = config;
|
|
2451
|
+
if (queryVector.length !== this.dimension) {
|
|
2452
|
+
throw new Error(`Query vector has dimension ${queryVector.length}, expected ${this.dimension}`);
|
|
2453
|
+
}
|
|
2454
|
+
const allNodes = [];
|
|
2455
|
+
for (const [numericId, vector] of this.hnsw.getAllVectors()) {
|
|
2456
|
+
allNodes.push({ id: numericId, vector });
|
|
2457
|
+
}
|
|
2458
|
+
const distances = [];
|
|
2459
|
+
for (const node of allNodes) {
|
|
2460
|
+
const distance = this.hnsw.calculateDistance(new Float32Array(queryVector), node.vector);
|
|
2461
|
+
distances.push({ id: node.id, distance });
|
|
2462
|
+
}
|
|
2463
|
+
distances.sort((a, b) => a.distance - b.distance);
|
|
2464
|
+
const topK = distances.slice(0, k);
|
|
2465
|
+
let filteredResults = topK;
|
|
2466
|
+
if (filter && Object.keys(filter).length > 0) {
|
|
2467
|
+
filteredResults = [];
|
|
2468
|
+
for (const result of topK) {
|
|
2469
|
+
const metadata2 = this.metadata.get(result.id) || {};
|
|
2470
|
+
if (this.matchesFilter(metadata2, filter)) {
|
|
2471
|
+
filteredResults.push(result);
|
|
2472
|
+
}
|
|
2473
|
+
if (filteredResults.length >= k)
|
|
2474
|
+
break;
|
|
2475
|
+
}
|
|
2476
|
+
}
|
|
2477
|
+
const ids = [];
|
|
2478
|
+
const distancesOut = [];
|
|
2479
|
+
const metadata = [];
|
|
2480
|
+
for (const result of filteredResults) {
|
|
2481
|
+
const id = this.idReverseMap.get(result.id);
|
|
2482
|
+
if (id) {
|
|
2483
|
+
ids.push(id);
|
|
2484
|
+
distancesOut.push(result.distance);
|
|
2485
|
+
metadata.push(this.metadata.get(result.id) || {});
|
|
2486
|
+
}
|
|
2487
|
+
}
|
|
2488
|
+
return {
|
|
2489
|
+
ids,
|
|
2490
|
+
distances: distancesOut,
|
|
2491
|
+
metadata
|
|
2492
|
+
};
|
|
2493
|
+
}
|
|
2494
|
+
async upsert(config) {
|
|
2495
|
+
const duplicates = [];
|
|
2496
|
+
for (const id of config.ids) {
|
|
2497
|
+
if (this.idMap.has(id)) {
|
|
2498
|
+
duplicates.push(id);
|
|
2499
|
+
}
|
|
2500
|
+
}
|
|
2501
|
+
if (duplicates.length > 0) {
|
|
2502
|
+
throw new Error(`Cannot upsert: IDs already exist: ${duplicates.join(", ")}. ` + `HNSW indices don't support efficient updates. ` + `To update vectors, create a new index without the old vectors.`);
|
|
2503
|
+
}
|
|
2504
|
+
await this.add(config);
|
|
2505
|
+
}
|
|
2506
|
+
count() {
|
|
2507
|
+
return this.idMap.size - this.deletedIds.size;
|
|
2508
|
+
}
|
|
2509
|
+
countWithDeleted() {
|
|
2510
|
+
return this.idMap.size;
|
|
2511
|
+
}
|
|
2512
|
+
deletedCount() {
|
|
2513
|
+
return this.deletedIds.size;
|
|
2514
|
+
}
|
|
2515
|
+
delete(id) {
|
|
2516
|
+
const numericId = this.idMap.get(id);
|
|
2517
|
+
if (numericId === undefined)
|
|
2518
|
+
return false;
|
|
2519
|
+
if (this.deletedIds.has(numericId))
|
|
2520
|
+
return false;
|
|
2521
|
+
this.deletedIds.add(numericId);
|
|
2522
|
+
return true;
|
|
2523
|
+
}
|
|
2524
|
+
deleteBatch(ids) {
|
|
2525
|
+
let deleted = 0;
|
|
2526
|
+
for (const id of ids) {
|
|
2527
|
+
if (this.delete(id))
|
|
2528
|
+
deleted++;
|
|
2529
|
+
}
|
|
2530
|
+
return deleted;
|
|
2531
|
+
}
|
|
2532
|
+
has(id) {
|
|
2533
|
+
const numericId = this.idMap.get(id);
|
|
2534
|
+
if (numericId === undefined)
|
|
2535
|
+
return false;
|
|
2536
|
+
return !this.deletedIds.has(numericId);
|
|
2537
|
+
}
|
|
2538
|
+
isDeleted(id) {
|
|
2539
|
+
const numericId = this.idMap.get(id);
|
|
2540
|
+
if (numericId === undefined)
|
|
2541
|
+
return false;
|
|
2542
|
+
return this.deletedIds.has(numericId);
|
|
2543
|
+
}
|
|
2544
|
+
async saveToDisk() {
|
|
2545
|
+
const dirPath = dirname(this.indexPath);
|
|
2546
|
+
await mkdir(dirPath, { recursive: true }).catch(() => {});
|
|
2547
|
+
await this.hnsw.saveToFile(this.indexPath);
|
|
2548
|
+
const metaLines = [];
|
|
2549
|
+
for (const [numericId, id] of this.idReverseMap) {
|
|
2550
|
+
const meta = this.metadata.get(numericId);
|
|
2551
|
+
const metaStr = meta ? JSON.stringify(meta) : "{}";
|
|
2552
|
+
metaLines.push(`${id} ${numericId} ${metaStr}`);
|
|
2553
|
+
}
|
|
2554
|
+
await Bun.write(this.metaPath, metaLines.join(`
|
|
2555
|
+
`));
|
|
2556
|
+
if (this.deletedIds.size > 0) {
|
|
2557
|
+
await Bun.write(this.deletedPath, JSON.stringify([...this.deletedIds]));
|
|
2558
|
+
} else {
|
|
2559
|
+
const deletedFile = Bun.file(this.deletedPath);
|
|
2560
|
+
if (await deletedFile.exists()) {
|
|
2561
|
+
await Bun.write(this.deletedPath, "[]");
|
|
2562
|
+
}
|
|
2563
|
+
}
|
|
2564
|
+
}
|
|
2565
|
+
matchesFilter(metadata, filter) {
|
|
2566
|
+
for (const key in filter) {
|
|
2567
|
+
const value = filter[key];
|
|
2568
|
+
const metaValue = metadata[key];
|
|
2569
|
+
if (typeof value === "object" && value !== null) {
|
|
2570
|
+
if (value.$gt !== undefined && metaValue <= value.$gt)
|
|
2571
|
+
return false;
|
|
2572
|
+
if (value.$lt !== undefined && metaValue >= value.$lt)
|
|
2573
|
+
return false;
|
|
2574
|
+
if (value.$gte !== undefined && metaValue < value.$gte)
|
|
2575
|
+
return false;
|
|
2576
|
+
if (value.$lte !== undefined && metaValue > value.$lte)
|
|
2577
|
+
return false;
|
|
2578
|
+
if (value.$ne !== undefined && metaValue === value.$ne)
|
|
2579
|
+
return false;
|
|
2580
|
+
if (value.$in !== undefined && !value.$in.includes(metaValue))
|
|
2581
|
+
return false;
|
|
2582
|
+
if (value.$nin !== undefined && value.$nin.includes(metaValue))
|
|
2583
|
+
return false;
|
|
2584
|
+
} else {
|
|
2585
|
+
if (metaValue !== value)
|
|
2586
|
+
return false;
|
|
2587
|
+
}
|
|
2588
|
+
}
|
|
2589
|
+
return true;
|
|
2590
|
+
}
|
|
2591
|
+
async compact() {
|
|
2592
|
+
if (this.deletedIds.size === 0)
|
|
2593
|
+
return 0;
|
|
2594
|
+
const removedCount = this.deletedIds.size;
|
|
2595
|
+
const activeVectors = [];
|
|
2596
|
+
for (const [numericId, vector] of this.hnsw.getAllVectors()) {
|
|
2597
|
+
if (!this.deletedIds.has(numericId)) {
|
|
2598
|
+
const stringId = this.idReverseMap.get(numericId);
|
|
2599
|
+
if (stringId) {
|
|
2600
|
+
activeVectors.push({
|
|
2601
|
+
id: stringId,
|
|
2602
|
+
numericId,
|
|
2603
|
+
vector,
|
|
2604
|
+
meta: this.metadata.get(numericId)
|
|
2605
|
+
});
|
|
2606
|
+
}
|
|
2607
|
+
}
|
|
2608
|
+
}
|
|
2609
|
+
this.hnsw.destroy();
|
|
2610
|
+
this.hnsw = new HNSWIndex(this.dimension, this.metric, this.M, this.efConstruction);
|
|
2611
|
+
this.idMap.clear();
|
|
2612
|
+
this.idReverseMap.clear();
|
|
2613
|
+
this.metadata.clear();
|
|
2614
|
+
this.deletedIds.clear();
|
|
2615
|
+
const points = new Array(activeVectors.length);
|
|
2616
|
+
for (let i = 0;i < activeVectors.length; i++) {
|
|
2617
|
+
const { id, vector, meta } = activeVectors[i];
|
|
2618
|
+
const newNumericId = i;
|
|
2619
|
+
this.idMap.set(id, newNumericId);
|
|
2620
|
+
this.idReverseMap.set(newNumericId, id);
|
|
2621
|
+
if (meta) {
|
|
2622
|
+
this.metadata.set(newNumericId, meta);
|
|
2623
|
+
}
|
|
2624
|
+
points[i] = { id: newNumericId, vector };
|
|
2625
|
+
}
|
|
2626
|
+
await this.hnsw.addPointsBulk(points);
|
|
2627
|
+
return removedCount;
|
|
2628
|
+
}
|
|
2629
|
+
async destroy() {
|
|
2630
|
+
await this.saveToDisk();
|
|
2631
|
+
if (this.hnsw && typeof this.hnsw.destroy === "function") {
|
|
2632
|
+
this.hnsw.destroy();
|
|
2633
|
+
}
|
|
2634
|
+
this.idMap.clear();
|
|
2635
|
+
this.idReverseMap.clear();
|
|
2636
|
+
this.metadata.clear();
|
|
2637
|
+
this.deletedIds.clear();
|
|
2638
|
+
}
|
|
2639
|
+
}
|
|
2640
|
+
|
|
2641
|
+
// src/storage/BunStorageBackend.ts
|
|
2642
|
+
var {mkdir: mkdir2, readdir, unlink, rm, appendFile} = (() => ({}));
|
|
2643
|
+
/**
 * File-system storage backend built on `Bun.file` / `Bun.write` and the
 * fs/promises helpers bound at module level (`mkdir2`, `readdir`, `unlink`,
 * `rm`, `appendFile`). Keys are paths relative to `basePath`.
 */
class BunStorageBackend {
  type = "bun";
  basePath;
  /** Directories known to exist, so repeated writes skip the mkdir call. */
  dirCache = new Set();

  /**
   * @param {string} [basePath="./vectordb_data"] - Root directory for all keys.
   */
  constructor(basePath = "./vectordb_data") {
    this.basePath = basePath;
  }

  /**
   * Create `dir` (recursively) unless it is already cached as existing.
   * Creation stays best-effort (errors are swallowed), but a directory is only
   * cached when mkdir actually succeeded, so a later call retries instead of
   * assuming a directory that was never created.
   *
   * @param {string} dir - Absolute/full directory path.
   * @returns {Promise<void>}
   */
  async ensureDir(dir) {
    if (this.dirCache.has(dir)) {
      return;
    }
    const created = await mkdir2(dir, { recursive: true }).then(() => true, () => false);
    if (created) {
      this.dirCache.add(dir);
    }
  }

  /**
   * @param {string} key
   * @returns {string} `key` joined onto `basePath`.
   */
  getFullPath(key) {
    return join(this.basePath, key);
  }

  /** Best-effort creation of the base directory. */
  async init() {
    await mkdir2(this.basePath, { recursive: true }).catch(() => {});
  }

  /**
   * @param {string} key
   * @returns {Promise<ArrayBuffer|null>} File contents, or `null` if the file does not exist.
   */
  async read(key) {
    const file = Bun.file(this.getFullPath(key));
    if (!await file.exists()) {
      return null;
    }
    return file.arrayBuffer();
  }

  /**
   * Write `data` to `key`, creating parent directories as needed.
   * @param {string} key
   * @param {*} data - Passed straight to `Bun.write`.
   */
  async write(key, data) {
    const fullPath = this.getFullPath(key);
    await this.ensureDir(dirname(fullPath));
    await Bun.write(fullPath, data);
  }

  /**
   * Append `data` to `key`, creating parent directories as needed.
   * ArrayBuffers are wrapped in a Uint8Array before being handed to `appendFile`.
   * @param {string} key
   * @param {ArrayBuffer|Uint8Array} data
   */
  async append(key, data) {
    const fullPath = this.getFullPath(key);
    await this.ensureDir(dirname(fullPath));
    const appendData = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
    await appendFile(fullPath, appendData);
  }

  /**
   * Remove the file for `key`. Deliberately best-effort: any unlink error
   * (including "file not found") is ignored.
   * @param {string} key
   */
  async delete(key) {
    try {
      await unlink(this.getFullPath(key));
    } catch {}
  }

  /**
   * @param {string} key
   * @returns {Promise<boolean>} Result of `Bun.file(...).exists()`.
   */
  async exists(key) {
    return Bun.file(this.getFullPath(key)).exists();
  }

  /**
   * Recursively list entries under `prefix` (a directory path relative to
   * `basePath`), or under `basePath` itself when no prefix is given.
   * @param {string} [prefix]
   * @returns {Promise<string[]>} Paths relative to `basePath`; empty if the
   *   directory cannot be read.
   */
  async list(prefix) {
    const searchPath = prefix ? this.getFullPath(prefix) : this.basePath;
    try {
      const entries = await readdir(searchPath, { recursive: true });
      return entries.map((entry) => relative(this.basePath, join(searchPath, entry)));
    } catch {
      return [];
    }
  }

  /**
   * Best-effort recursive creation of a directory relative to `basePath`.
   * @param {string} dirPath
   */
  async mkdir(dirPath) {
    await mkdir2(this.getFullPath(dirPath), { recursive: true }).catch(() => {});
  }

  /**
   * Remove everything under `basePath` and recreate the empty base directory.
   * The directory cache is reset; the base directory is re-cached only if it
   * was successfully recreated.
   */
  async clear() {
    await rm(this.basePath, { recursive: true, force: true }).catch(() => {});
    this.dirCache.clear();
    await this.ensureDir(this.basePath);
  }

  /**
   * @param {string} key
   * @returns {Promise<number>} File size in bytes, or 0 if the file does not exist.
   */
  async size(key) {
    const file = Bun.file(this.getFullPath(key));
    if (!await file.exists()) {
      return 0;
    }
    return file.size;
  }

  /**
   * @param {string} key
   * @returns {*} Whatever `Bun.file(...).stream()` returns for the key's file.
   */
  stream(key) {
    return Bun.file(this.getFullPath(key)).stream();
  }

  /** @returns {string} The root directory given at construction. */
  getBasePath() {
    return this.basePath;
  }
}
|
|
2731
|
+
|
|
2732
|
+
// src/VectorDB.ts
|
|
2733
|
+
/**
 * Top-level database object: owns a set of named collections and the storage
 * backend used to create their directories.
 */
class VectorDB {
  /** Open collections keyed by name. */
  collections = new Map();
  storagePath;
  storageBackend;
  initialized = false;

  /**
   * @param {object} [config]
   * @param {string} [config.storagePath] - Defaults to "./vectordb_data".
   * @param {object} [config.storageBackend] - Defaults to a BunStorageBackend on `storagePath`.
   */
  constructor(config) {
    const requestedPath = config?.storagePath;
    this.storagePath = requestedPath || "./vectordb_data";
    const requestedBackend = config?.storageBackend;
    this.storageBackend = requestedBackend || new BunStorageBackend(this.storagePath);
  }

  /** @returns {object} The storage backend in use. */
  getStorageBackend() {
    return this.storageBackend;
  }

  /** Create the storage root once; later calls are no-ops. */
  async init() {
    if (!this.initialized) {
      await this.storageBackend.mkdir("");
      this.initialized = true;
    }
  }

  /**
   * Create, initialise and register a new collection.
   *
   * @param {string} name
   * @param {object} config - Passed through to the Collection constructor.
   * @returns {Promise<object>} The new collection.
   * @throws {Error} When a collection with this name is already registered.
   */
  async createCollection(name, config) {
    await this.init();
    if (this.collections.has(name)) {
      throw new Error(`Collection ${name} already exists`);
    }
    const collectionPath = join(this.storagePath, name);
    await this.storageBackend.mkdir(name);
    const created = new Collection(name, config, collectionPath);
    await created.init();
    this.collections.set(name, created);
    return created;
  }

  /**
   * @param {string} name
   * @returns {object|undefined} The registered collection, if any.
   */
  getCollection(name) {
    return this.collections.get(name);
  }

  /** @returns {Promise<string[]>} Names of the registered collections. */
  async listCollections() {
    return Array.from(this.collections.keys());
  }

  /**
   * Destroy a registered collection and unregister it.
   *
   * @param {string} name
   * @throws {Error} When no such collection is registered.
   */
  async deleteCollection(name) {
    const target = this.collections.get(name);
    if (!target) {
      throw new Error(`Collection ${name} does not exist`);
    }
    await target.destroy();
    this.collections.delete(name);
  }

  /** Destroy every registered collection, one after another, then forget them. */
  async close() {
    for (const [, openCollection] of this.collections) {
      await openCollection.destroy();
    }
    this.collections.clear();
  }
}
|
|
2785
|
+
// src/presets.ts
|
|
2786
|
+
// Index parameter presets. Each record carries the HNSW build parameters
// (M, efConstruction), a default search breadth (efSearch), the recall the
// preset advertises, and free-text hints about the intended workload.

// Low-dimensional vectors (<=128D).
var PRESET_LOW_DIM = {
  name: "low-dim",
  description: "Optimized for low-dimensional vectors (<=128D)",
  M: 16,
  efConstruction: 200,
  efSearch: 100,
  expectedRecall: 0.99,
  targetDimensions: "<=128",
  targetDatasetSize: "1K-100K"
};

// Medium-dimensional vectors (256-512D).
var PRESET_MEDIUM_DIM = {
  name: "medium-dim",
  description: "Optimized for medium-dimensional vectors (256-512D)",
  M: 24,
  efConstruction: 200,
  efSearch: 150,
  expectedRecall: 0.97,
  targetDimensions: "256-512",
  targetDatasetSize: "1K-100K"
};

// High-dimensional vectors (768D and up).
var PRESET_HIGH_DIM = {
  name: "high-dim",
  description: "Optimized for high-dimensional vectors (768D+)",
  M: 32,
  efConstruction: 200,
  efSearch: 128,
  expectedRecall: 0.99,
  targetDimensions: ">=768",
  targetDatasetSize: "1K-500K"
};

// Very high-dimensional vectors (1536D and up).
var PRESET_VERY_HIGH_DIM = {
  name: "very-high-dim",
  description: "Optimized for very high-dimensional vectors (1536D+)",
  M: 48,
  efConstruction: 300,
  efSearch: 150,
  expectedRecall: 0.99,
  targetDimensions: ">=1536",
  targetDatasetSize: "1K-500K"
};

// Small datasets (<10K vectors), any dimension.
var PRESET_SMALL_DATASET = {
  name: "small-dataset",
  description: "Optimized for small datasets (<10K vectors)",
  M: 16,
  efConstruction: 200,
  efSearch: 200,
  expectedRecall: 0.99,
  targetDimensions: "any",
  targetDatasetSize: "<10K"
};

// Large datasets (100K-1M vectors), any dimension.
var PRESET_LARGE_DATASET = {
  name: "large-dataset",
  description: "Optimized for large datasets (100K-1M vectors)",
  M: 32,
  efConstruction: 200,
  efSearch: 128,
  expectedRecall: 0.99,
  targetDimensions: "any",
  targetDatasetSize: "100K-1M"
};

// Largest M / ef values of all presets.
var PRESET_MAX_RECALL = {
  name: "max-recall",
  description: "Maximum recall configuration",
  M: 48,
  efConstruction: 500,
  efSearch: 400,
  expectedRecall: 0.99,
  targetDimensions: "any",
  targetDatasetSize: "any"
};

// Smallest M / ef values of all presets; advertises 90% recall.
var PRESET_LOW_LATENCY = {
  name: "low-latency",
  description: "Minimum latency configuration (90% recall)",
  M: 12,
  efConstruction: 100,
  efSearch: 50,
  expectedRecall: 0.9,
  targetDimensions: "any",
  targetDatasetSize: "any"
};

// Lookup table keyed by each preset's own `name`.
var PRESETS = Object.fromEntries(
  [
    PRESET_LOW_DIM,
    PRESET_MEDIUM_DIM,
    PRESET_HIGH_DIM,
    PRESET_VERY_HIGH_DIM,
    PRESET_SMALL_DATASET,
    PRESET_LARGE_DATASET,
    PRESET_MAX_RECALL,
    PRESET_LOW_LATENCY
  ].map((preset) => [preset.name, preset])
);
|
|
2876
|
+
/**
 * Pick a preset from the vector dimension and, optionally, the dataset size.
 *
 * Precedence: dimension >= 1536, then dimension >= 768, then dataset size
 * (< 10,000 or > 100,000) when one is given, then the remaining dimension
 * bands (<= 128, <= 512), and finally the high-dimension preset.
 *
 * @param {number} dimension
 * @param {number} [datasetSize]
 * @returns {object} One of the PRESET_* records.
 */
function getRecommendedPreset(dimension, datasetSize) {
  if (dimension >= 768) {
    return dimension >= 1536 ? PRESET_VERY_HIGH_DIM : PRESET_HIGH_DIM;
  }
  const sizeGiven = datasetSize !== undefined;
  if (sizeGiven && datasetSize < 1e4) {
    return PRESET_SMALL_DATASET;
  }
  if (sizeGiven && datasetSize > 1e5) {
    return PRESET_LARGE_DATASET;
  }
  if (dimension <= 128) {
    return PRESET_LOW_DIM;
  }
  return dimension <= 512 ? PRESET_MEDIUM_DIM : PRESET_HIGH_DIM;
}
|
|
2893
|
+
/**
 * Look up a preset by name.
 *
 * Only own keys of `PRESETS` are considered. A plain `PRESETS[name]` would
 * also resolve inherited `Object.prototype` members, so names such as
 * "toString" or "constructor" would return a function instead of `undefined`.
 *
 * @param {string} name - Preset name, e.g. "low-dim".
 * @returns {object|undefined} The preset, or `undefined` for unknown names.
 */
function getPreset(name) {
  return Object.hasOwn(PRESETS, name) ? PRESETS[name] : undefined;
}
|
|
2896
|
+
/**
 * Choose a preset from an embedding model name using case-insensitive
 * substring matches, checked in this order:
 *  1. "ada-002" or "text-embedding-3"  -> very-high-dim preset
 *  2. "cohere", "embed-", "bert", "minilm", "mpnet" -> high-dim preset
 *  3. "e5-" -> high-dim preset if the name also contains "large", else medium-dim
 *  4. anything else -> high-dim preset
 *
 * @param {string} embeddingModel - Model name.
 * @returns {object} One of the PRESET_* records.
 */
function getRAGPreset(embeddingModel) {
  const model = embeddingModel.toLowerCase();
  const mentionsAny = (...needles) => needles.some((needle) => model.includes(needle));

  if (mentionsAny("ada-002", "text-embedding-3")) {
    return PRESET_VERY_HIGH_DIM;
  }
  if (mentionsAny("cohere", "embed-", "bert", "minilm", "mpnet")) {
    return PRESET_HIGH_DIM;
  }
  if (mentionsAny("e5-") && !mentionsAny("large")) {
    return PRESET_MEDIUM_DIM;
  }
  return PRESET_HIGH_DIM;
}
|
|
2912
|
+
// src/storage/MemoryBackend.ts
|
|
2913
|
+
/**
 * In-memory storage backend. Values are kept as ArrayBuffers in a Map keyed
 * by path-like strings; directories are tracked separately as a set of paths.
 * Buffers are copied on the way in and on the way out, so callers never share
 * memory with the store.
 */
class MemoryBackend {
  type = "memory";
  storage = new Map();
  /** Known directory paths; the root is the empty string. */
  directories = new Set();

  constructor() {
    this.directories.add("");
  }

  /**
   * @param {string} key
   * @returns {Promise<ArrayBuffer|null>} A copy of the stored buffer, or `null` if absent.
   */
  async read(key) {
    const stored = this.storage.get(key);
    return stored ? stored.slice(0) : null;
  }

  /**
   * Store a copy of `data` under `key`, replacing any previous value.
   * @param {string} key
   * @param {ArrayBuffer|Uint8Array} data - For a view, only its own byte range is stored.
   */
  async write(key, data) {
    let bytes;
    if (data instanceof ArrayBuffer) {
      bytes = data;
    } else {
      const { buffer, byteOffset, byteLength } = data;
      bytes = buffer.slice(byteOffset, byteOffset + byteLength);
    }
    this.storage.set(key, bytes.slice(0));
  }

  /**
   * Append `data` to the value under `key`, creating the entry if needed.
   * @param {string} key
   * @param {ArrayBuffer|Uint8Array} data
   */
  async append(key, data) {
    const tail = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
    const previous = this.storage.get(key);
    if (!previous) {
      const end = tail.byteOffset + tail.byteLength;
      this.storage.set(key, tail.buffer.slice(tail.byteOffset, end));
      return;
    }
    const head = new Uint8Array(previous);
    const joined = new Uint8Array(head.length + tail.length);
    joined.set(head, 0);
    joined.set(tail, head.length);
    this.storage.set(key, joined.buffer.slice(0));
  }

  /** Remove the value stored under `key`, if any. */
  async delete(key) {
    this.storage.delete(key);
  }

  /**
   * @param {string} key
   * @returns {Promise<boolean>} `true` if `key` is a stored value or a known directory.
   */
  async exists(key) {
    if (this.storage.has(key)) {
      return true;
    }
    return this.directories.has(key);
  }

  /**
   * @param {string} [prefix] - Plain string prefix; omit to list everything.
   * @returns {Promise<string[]>} Stored keys starting with `prefix`, in insertion order.
   */
  async list(prefix) {
    return [...this.storage.keys()].filter((key) => !prefix || key.startsWith(prefix));
  }

  /**
   * Register `path` and each of its "/"-separated ancestor paths as directories.
   * @param {string} path
   */
  async mkdir(path) {
    this.directories.add(path);
    const segments = path.split("/");
    segments.forEach((_, index) => {
      this.directories.add(segments.slice(0, index + 1).join("/"));
    });
  }

  /** Drop all values and directories, keeping only the root directory. */
  clear() {
    this.storage.clear();
    this.directories.clear();
    this.directories.add("");
  }

  /** @returns {{keyCount: number, totalBytes: number}} Entry count and summed byte length. */
  getStats() {
    const totalBytes = [...this.storage.values()].reduce((sum, buf) => sum + buf.byteLength, 0);
    return { keyCount: this.storage.size, totalBytes };
  }

  /** @returns {Object<string, ArrayBuffer>} Copies of all stored buffers keyed by name. */
  export() {
    const snapshot = {};
    for (const [name, buf] of this.storage) {
      snapshot[name] = buf.slice(0);
    }
    return snapshot;
  }

  /**
   * Copy every entry of `data` into the store; existing keys not present in
   * `data` are left untouched.
   * @param {Object<string, ArrayBuffer>} data
   */
  import(data) {
    for (const name of Object.keys(data)) {
      this.storage.set(name, data[name].slice(0));
    }
  }
}
|
|
2993
|
+
// src/storage/OPFSBackend.ts
|
|
2994
|
+
/**
 * Storage backend on the browser's Origin Private File System, reached
 * through `navigator.storage.getDirectory()`. Keys are "/"-separated paths
 * below the OPFS root; empty path segments are ignored.
 */
class OPFSBackend {
  type = "opfs";
  /** Root directory handle; set by `init()`. */
  root = null;
  initialized = false;

  /**
   * Acquire the OPFS root handle. Subsequent calls are no-ops.
   * @throws {Error} When `navigator.storage.getDirectory` is unavailable.
   */
  async init() {
    if (this.initialized)
      return;
    if (typeof navigator === "undefined" || !navigator.storage?.getDirectory) {
      throw new Error("OPFS not available in this environment. Use MemoryBackend or IndexedDBBackend instead.");
    }
    this.root = await navigator.storage.getDirectory();
    this.initialized = true;
  }

  /** Run `init()` if it has not completed yet. */
  async ensureInitialized() {
    if (!this.initialized) {
      await this.init();
    }
  }

  /**
   * Resolve the file handle for `key`, walking (and optionally creating) the
   * intermediate directories.
   *
   * @param {string} key
   * @param {boolean} [create=false] - Create missing directories and the file.
   * @returns {Promise<object|null>} The handle, or `null` when it cannot be obtained.
   * @throws {Error} When `create` is set and a directory cannot be created.
   */
  async getFileHandle(key, create = false) {
    await this.ensureInitialized();
    const parts = key.split("/");
    const fileName = parts.pop();
    let currentDir = this.root;
    for (const part of parts) {
      if (part === "")
        continue;
      try {
        currentDir = await currentDir.getDirectoryHandle(part, { create });
      } catch {
        if (!create)
          return null;
        throw new Error(`Failed to create directory: ${part}`);
      }
    }
    try {
      return await currentDir.getFileHandle(fileName, { create });
    } catch {
      return null;
    }
  }

  /**
   * Resolve the directory handle for `path`.
   *
   * @param {string} path
   * @param {boolean} [create=false] - Create missing directories along the way.
   * @returns {Promise<object|null>} The handle (the root for an empty path),
   *   or `null` when a segment is missing and `create` is false.
   * @throws {Error} When `create` is set and a segment cannot be obtained.
   */
  async getDirectoryHandle(path, create = false) {
    await this.ensureInitialized();
    const parts = path.split("/").filter((p) => p !== "");
    let currentDir = this.root;
    for (const part of parts) {
      try {
        currentDir = await currentDir.getDirectoryHandle(part, { create });
      } catch {
        if (!create)
          return null;
        throw new Error(`Failed to get directory: ${part}`);
      }
    }
    return currentDir;
  }

  /**
   * @param {string} key
   * @returns {Promise<ArrayBuffer|null>} File contents, or `null` if the file
   *   is missing or cannot be read.
   */
  async read(key) {
    await this.ensureInitialized();
    const handle = await this.getFileHandle(key, false);
    if (!handle)
      return null;
    try {
      const file = await handle.getFile();
      return file.arrayBuffer();
    } catch {
      return null;
    }
  }

  /**
   * Replace the contents of `key` with `data`, creating the file and its
   * directories as needed. The writable stream is always closed.
   *
   * @param {string} key
   * @param {ArrayBuffer|Uint8Array} data - For a view, only its own byte range is written.
   * @throws {Error} When the file handle cannot be created.
   */
  async write(key, data) {
    await this.ensureInitialized();
    const handle = await this.getFileHandle(key, true);
    if (!handle) {
      throw new Error(`Failed to create file: ${key}`);
    }
    const writable = await handle.createWritable();
    try {
      const writeData = data instanceof ArrayBuffer ? data : data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength);
      await writable.write(writeData);
    } finally {
      await writable.close();
    }
  }

  /**
   * Append `data` to `key`, creating the file if needed. Existing content is
   * kept and the write position is moved to the current end of the file.
   *
   * @param {string} key
   * @param {ArrayBuffer|Uint8Array} data
   * @throws {Error} When the file handle cannot be created.
   */
  async append(key, data) {
    await this.ensureInitialized();
    const handle = await this.getFileHandle(key, true);
    if (!handle) {
      throw new Error(`Failed to create file: ${key}`);
    }
    const appendData = data instanceof ArrayBuffer ? new Uint8Array(data) : data;
    let existingSize = 0;
    try {
      const file = await handle.getFile();
      existingSize = file.size;
    } catch {
      // Treat an unreadable file as empty and write from offset 0.
      existingSize = 0;
    }
    const writable = await handle.createWritable({ keepExistingData: true });
    try {
      await writable.seek(existingSize);
      const writeData = appendData.buffer.slice(appendData.byteOffset, appendData.byteOffset + appendData.byteLength);
      await writable.write(writeData);
    } finally {
      await writable.close();
    }
  }

  /**
   * Remove the entry for `key`. Best-effort: a missing directory or a failed
   * removal is silently ignored.
   * @param {string} key
   */
  async delete(key) {
    await this.ensureInitialized();
    const parts = key.split("/");
    const fileName = parts.pop();
    let currentDir = this.root;
    for (const part of parts) {
      if (part === "")
        continue;
      try {
        currentDir = await currentDir.getDirectoryHandle(part, { create: false });
      } catch {
        return;
      }
    }
    try {
      await currentDir.removeEntry(fileName);
    } catch {}
  }

  /**
   * @param {string} key
   * @returns {Promise<boolean>} Whether a file handle can be obtained for `key`.
   */
  async exists(key) {
    await this.ensureInitialized();
    const handle = await this.getFileHandle(key, false);
    return handle !== null;
  }

  /**
   * Recursively list files below the directory named by `prefix` (or below
   * the root when no prefix is given).
   *
   * @param {string} [prefix] - Directory path, not a string prefix.
   * @returns {Promise<string[]>} File paths, each starting with `prefix` when
   *   one was given; empty if the directory does not exist.
   */
  async list(prefix) {
    await this.ensureInitialized();
    const results = [];
    const basePath = prefix || "";
    const dir = basePath ? await this.getDirectoryHandle(basePath, false) : this.root;
    if (!dir)
      return results;
    const listDir = async (dirHandle, pathPrefix) => {
      for await (const entry of dirHandle.values()) {
        const entryPath = pathPrefix ? `${pathPrefix}/${entry.name}` : entry.name;
        if (entry.kind === "file") {
          results.push(entryPath);
        } else if (entry.kind === "directory") {
          await listDir(entry, entryPath);
        }
      }
    };
    await listDir(dir, basePath);
    return results;
  }

  /**
   * Create the directory `path`, including missing parents.
   * @param {string} path
   */
  async mkdir(path) {
    await this.ensureInitialized();
    await this.getDirectoryHandle(path, true);
  }

  /**
   * Recursively remove every entry directly under the root.
   *
   * Entry names are collected before anything is removed, so the directory
   * is never modified while its async iterator is still being consumed;
   * removing during iteration can cause entries to be skipped.
   */
  async clear() {
    await this.ensureInitialized();
    const names = [];
    for await (const entry of this.root.values()) {
      names.push(entry.name);
    }
    for (const name of names) {
      await this.root.removeEntry(name, { recursive: true });
    }
  }

  /** @returns {boolean} Whether `navigator.storage.getDirectory` exists in this environment. */
  static isAvailable() {
    return typeof navigator !== "undefined" && !!navigator.storage?.getDirectory;
  }
}
|
|
3155
|
+
// src/storage/createStorageBackend.ts
|
|
3156
|
+
/**
 * Classify the current runtime.
 *
 * @returns {"bun"|"browser"|"unknown"} "bun" when a `Bun` global exists,
 *   "browser" when both `window` and `navigator` exist, otherwise "unknown".
 */
function detectEnvironment() {
  const hasBun = typeof Bun !== "undefined";
  if (hasBun) {
    return "bun";
  }
  const hasWindow = typeof window !== "undefined";
  const hasNavigator = typeof navigator !== "undefined";
  return hasWindow && hasNavigator ? "browser" : "unknown";
}
|
|
3165
|
+
/**
 * Build a storage backend.
 *
 * With an explicit `type` ("bun", "opfs" or "memory") that backend is
 * created. Any other type, including the default "auto", selects by
 * environment: Bun -> BunStorageBackend; browser with OPFS -> OPFSBackend
 * (falling back to memory if its init fails); everything else -> MemoryBackend.
 *
 * @param {object} [options]
 * @param {string} [options.type="auto"]
 * @param {string} [options.path="./vectordb_data"] - Base path for the Bun backend.
 * @returns {Promise<object>} An initialised backend instance.
 * @throws {Error} When "opfs" is requested explicitly but is not available.
 */
async function createStorageBackend(options) {
  const requested = options?.type ?? "auto";

  const openBun = async () => {
    const bunBackend = new BunStorageBackend(options?.path ?? "./vectordb_data");
    await bunBackend.init();
    return bunBackend;
  };
  const openOpfs = async () => {
    const opfsBackend = new OPFSBackend();
    await opfsBackend.init();
    return opfsBackend;
  };

  switch (requested) {
    case "bun":
      return openBun();
    case "opfs":
      if (!OPFSBackend.isAvailable()) {
        throw new Error("OPFS not available in this environment");
      }
      return openOpfs();
    case "memory":
      return new MemoryBackend();
  }

  // Automatic selection.
  const env = detectEnvironment();
  if (env === "bun") {
    return openBun();
  }
  if (env === "browser" && OPFSBackend.isAvailable()) {
    try {
      return await openOpfs();
    } catch {
      // OPFS advertised but unusable: fall through to the in-memory backend.
    }
  }
  return new MemoryBackend();
}
|
|
3200
|
+
/**
 * Suggest a storage type for the current runtime.
 *
 * @returns {"bun" | "opfs" | "memory"} `"bun"` under Bun, `"opfs"` in a
 *   browser where OPFSBackend reports itself available, `"memory"` in every
 *   other case.
 */
function getRecommendedStorageType() {
  switch (detectEnvironment()) {
    case "bun":
      return "bun";
    case "browser":
      return OPFSBackend.isAvailable() ? "opfs" : "memory";
    default:
      return "memory";
  }
}
|
|
3210
|
+
/**
 * Tell whether a storage type can be used in the current runtime.
 *
 * @param {string} type - One of `"bun"`, `"opfs"`, `"memory"`, `"auto"`.
 * @returns {boolean} `true` for `"memory"` and `"auto"`; for `"bun"` whether
 *   a global `Bun` exists; for `"opfs"` whatever OPFSBackend.isAvailable()
 *   reports; `false` for any other value.
 */
function isStorageTypeAvailable(type) {
  if (type === "memory" || type === "auto") {
    return true;
  }
  if (type === "bun") {
    return typeof Bun !== "undefined";
  }
  if (type === "opfs") {
    return OPFSBackend.isAvailable();
  }
  return false;
}
|
|
3224
|
+
// src/storage/WriteAheadLog.ts
|
|
3225
|
+
var {appendFile: appendFile2, mkdir: mkdir3, unlink: unlink2} = (() => ({}));
|
|
3226
|
+
// Two-way lookup for write-ahead-log record kinds: name -> numeric code and
// numeric code -> name.
var WALOperationType;
((kinds) => {
  const table = [
    ["ADD_VECTOR", 1],
    ["ADD_NEIGHBORS", 2],
    ["UPDATE_ENTRY_POINT", 3],
    ["CHECKPOINT", 4],
  ];
  for (const [label, code] of table) {
    kinds[label] = code;
    kinds[code] = label;
  }
})(WALOperationType ||= {});
|
|
3233
|
+
|
|
3234
|
+
/**
 * Append-only write-ahead log stored at `${basePath}.wal`.
 *
 * Entries are buffered in memory and written in batches. On disk each record
 * is: 1 byte type, 8 byte little-endian float64 timestamp, 4 byte
 * little-endian uint32 payload length, then the payload.
 *
 * NOTE(review): in this bundle the fs helpers used by flush()/delete()
 * (`appendFile2`, `mkdir3`, `unlink2`) are destructured from a stub, so they
 * look undefined here — confirm the target runtime supplies real ones.
 */
class WriteAheadLog {
  /** Size in bytes of the fixed record header (type + timestamp + length). */
  static #HEADER_SIZE = 1 + 8 + 4;

  logPath;
  pendingEntries = [];
  flushThreshold;
  lastFlushTime = 0;
  entryCount = 0;

  /**
   * @param {string} basePath - Path prefix; the log lives at `${basePath}.wal`.
   * @param {number} [flushThreshold=100] - Buffered entry count that triggers a flush.
   */
  constructor(basePath, flushThreshold = 100) {
    this.logPath = `${basePath}.wal`;
    this.flushThreshold = flushThreshold;
  }

  /** @returns {string} Path of the log file. */
  getPath() {
    return this.logPath;
  }

  /** @returns {Promise<boolean>} Whether the log file exists. */
  async exists() {
    return Bun.file(this.logPath).exists();
  }

  /**
   * Buffer one entry, count it, and flush once the threshold is reached.
   *
   * @param {number} type - WALOperationType code.
   * @param {ArrayBuffer} data - Encoded payload.
   */
  async #enqueue(type, data) {
    this.pendingEntries.push({ type, timestamp: Date.now(), data });
    this.entryCount++;
    if (this.pendingEntries.length >= this.flushThreshold) {
      await this.flush();
    }
  }

  /**
   * Log a vector insertion. Payload: uint32 id, uint32 length, float32 values.
   *
   * @param {number} id - Vector id, stored as uint32.
   * @param {ArrayLike<number>} vector - Components, stored as float32.
   */
  async appendVector(id, vector) {
    const buffer = new ArrayBuffer(4 + 4 + vector.length * 4);
    const view = new DataView(buffer);
    view.setUint32(0, id, true);
    view.setUint32(4, vector.length, true);
    new Float32Array(buffer, 8).set(vector);
    await this.#enqueue(1 /* ADD_VECTOR */, buffer);
  }

  /**
   * Log a neighbor list. Payload: uint32 nodeId, uint32 layer, uint32 count,
   * then one uint32 per neighbor.
   *
   * @param {number} nodeId
   * @param {number} layer
   * @param {number[]} neighbors
   */
  async appendNeighbors(nodeId, layer, neighbors) {
    const buffer = new ArrayBuffer(4 + 4 + 4 + neighbors.length * 4);
    const view = new DataView(buffer);
    view.setUint32(0, nodeId, true);
    view.setUint32(4, layer, true);
    view.setUint32(8, neighbors.length, true);
    let offset = 12;
    for (const neighbor of neighbors) {
      view.setUint32(offset, neighbor, true);
      offset += 4;
    }
    await this.#enqueue(2 /* ADD_NEIGHBORS */, buffer);
  }

  /**
   * Log an entry-point change. Payload: int32 entryPointId, int32 maxLevel
   * (signed, so negative values round-trip).
   *
   * @param {number} entryPointId
   * @param {number} maxLevel
   */
  async appendEntryPointUpdate(entryPointId, maxLevel) {
    const buffer = new ArrayBuffer(8);
    const view = new DataView(buffer);
    view.setInt32(0, entryPointId, true);
    view.setInt32(4, maxLevel, true);
    await this.#enqueue(3 /* UPDATE_ENTRY_POINT */, buffer);
  }

  /**
   * Append a checkpoint marker (payload: float64 wall-clock time) and flush
   * immediately. As in the original, the marker is not added to entryCount.
   */
  async checkpoint() {
    const buffer = new ArrayBuffer(8);
    new DataView(buffer).setFloat64(0, Date.now(), true);
    this.pendingEntries.push({
      type: 4 /* CHECKPOINT */,
      timestamp: Date.now(),
      data: buffer,
    });
    await this.flush();
  }

  /**
   * Encode one entry as header + payload.
   *
   * @param {{ type: number, timestamp: number, data: ArrayBuffer }} entry
   * @returns {Uint8Array}
   */
  serializeEntry(entry) {
    const headerSize = WriteAheadLog.#HEADER_SIZE;
    const bytes = new Uint8Array(headerSize + entry.data.byteLength);
    const view = new DataView(bytes.buffer);
    view.setUint8(0, entry.type);
    view.setFloat64(1, entry.timestamp, true);
    view.setUint32(9, entry.data.byteLength, true);
    bytes.set(new Uint8Array(entry.data), headerSize);
    return bytes;
  }

  /**
   * Write all buffered entries to the log file in a single append.
   *
   * The batch is detached from `pendingEntries` before the first await, so
   * entries queued while the write is in flight are neither dropped nor
   * written twice. If the write fails, the batch is put back in front of
   * anything queued meanwhile and the error is rethrown.
   */
  async flush() {
    if (this.pendingEntries.length === 0) {
      return;
    }
    const batch = this.pendingEntries;
    this.pendingEntries = [];
    try {
      const chunks = batch.map((entry) => this.serializeEntry(entry));
      const totalSize = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
      const combined = new Uint8Array(totalSize);
      let offset = 0;
      for (const chunk of chunks) {
        combined.set(chunk, offset);
        offset += chunk.length;
      }
      // Directory creation is best-effort; a real problem surfaces on append.
      await mkdir3(dirname(this.logPath), { recursive: true }).catch(() => {});
      await appendFile2(this.logPath, combined);
    } catch (err) {
      this.pendingEntries = batch.concat(this.pendingEntries);
      throw err;
    }
    this.lastFlushTime = Date.now();
  }

  /**
   * Read every complete record from the log file.
   *
   * Parsing stops at the first record whose header or payload runs past the
   * end of the file (for example a torn write at the tail); all complete
   * records before it are returned.
   *
   * @returns {Promise<Array<{ type: number, timestamp: number, data: ArrayBuffer }>>}
   *   Empty when the file does not exist.
   */
  async readEntries() {
    const file = Bun.file(this.logPath);
    if (!(await file.exists())) {
      return [];
    }
    const buffer = await file.arrayBuffer();
    const view = new DataView(buffer);
    const headerSize = WriteAheadLog.#HEADER_SIZE;
    const entries = [];
    let offset = 0;
    while (offset + headerSize <= buffer.byteLength) {
      const type = view.getUint8(offset);
      const timestamp = view.getFloat64(offset + 1, true);
      const dataLength = view.getUint32(offset + 9, true);
      const start = offset + headerSize;
      const end = start + dataLength;
      if (end > buffer.byteLength) {
        break; // truncated payload: ignore the incomplete tail record
      }
      entries.push({ type, timestamp, data: buffer.slice(start, end) });
      offset = end;
    }
    return entries;
  }

  /**
   * Decode an ADD_VECTOR payload.
   *
   * @param {ArrayBuffer} data
   * @returns {{ id: number, vector: Float32Array }} `vector` is a view over `data`.
   */
  static parseVectorEntry(data) {
    const view = new DataView(data);
    const id = view.getUint32(0, true);
    const vectorLength = view.getUint32(4, true);
    const vector = new Float32Array(data, 8, vectorLength);
    return { id, vector };
  }

  /**
   * Decode an ADD_NEIGHBORS payload.
   *
   * @param {ArrayBuffer} data
   * @returns {{ nodeId: number, layer: number, neighbors: number[] }}
   */
  static parseNeighborsEntry(data) {
    const view = new DataView(data);
    const nodeId = view.getUint32(0, true);
    const layer = view.getUint32(4, true);
    const neighborCount = view.getUint32(8, true);
    const neighbors = [];
    for (let i = 0; i < neighborCount; i++) {
      neighbors.push(view.getUint32(12 + i * 4, true));
    }
    return { nodeId, layer, neighbors };
  }

  /**
   * Decode an UPDATE_ENTRY_POINT payload.
   *
   * @param {ArrayBuffer} data
   * @returns {{ entryPointId: number, maxLevel: number }}
   */
  static parseEntryPointEntry(data) {
    const view = new DataView(data);
    return {
      entryPointId: view.getInt32(0, true),
      maxLevel: view.getInt32(4, true),
    };
  }

  /** @returns {number} Entries appended since construction or the last clear/delete. */
  getEntryCount() {
    return this.entryCount;
  }

  /** Truncate the log file if it exists, and reset the in-memory state. */
  async clear() {
    const file = Bun.file(this.logPath);
    if (await file.exists()) {
      await Bun.write(this.logPath, new Uint8Array(0));
    }
    this.pendingEntries = [];
    this.entryCount = 0;
  }

  /** Remove the log file (errors are ignored) and reset the in-memory state. */
  async delete() {
    try {
      await unlink2(this.logPath);
    } catch {
      // Best-effort removal: failures to unlink are deliberately ignored.
    }
    this.pendingEntries = [];
    this.entryCount = 0;
  }
}
|
|
3416
|
+
// src/storage/BatchWriter.ts
|
|
3417
|
+
/**
 * Buffers writes and appends per key and pushes them to a storage backend in
 * batches, either when a count/byte threshold is crossed, on a timer, or on
 * an explicit flush()/close().
 *
 * The backend is expected to expose async `write(key, bytes)`,
 * `append(key, bytes)`, `delete(key)` and `read(key)`.
 */
class BatchWriter {
  backend;
  pendingWrites = new Map();
  pendingAppends = new Map();
  pendingBytes = 0;
  maxPendingWrites;
  maxPendingBytes;
  autoFlushInterval;
  flushTimer = null;
  isFlushing = false;
  flushPromise = null;

  /**
   * @param {object} backend - Storage backend receiving the batched operations.
   * @param {object} [options]
   * @param {number} [options.maxPendingWrites=100] - Flush when this many keys are pending.
   * @param {number} [options.maxPendingBytes=1048576] - Flush when this many bytes are pending.
   * @param {number} [options.autoFlushInterval=0] - Timer period in ms; 0 disables the timer.
   */
  constructor(backend, options = {}) {
    this.backend = backend;
    this.maxPendingWrites = options.maxPendingWrites ?? 100;
    this.maxPendingBytes = options.maxPendingBytes ?? 1024 * 1024;
    this.autoFlushInterval = options.autoFlushInterval ?? 0;
    if (this.autoFlushInterval > 0) {
      this.startAutoFlush();
    }
  }

  /** View an ArrayBuffer as bytes; anything else is passed through unchanged. */
  static #toBytes(data) {
    return data instanceof ArrayBuffer ? new Uint8Array(data) : data;
  }

  /** Concatenate byte chunks into one freshly allocated Uint8Array. */
  static #concat(chunks) {
    const total = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
    const combined = new Uint8Array(total);
    let offset = 0;
    for (const chunk of chunks) {
      combined.set(chunk, offset);
      offset += chunk.length;
    }
    return combined;
  }

  /** Drop buffered appends for `key` and release their byte accounting. */
  #discardAppends(key) {
    const chunks = this.pendingAppends.get(key);
    if (!chunks) {
      return;
    }
    for (const chunk of chunks) {
      this.pendingBytes -= chunk.length;
    }
    this.pendingAppends.delete(key);
  }

  /**
   * Queue a full overwrite of `key`. Replaces any pending write and discards
   * any pending appends for the same key.
   *
   * @param {string} key
   * @param {ArrayBuffer|Uint8Array} data
   */
  async write(key, data) {
    const bytes = BatchWriter.#toBytes(data);
    const existing = this.pendingWrites.get(key);
    if (existing) {
      this.pendingBytes -= existing.data.length;
    }
    this.#discardAppends(key);
    this.pendingWrites.set(key, { key, data: bytes, append: false });
    this.pendingBytes += bytes.length;
    await this.checkThresholds();
  }

  /**
   * Queue an append to `key`. If a full write for the key is already
   * pending, the bytes are merged into it; otherwise they are buffered as an
   * append chunk.
   *
   * @param {string} key
   * @param {ArrayBuffer|Uint8Array} data
   */
  async append(key, data) {
    const bytes = BatchWriter.#toBytes(data);
    const existingWrite = this.pendingWrites.get(key);
    if (existingWrite) {
      existingWrite.data = BatchWriter.#concat([existingWrite.data, bytes]);
      this.pendingBytes += bytes.length;
    } else {
      let chunks = this.pendingAppends.get(key);
      if (!chunks) {
        chunks = [];
        this.pendingAppends.set(key, chunks);
      }
      chunks.push(bytes);
      this.pendingBytes += bytes.length;
    }
    await this.checkThresholds();
  }

  /**
   * Discard anything pending for `key`, then delete it on the backend.
   *
   * @param {string} key
   */
  async delete(key) {
    const existing = this.pendingWrites.get(key);
    if (existing) {
      this.pendingBytes -= existing.data.length;
      this.pendingWrites.delete(key);
    }
    this.#discardAppends(key);
    await this.backend.delete(key);
  }

  /**
   * Read `key` as it will look once pending operations land: a pending
   * write wins outright; pending appends are concatenated after the
   * backend's current content; otherwise the backend result is returned.
   *
   * @param {string} key
   * @returns {Promise<ArrayBuffer|*>} A copy of the bytes, or whatever the
   *   backend returns for the key.
   */
  async read(key) {
    const pending = this.pendingWrites.get(key);
    if (pending) {
      const { buffer, byteOffset, length } = pending.data;
      return buffer.slice(byteOffset, byteOffset + length);
    }
    const chunks = this.pendingAppends.get(key);
    if (chunks && chunks.length > 0) {
      const existing = await this.backend.read(key);
      const existingBytes = existing ? new Uint8Array(existing) : new Uint8Array(0);
      return BatchWriter.#concat([existingBytes, ...chunks]).buffer;
    }
    return this.backend.read(key);
  }

  /** Flush when the pending key count or pending byte count hits its limit. */
  async checkThresholds() {
    const totalWrites = this.pendingWrites.size + this.pendingAppends.size;
    if (totalWrites >= this.maxPendingWrites || this.pendingBytes >= this.maxPendingBytes) {
      await this.flush();
    }
  }

  /**
   * Push everything pending to the backend.
   *
   * If another flush is already running, this waits for it and then flushes
   * whatever was queued after that flush took its snapshot, so that a
   * completed flush()/close() never leaves earlier-queued data behind.
   * A failure of the in-flight flush propagates to the waiter.
   */
  async flush() {
    while (this.isFlushing && this.flushPromise) {
      await this.flushPromise;
    }
    if (this.pendingWrites.size === 0 && this.pendingAppends.size === 0) {
      return;
    }
    this.isFlushing = true;
    this.flushPromise = this.doFlush();
    try {
      await this.flushPromise;
    } finally {
      this.isFlushing = false;
      this.flushPromise = null;
    }
  }

  /**
   * Snapshot and clear the pending maps, then issue all backend writes and
   * appends in parallel.
   *
   * NOTE(review): the pending state is cleared before the backend calls
   * settle, so a rejected backend call loses that batch — confirm this is
   * acceptable for callers.
   */
  async doFlush() {
    const writes = Array.from(this.pendingWrites.values());
    const appends = Array.from(this.pendingAppends.entries());
    this.pendingWrites.clear();
    this.pendingAppends.clear();
    this.pendingBytes = 0;
    const operations = [
      ...writes.map((write) => this.backend.write(write.key, write.data)),
      ...appends.map(([key, chunks]) => this.backend.append(key, BatchWriter.#concat(chunks))),
    ];
    await Promise.all(operations);
  }

  /** @returns {object} Current pending counts and the configured limits. */
  getStats() {
    return {
      pendingWrites: this.pendingWrites.size,
      pendingAppends: this.pendingAppends.size,
      pendingBytes: this.pendingBytes,
      maxPendingWrites: this.maxPendingWrites,
      maxPendingBytes: this.maxPendingBytes,
    };
  }

  /** @returns {boolean} Whether any write or append is waiting to be flushed. */
  hasPendingWrites() {
    return this.pendingWrites.size > 0 || this.pendingAppends.size > 0;
  }

  /**
   * Start the periodic flush timer (no-op if already running).
   *
   * NOTE(review): a rejected flush inside the timer callback is not handled
   * here and will surface as an unhandled rejection.
   */
  startAutoFlush() {
    if (this.flushTimer) {
      return;
    }
    this.flushTimer = setInterval(async () => {
      if (this.hasPendingWrites()) {
        await this.flush();
      }
    }, this.autoFlushInterval);
  }

  /** Stop the periodic flush timer if it is running. */
  stopAutoFlush() {
    if (this.flushTimer) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
  }

  /** Stop the timer and flush everything still pending. */
  async close() {
    this.stopAutoFlush();
    await this.flush();
  }

  /** @returns {object} The backend passed to the constructor. */
  getBackend() {
    return this.backend;
  }
}
|
|
3602
|
+
/**
 * Convenience factory for BatchWriter.
 *
 * @param {object} backend - Storage backend forwarded to the constructor.
 * @param {object} [options] - Options forwarded to the constructor unchanged.
 * @returns {BatchWriter}
 */
function createBatchWriter(backend, options) {
  const writer = new BatchWriter(backend, options);
  return writer;
}
|
|
3605
|
+
// src/errors.ts
|
|
3606
|
+
/**
 * Base error type carrying a string `code` alongside the message.
 */
class VectorDBError extends Error {
  code;
  name = "VectorDBError";

  /**
   * @param {string} message - Human-readable description.
   * @param {string} code - Error code stored on the instance.
   */
  constructor(message, code) {
    super(message);
    this.code = code;
  }
}
|
|
3614
|
+
|
|
3615
|
+
/**
 * Raised with code "DIMENSION_MISMATCH"; records the expected and actual
 * dimensions.
 */
class DimensionMismatchError extends VectorDBError {
  expected;
  actual;

  /**
   * @param {number} expected - Dimension that was required.
   * @param {number} actual - Dimension that was supplied.
   * @param {string} [context] - Optional prefix used in the message.
   */
  constructor(expected, actual, context) {
    let message;
    if (context) {
      message = `${context}: expected dimension ${expected}, got ${actual}`;
    } else {
      message = `Dimension mismatch: expected ${expected}, got ${actual}`;
    }
    super(message, "DIMENSION_MISMATCH");
    this.name = "DimensionMismatchError";
    Object.assign(this, { expected, actual });
  }
}
|
|
3626
|
+
|
|
3627
|
+
/**
 * Raised with code "DUPLICATE_VECTOR"; records the offending ids.
 */
class DuplicateVectorError extends VectorDBError {
  ids;

  /**
   * @param {Array} ids - Ids that already exist; a single id gets a
   *   singular message, anything else a comma-separated list.
   */
  constructor(ids) {
    const isSingle = ids.length === 1;
    let message;
    if (isSingle) {
      message = `Vector with ID '${ids[0]}' already exists`;
    } else {
      message = `Vectors with IDs already exist: ${ids.join(", ")}`;
    }
    super(message, "DUPLICATE_VECTOR");
    this.name = "DuplicateVectorError";
    this.ids = ids;
  }
}
|
|
3636
|
+
|
|
3637
|
+
/**
 * Raised with code "COLLECTION_NOT_FOUND"; records the collection name.
 */
class CollectionNotFoundError extends VectorDBError {
  collectionName;

  /**
   * @param {string} collectionName - Name that could not be found.
   */
  constructor(collectionName) {
    const message = `Collection '${collectionName}' does not exist`;
    super(message, "COLLECTION_NOT_FOUND");
    Object.assign(this, { name: "CollectionNotFoundError", collectionName });
  }
}
|
|
3645
|
+
|
|
3646
|
+
/**
 * Raised with code "COLLECTION_EXISTS"; records the collection name.
 */
class CollectionExistsError extends VectorDBError {
  collectionName;

  /**
   * @param {string} collectionName - Name that is already taken.
   */
  constructor(collectionName) {
    const message = `Collection '${collectionName}' already exists`;
    super(message, "COLLECTION_EXISTS");
    Object.assign(this, { name: "CollectionExistsError", collectionName });
  }
}
|
|
3654
|
+
|
|
3655
|
+
/**
 * Raised with code "STORAGE_ERROR"; records the failed operation and,
 * when given, the path involved.
 */
class StorageError extends VectorDBError {
  operation;
  path;

  /**
   * @param {string} operation - Name of the storage operation that failed.
   * @param {string} message - Detail appended to the message.
   * @param {string} [path] - Path involved in the failure.
   */
  constructor(operation, message, path) {
    const text = `Storage ${operation} failed: ${message}`;
    super(text, "STORAGE_ERROR");
    this.name = "StorageError";
    Object.assign(this, { operation, path });
  }
}
|
|
3665
|
+
|
|
3666
|
+
/**
 * Raised with code "QUANTIZATION_ERROR".
 */
class QuantizationError extends VectorDBError {
  /**
   * @param {string} message - Human-readable description.
   */
  constructor(message) {
    const code = "QUANTIZATION_ERROR";
    super(message, code);
    Object.assign(this, { name: "QuantizationError" });
  }
}
|
|
3672
|
+
|
|
3673
|
+
/**
 * Raised with code "VECTOR_NOT_FOUND"; records the missing vector id.
 */
class VectorNotFoundError extends VectorDBError {
  vectorId;

  /**
   * @param {string|number} vectorId - Id that could not be found.
   */
  constructor(vectorId) {
    const message = `Vector '${vectorId}' not found`;
    super(message, "VECTOR_NOT_FOUND");
    Object.assign(this, { name: "VectorNotFoundError", vectorId });
  }
}
|
|
3681
|
+
export {
|
|
3682
|
+
normalizeInPlace,
|
|
3683
|
+
l2SquaredInt8,
|
|
3684
|
+
l2SquaredFast,
|
|
3685
|
+
isStorageTypeAvailable,
|
|
3686
|
+
getRecommendedStorageType,
|
|
3687
|
+
getRecommendedPreset,
|
|
3688
|
+
getRAGPreset,
|
|
3689
|
+
getPreset,
|
|
3690
|
+
encodeVarint,
|
|
3691
|
+
dotProductInt8,
|
|
3692
|
+
dotProductFast,
|
|
3693
|
+
deltaEncodedSize,
|
|
3694
|
+
deltaEncodeNeighbors,
|
|
3695
|
+
deltaDecodeNeighbors,
|
|
3696
|
+
decodeVarint,
|
|
3697
|
+
createStorageBackend,
|
|
3698
|
+
createBatchWriter,
|
|
3699
|
+
cosineDistanceInt8,
|
|
3700
|
+
cosineDistanceFast,
|
|
3701
|
+
WriteAheadLog,
|
|
3702
|
+
WALOperationType,
|
|
3703
|
+
VectorNotFoundError,
|
|
3704
|
+
VectorDBError,
|
|
3705
|
+
VectorDB,
|
|
3706
|
+
StorageError,
|
|
3707
|
+
ScalarQuantizer,
|
|
3708
|
+
QuantizedVectorStore,
|
|
3709
|
+
QuantizationError,
|
|
3710
|
+
PRESET_VERY_HIGH_DIM,
|
|
3711
|
+
PRESET_SMALL_DATASET,
|
|
3712
|
+
PRESET_MEDIUM_DIM,
|
|
3713
|
+
PRESET_MAX_RECALL,
|
|
3714
|
+
PRESET_LOW_LATENCY,
|
|
3715
|
+
PRESET_LOW_DIM,
|
|
3716
|
+
PRESET_LARGE_DATASET,
|
|
3717
|
+
PRESET_HIGH_DIM,
|
|
3718
|
+
PRESETS,
|
|
3719
|
+
OPFSBackend,
|
|
3720
|
+
MemoryBackend,
|
|
3721
|
+
MaxBinaryHeap,
|
|
3722
|
+
JsDistanceBackend,
|
|
3723
|
+
HNSWIndex,
|
|
3724
|
+
DuplicateVectorError,
|
|
3725
|
+
DimensionMismatchError,
|
|
3726
|
+
CollectionNotFoundError,
|
|
3727
|
+
CollectionExistsError,
|
|
3728
|
+
Collection,
|
|
3729
|
+
BunStorageBackend,
|
|
3730
|
+
BinaryHeap,
|
|
3731
|
+
BatchWriter
|
|
3732
|
+
};
|