eigen-db 4.3.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +74 -23
- package/dist/compute.d.ts +20 -0
- package/dist/eigen-db.js +228 -173
- package/dist/eigen-db.js.map +1 -1
- package/dist/eigen-db.umd.cjs +1 -1
- package/dist/eigen-db.umd.cjs.map +1 -1
- package/dist/errors.d.ts +7 -0
- package/dist/index.d.ts +12 -0
- package/dist/lexicon.d.ts +28 -0
- package/dist/memory-manager.d.ts +68 -0
- package/dist/result-set.d.ts +46 -0
- package/dist/simd-binary.d.ts +1 -0
- package/dist/storage.d.ts +38 -0
- package/dist/types.d.ts +48 -0
- package/dist/vector-db.d.ts +131 -0
- package/dist/wasm-compute.d.ts +13 -0
- package/package.json +4 -4
- package/src/lib/__tests__/result-set.test.ts +146 -27
- package/src/lib/__tests__/vector-db.test.ts +476 -4
- package/src/lib/result-set.ts +55 -24
- package/src/lib/types.ts +5 -1
- package/src/lib/vector-db.ts +99 -20
package/src/lib/vector-db.ts
CHANGED
|
@@ -17,7 +17,7 @@ import { VectorCapacityExceededError } from "./errors";
|
|
|
17
17
|
import { decodeLexicon, encodeLexicon } from "./lexicon";
|
|
18
18
|
import { MemoryManager } from "./memory-manager";
|
|
19
19
|
import type { ResultItem } from "./result-set";
|
|
20
|
-
import { iterableResults,
|
|
20
|
+
import { iterableResults, queryResults } from "./result-set";
|
|
21
21
|
import { getSimdWasmBinary } from "./simd-binary";
|
|
22
22
|
import type { StorageProvider } from "./storage";
|
|
23
23
|
import { InMemoryStorageProvider } from "./storage";
|
|
@@ -38,7 +38,7 @@ const STREAM_CHUNK_SIZE = 65536;
|
|
|
38
38
|
export class VectorDB {
|
|
39
39
|
private readonly memoryManager: MemoryManager;
|
|
40
40
|
private readonly storage: StorageProvider;
|
|
41
|
-
private readonly dimensions: number;
|
|
41
|
+
private readonly _dimensions: number;
|
|
42
42
|
private readonly shouldNormalize: boolean;
|
|
43
43
|
private wasmExports: WasmExports | null;
|
|
44
44
|
|
|
@@ -62,7 +62,7 @@ export class VectorDB {
|
|
|
62
62
|
) {
|
|
63
63
|
this.memoryManager = memoryManager;
|
|
64
64
|
this.storage = storage;
|
|
65
|
-
this.dimensions = dimensions;
|
|
65
|
+
this._dimensions = dimensions;
|
|
66
66
|
this.shouldNormalize = shouldNormalize;
|
|
67
67
|
this.wasmExports = wasmExports;
|
|
68
68
|
this.keyToSlot = keyToSlot;
|
|
@@ -122,6 +122,81 @@ export class VectorDB {
|
|
|
122
122
|
return this.keyToSlot.size;
|
|
123
123
|
}
|
|
124
124
|
|
|
125
|
+
/** Number of dimensions per vector */
|
|
126
|
+
get dimensions(): number {
|
|
127
|
+
return this._dimensions;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Check whether a key exists in the database.
|
|
132
|
+
* Uses the internal key-to-slot map for O(1) lookup.
|
|
133
|
+
*/
|
|
134
|
+
has(key: string): boolean {
|
|
135
|
+
this.assertOpen();
|
|
136
|
+
return this.keyToSlot.has(key);
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Delete an entry by key. Returns true if the key existed, false otherwise.
|
|
141
|
+
* Uses swap-and-pop to avoid gaps in the underlying vector array.
|
|
142
|
+
*/
|
|
143
|
+
delete(key: string): boolean {
|
|
144
|
+
this.assertOpen();
|
|
145
|
+
|
|
146
|
+
const slot = this.keyToSlot.get(key);
|
|
147
|
+
if (slot === undefined) return false;
|
|
148
|
+
|
|
149
|
+
const lastSlot = this.memoryManager.vectorCount - 1;
|
|
150
|
+
|
|
151
|
+
if (slot !== lastSlot) {
|
|
152
|
+
// Move last vector into the deleted slot
|
|
153
|
+
const lastVector = new Float32Array(this.memoryManager.readVector(lastSlot));
|
|
154
|
+
this.memoryManager.writeVector(slot, lastVector);
|
|
155
|
+
|
|
156
|
+
// Update mappings for the moved key
|
|
157
|
+
const movedKey = this.slotToKey[lastSlot];
|
|
158
|
+
this.keyToSlot.set(movedKey, slot);
|
|
159
|
+
this.slotToKey[slot] = movedKey;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
// Remove the deleted key and shrink
|
|
163
|
+
this.keyToSlot.delete(key);
|
|
164
|
+
this.slotToKey.length = lastSlot;
|
|
165
|
+
this.memoryManager.setVectorCount(lastSlot);
|
|
166
|
+
|
|
167
|
+
return true;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Returns an iterable of all keys in the database.
|
|
172
|
+
*/
|
|
173
|
+
keys(): IterableIterator<string> {
|
|
174
|
+
this.assertOpen();
|
|
175
|
+
return this.keyToSlot.keys();
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Returns an iterable of [key, value] pairs.
|
|
180
|
+
* Values are returned as plain number array copies.
|
|
181
|
+
*/
|
|
182
|
+
entries(): IterableIterator<[string, number[]]> {
|
|
183
|
+
this.assertOpen();
|
|
184
|
+
const keyToSlot = this.keyToSlot;
|
|
185
|
+
const mm = this.memoryManager;
|
|
186
|
+
return (function* () {
|
|
187
|
+
for (const [key, slot] of keyToSlot) {
|
|
188
|
+
yield [key, Array.from(mm.readVector(slot))] as [string, number[]];
|
|
189
|
+
}
|
|
190
|
+
})();
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Implements the iterable protocol. Same as entries().
|
|
195
|
+
*/
|
|
196
|
+
[Symbol.iterator](): IterableIterator<[string, number[]]> {
|
|
197
|
+
return this.entries();
|
|
198
|
+
}
|
|
199
|
+
|
|
125
200
|
/**
|
|
126
201
|
* Set a key-value pair. If the key already exists, its vector is overwritten (last-write-wins).
|
|
127
202
|
* The value is a number[] or Float32Array of length equal to the configured dimensions.
|
|
@@ -129,8 +204,8 @@ export class VectorDB {
|
|
|
129
204
|
set(key: string, value: VectorInput, options?: SetOptions): void {
|
|
130
205
|
this.assertOpen();
|
|
131
206
|
|
|
132
|
-
if (value.length !== this.dimensions) {
|
|
133
|
-
throw new Error(`Vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`);
|
|
207
|
+
if (value.length !== this._dimensions) {
|
|
208
|
+
throw new Error(`Vector dimension mismatch: expected ${this._dimensions}, got ${value.length}`);
|
|
134
209
|
}
|
|
135
210
|
|
|
136
211
|
// Convert to Float32Array (also clones to avoid mutating caller's array)
|
|
@@ -206,16 +281,18 @@ export class VectorDB {
|
|
|
206
281
|
query(value: VectorInput, options?: QueryOptions): ResultItem[] | Iterable<ResultItem> {
|
|
207
282
|
this.assertOpen();
|
|
208
283
|
|
|
209
|
-
const
|
|
284
|
+
const limit = options?.limit ?? Infinity;
|
|
285
|
+
const order = options?.order ?? "descend";
|
|
210
286
|
const minSimilarity = options?.minSimilarity;
|
|
287
|
+
const maxSimilarity = options?.maxSimilarity;
|
|
211
288
|
const iterable = options && "iterable" in options && options.iterable;
|
|
212
289
|
|
|
213
290
|
if (this.size === 0) {
|
|
214
291
|
return [];
|
|
215
292
|
}
|
|
216
293
|
|
|
217
|
-
if (value.length !== this.dimensions) {
|
|
218
|
-
throw new Error(`Query vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`);
|
|
294
|
+
if (value.length !== this._dimensions) {
|
|
295
|
+
throw new Error(`Query vector dimension mismatch: expected ${this._dimensions}, got ${value.length}`);
|
|
219
296
|
}
|
|
220
297
|
|
|
221
298
|
// Convert to Float32Array and optionally normalize the query vector
|
|
@@ -242,21 +319,21 @@ export class VectorDB {
|
|
|
242
319
|
this.memoryManager.dbOffset,
|
|
243
320
|
scoresOffset,
|
|
244
321
|
totalVectors,
|
|
245
|
-
this.dimensions,
|
|
322
|
+
this._dimensions,
|
|
246
323
|
);
|
|
247
324
|
} else {
|
|
248
325
|
const queryView = new Float32Array(
|
|
249
326
|
this.memoryManager.memory.buffer,
|
|
250
327
|
this.memoryManager.queryOffset,
|
|
251
|
-
this.dimensions,
|
|
328
|
+
this._dimensions,
|
|
252
329
|
);
|
|
253
330
|
const dbView = new Float32Array(
|
|
254
331
|
this.memoryManager.memory.buffer,
|
|
255
332
|
this.memoryManager.dbOffset,
|
|
256
|
-
totalVectors * this.dimensions,
|
|
333
|
+
totalVectors * this._dimensions,
|
|
257
334
|
);
|
|
258
335
|
const scoresView = new Float32Array(this.memoryManager.memory.buffer, scoresOffset, totalVectors);
|
|
259
|
-
searchAll(queryView, dbView, scoresView, totalVectors, this.dimensions);
|
|
336
|
+
searchAll(queryView, dbView, scoresView, totalVectors, this._dimensions);
|
|
260
337
|
}
|
|
261
338
|
|
|
262
339
|
// Read scores (make a copy so the buffer can be reused)
|
|
@@ -268,10 +345,12 @@ export class VectorDB {
|
|
|
268
345
|
return slotToKey[slotIndex];
|
|
269
346
|
};
|
|
270
347
|
|
|
348
|
+
const resultOptions = { limit, order, minSimilarity, maxSimilarity } as const;
|
|
349
|
+
|
|
271
350
|
if (iterable) {
|
|
272
|
-
return iterableResults(scores, resolveKey,
|
|
351
|
+
return iterableResults(scores, resolveKey, resultOptions);
|
|
273
352
|
}
|
|
274
|
-
return
|
|
353
|
+
return queryResults(scores, resolveKey, resultOptions);
|
|
275
354
|
}
|
|
276
355
|
|
|
277
356
|
/**
|
|
@@ -283,12 +362,12 @@ export class VectorDB {
|
|
|
283
362
|
const totalVectors = this.memoryManager.vectorCount;
|
|
284
363
|
|
|
285
364
|
// Serialize vectors from WASM memory
|
|
286
|
-
const vectorBytes = new Uint8Array(totalVectors * this.dimensions * 4);
|
|
365
|
+
const vectorBytes = new Uint8Array(totalVectors * this._dimensions * 4);
|
|
287
366
|
if (totalVectors > 0) {
|
|
288
367
|
const src = new Uint8Array(
|
|
289
368
|
this.memoryManager.memory.buffer,
|
|
290
369
|
this.memoryManager.dbOffset,
|
|
291
|
-
totalVectors * this.dimensions * 4,
|
|
370
|
+
totalVectors * this._dimensions * 4,
|
|
292
371
|
);
|
|
293
372
|
vectorBytes.set(src);
|
|
294
373
|
}
|
|
@@ -335,7 +414,7 @@ export class VectorDB {
|
|
|
335
414
|
this.assertOpen();
|
|
336
415
|
|
|
337
416
|
const totalVectors = this.memoryManager.vectorCount;
|
|
338
|
-
const vectorDataLen = totalVectors * this.dimensions * 4;
|
|
417
|
+
const vectorDataLen = totalVectors * this._dimensions * 4;
|
|
339
418
|
|
|
340
419
|
// Encode keys (typically much smaller than vectors)
|
|
341
420
|
const keysBytes = encodeLexicon(this.slotToKey);
|
|
@@ -346,7 +425,7 @@ export class VectorDB {
|
|
|
346
425
|
const headerView = new DataView(header);
|
|
347
426
|
headerView.setUint32(0, EXPORT_MAGIC, true);
|
|
348
427
|
headerView.setUint32(4, EXPORT_VERSION, true);
|
|
349
|
-
headerView.setUint32(8, this.dimensions, true);
|
|
428
|
+
headerView.setUint32(8, this._dimensions, true);
|
|
350
429
|
headerView.setUint32(12, totalVectors, true);
|
|
351
430
|
headerView.setUint32(16, vectorDataLen, true);
|
|
352
431
|
headerView.setUint32(20, keysDataLen, true);
|
|
@@ -415,8 +494,8 @@ export class VectorDB {
|
|
|
415
494
|
}
|
|
416
495
|
|
|
417
496
|
const dimensions = headerView.getUint32(8, true);
|
|
418
|
-
if (dimensions !== this.dimensions) {
|
|
419
|
-
throw new Error(`Import dimension mismatch: expected ${this.dimensions}, got ${dimensions}`);
|
|
497
|
+
if (dimensions !== this._dimensions) {
|
|
498
|
+
throw new Error(`Import dimension mismatch: expected ${this._dimensions}, got ${dimensions}`);
|
|
420
499
|
}
|
|
421
500
|
|
|
422
501
|
const vectorCount = headerView.getUint32(12, true);
|