eigen-db 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -0
- package/dist/eigen-db.js +446 -0
- package/dist/eigen-db.js.map +1 -0
- package/dist/eigen-db.umd.cjs +2 -0
- package/dist/eigen-db.umd.cjs.map +1 -0
- package/package.json +27 -0
- package/src/lib/__tests__/compute.bench.ts +70 -0
- package/src/lib/__tests__/compute.test.ts +121 -0
- package/src/lib/__tests__/lexicon.test.ts +96 -0
- package/src/lib/__tests__/memory-manager.test.ts +94 -0
- package/src/lib/__tests__/result-set.test.ts +90 -0
- package/src/lib/__tests__/vector-db.test.ts +443 -0
- package/src/lib/__tests__/wasm-compute.test.ts +152 -0
- package/src/lib/compute.ts +48 -0
- package/src/lib/errors.ts +10 -0
- package/src/lib/index.ts +14 -0
- package/src/lib/lexicon.ts +95 -0
- package/src/lib/memory-manager.ts +147 -0
- package/src/lib/result-set.ts +89 -0
- package/src/lib/simd-binary.ts +11 -0
- package/src/lib/simd.wat +220 -0
- package/src/lib/storage.ts +111 -0
- package/src/lib/types.ts +39 -0
- package/src/lib/vector-db.ts +346 -0
- package/src/lib/wasm-compute.ts +41 -0
package/src/lib/types.ts
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
 * Public configuration accepted when opening a VectorDB instance.
 */
export interface OpenOptions {
  /**
   * Number of components in every vector
   * (e.g., 1536 for OpenAI text-embedding-3-small).
   */
  dimensions: number;

  /** OPFS directory name. Falls back to "default" when omitted. */
  name?: string;

  /** Normalize vectors on set/query. Treated as enabled unless set to `false`. */
  normalize?: boolean;
}
|
|
12
|
+
|
|
13
|
+
/**
 * OpenOptions plus internal overrides meant for tests and advanced callers.
 */
export interface OpenOptionsInternal extends OpenOptions {
  /**
   * Pre-compiled WASM binary to instantiate.
   * - omitted: the embedded SIMD binary is used
   * - `null`: forces JS-only mode
   */
  wasmBinary?: Uint8Array | null;

  /** Replacement storage provider (OPFS is used when omitted). Handy in tests. */
  storage?: import("./storage").StorageProvider;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Per-call options for set/setMany operations.
 */
export interface SetOptions {
  /**
   * When provided, takes the place of the instance-level normalization
   * setting for this call only.
   */
  normalize?: boolean;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Per-call options for query operations.
 */
export interface QueryOptions {
  /**
   * When provided, takes the place of the instance-level normalization
   * setting for this call only.
   */
  normalize?: boolean;

  /** Upper bound on the number of results returned. All results when omitted. */
  topK?: number;
}
|
|
@@ -0,0 +1,346 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VectorDB — Key-Value Vector Database
|
|
3
|
+
*
|
|
4
|
+
* Decoupled from embedding providers. Users pass pre-computed vectors
|
|
5
|
+
* as Float32Array values with string keys.
|
|
6
|
+
*
|
|
7
|
+
* Supports:
|
|
8
|
+
* - set/get/setMany/getMany for key-value CRUD
|
|
9
|
+
* - query for similarity search (dot product on normalized vectors)
|
|
10
|
+
* - flush to persist, close to flush+release, clear to wipe
|
|
11
|
+
* - Last-write-wins semantics for duplicate keys (append-only storage)
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { normalize, searchAll } from "./compute";
|
|
15
|
+
import { VectorCapacityExceededError } from "./errors";
|
|
16
|
+
import { encodeLexicon, decodeLexicon } from "./lexicon";
|
|
17
|
+
import { MemoryManager } from "./memory-manager";
|
|
18
|
+
import { ResultSet } from "./result-set";
|
|
19
|
+
import { getSimdWasmBinary } from "./simd-binary";
|
|
20
|
+
import type { StorageProvider } from "./storage";
|
|
21
|
+
import { OPFSStorageProvider } from "./storage";
|
|
22
|
+
import type { OpenOptions, OpenOptionsInternal, SetOptions, QueryOptions } from "./types";
|
|
23
|
+
import { instantiateWasm, type WasmExports } from "./wasm-compute";
|
|
24
|
+
|
|
25
|
+
const VECTORS_FILE = "vectors.bin";
const KEYS_FILE = "keys.bin";

/**
 * Key-value vector database.
 *
 * Keys are strings and values are Float32Array vectors of a fixed length
 * (`dimensions`). Vectors live in the memory owned by a MemoryManager; keys
 * are tracked in two mirrored indexes (`keyToSlot` / `slotToKey`). Instances
 * are created through {@link VectorDB.open}.
 */
export class VectorDB {
  private readonly memoryManager: MemoryManager;
  private readonly storage: StorageProvider;
  private readonly dimensions: number;
  private readonly shouldNormalize: boolean;
  private wasmExports: WasmExports | null;

  /** Maps key to its slot index in the vector array */
  private keyToSlot: Map<string, number>;

  /** Maps slot index back to its key */
  private slotToKey: string[];

  /** Whether this instance has been closed */
  private closed = false;

  private constructor(
    memoryManager: MemoryManager,
    storage: StorageProvider,
    dimensions: number,
    shouldNormalize: boolean,
    wasmExports: WasmExports | null,
    keyToSlot: Map<string, number>,
    slotToKey: string[],
  ) {
    this.memoryManager = memoryManager;
    this.storage = storage;
    this.dimensions = dimensions;
    this.shouldNormalize = shouldNormalize;
    this.wasmExports = wasmExports;
    this.keyToSlot = keyToSlot;
    this.slotToKey = slotToKey;
  }

  /**
   * Opens a VectorDB instance.
   * Loads existing data from storage into WASM memory.
   *
   * @param options - Database name, vector dimensions, normalization default
   *   and (internal) storage / WASM overrides.
   * @returns A ready-to-use instance backed by the loaded data.
   * @throws RangeError if `options.dimensions` is not a positive integer.
   * @throws Error if the persisted vectors are not a whole number of
   *   `dimensions`-sized vectors, or if the number of persisted keys does not
   *   match the number of persisted vectors.
   */
  static async open(options: OpenOptions): Promise<VectorDB>;
  static async open(options: OpenOptionsInternal): Promise<VectorDB>;
  static async open(options: OpenOptionsInternal): Promise<VectorDB> {
    const { dimensions } = options;

    // Validate up front: a zero, fractional or NaN dimension would otherwise
    // turn the vector count computed below into NaN / a fraction.
    if (!Number.isInteger(dimensions) || dimensions <= 0) {
      throw new RangeError(`dimensions must be a positive integer, got ${dimensions}`);
    }

    const name = options.name ?? "default";
    const storage = options.storage ?? new OPFSStorageProvider(name);
    const shouldNormalize = options.normalize !== false;

    // Load existing data from storage
    const [vectorBytes, keysBytes] = await Promise.all([
      storage.readAll(VECTORS_FILE),
      storage.readAll(KEYS_FILE),
    ]);

    // Decode stored keys
    const keys = keysBytes.byteLength > 0 ? decodeLexicon(keysBytes) : [];

    // The two files are written independently by flush(), so verify that they
    // agree with each other and with the requested dimensions before loading.
    const bytesPerVector = dimensions * Float32Array.BYTES_PER_ELEMENT;
    if (vectorBytes.byteLength % bytesPerVector !== 0) {
      throw new Error(
        `Stored vector data (${vectorBytes.byteLength} bytes) is not a whole number of ` +
          `${dimensions}-dimensional vectors; it is corrupt or was written with different dimensions`,
      );
    }
    const vectorCount = vectorBytes.byteLength / bytesPerVector;
    if (keys.length !== vectorCount) {
      throw new Error(
        `Stored data is inconsistent: found ${keys.length} keys but ${vectorCount} vectors`,
      );
    }

    // Build key-to-slot mapping.
    // flush() always writes deduplicated state, so keys are unique on load.
    const keyToSlot = new Map<string, number>();
    const slotToKey: string[] = [];

    for (let i = 0; i < keys.length; i++) {
      keyToSlot.set(keys[i], i);
      slotToKey[i] = keys[i];
    }

    // Initialize memory manager
    const mm = new MemoryManager(dimensions, vectorCount);

    if (vectorBytes.byteLength > 0) {
      mm.loadVectorBytes(vectorBytes, vectorCount);
    }

    // Try to instantiate WASM SIMD module.
    // `undefined` means "use the embedded binary"; `null` means "JS only".
    let wasmExports: WasmExports | null = null;
    const wasmBinary = options.wasmBinary !== undefined ? options.wasmBinary : getSimdWasmBinary();
    if (wasmBinary !== null) {
      try {
        wasmExports = await instantiateWasm(wasmBinary, mm.memory);
      } catch {
        // Deliberate best-effort: fall back to JS compute
      }
    }

    return new VectorDB(
      mm,
      storage,
      dimensions,
      shouldNormalize,
      wasmExports,
      keyToSlot,
      slotToKey,
    );
  }

  /** Total number of key-value pairs in the database */
  get size(): number {
    return this.keyToSlot.size;
  }

  /**
   * Set a key-value pair. If the key already exists, its vector is overwritten (last-write-wins).
   * The value is a Float32Array of length equal to the configured dimensions.
   *
   * @param key - Key to store the vector under.
   * @param value - Vector to store; it is copied, the caller's array is never mutated.
   * @param options - Optional per-call normalization override.
   * @throws Error if the instance is closed or `value.length` differs from the configured dimensions.
   * @throws VectorCapacityExceededError if adding a new key would exceed `maxVectors`.
   */
  set(key: string, value: Float32Array, options?: SetOptions): void {
    this.assertOpen();

    if (value.length !== this.dimensions) {
      throw new Error(
        `Vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`,
      );
    }

    // Clone to avoid mutating caller's array during normalization
    const vec = new Float32Array(value);

    // Normalize if needed (per-call option wins over the instance default)
    const doNormalize = options?.normalize ?? this.shouldNormalize;
    if (doNormalize) {
      this.normalizeVector(vec);
    }

    const existingSlot = this.keyToSlot.get(key);
    if (existingSlot !== undefined) {
      // Overwrite existing slot
      this.memoryManager.writeVector(existingSlot, vec);
      return;
    }

    // Append new entry
    const newTotal = this.memoryManager.vectorCount + 1;
    if (newTotal > this.memoryManager.maxVectors) {
      throw new VectorCapacityExceededError(this.memoryManager.maxVectors);
    }
    this.memoryManager.ensureCapacity(1);
    // The slot of the appended vector is the count *before* appending.
    const slotIndex = this.memoryManager.vectorCount;
    this.memoryManager.appendVectors([vec]);
    this.keyToSlot.set(key, slotIndex);
    this.slotToKey[slotIndex] = key;
  }

  /**
   * Get the stored vector for a key. Returns undefined if the key does not exist.
   * Returns a copy of the stored vector.
   *
   * @throws Error if the instance is closed.
   */
  get(key: string): Float32Array | undefined {
    this.assertOpen();

    const slot = this.keyToSlot.get(key);
    if (slot === undefined) return undefined;

    // Return a copy so callers can't corrupt WASM memory
    return new Float32Array(this.memoryManager.readVector(slot));
  }

  /**
   * Set multiple key-value pairs at once. Last-write-wins applies within the batch.
   *
   * Entries are applied in order through {@link VectorDB.set}; if one entry
   * throws, the entries before it have already been applied.
   *
   * @param entries - `[key, vector]` pairs to store.
   * @param options - Optional per-call options forwarded to every `set` call.
   * @throws Error if the instance is closed (even for an empty batch).
   */
  setMany(entries: [string, Float32Array][], options?: SetOptions): void {
    this.assertOpen();

    for (const [key, value] of entries) {
      this.set(key, value, options);
    }
  }

  /**
   * Get vectors for multiple keys. Returns undefined for keys that don't exist.
   *
   * @throws Error if the instance is closed (even for an empty key list).
   */
  getMany(keys: string[]): (Float32Array | undefined)[] {
    this.assertOpen();

    return keys.map((key) => this.get(key));
  }

  /**
   * Search for the most similar vectors to the given query vector.
   * Returns a ResultSet sorted by descending similarity score.
   *
   * @param value - Query vector; it is copied, the caller's array is never mutated.
   * @param options - Optional `topK` limit (defaults to the database size) and
   *   per-call normalization override.
   * @throws Error if the instance is closed, or if the database is non-empty and
   *   `value.length` differs from the configured dimensions.
   */
  query(value: Float32Array, options?: QueryOptions): ResultSet {
    this.assertOpen();

    const k = options?.topK ?? this.size;

    // An empty database short-circuits before any validation or memory access.
    if (this.size === 0) {
      return ResultSet.fromScores(new Float32Array(0), () => "", 0);
    }

    if (value.length !== this.dimensions) {
      throw new Error(
        `Query vector dimension mismatch: expected ${this.dimensions}, got ${value.length}`,
      );
    }

    // Clone and optionally normalize the query vector
    const queryVec = new Float32Array(value);
    const doNormalize = options?.normalize ?? this.shouldNormalize;
    if (doNormalize) {
      this.normalizeVector(queryVec);
    }

    // Write query to WASM memory
    this.memoryManager.writeQuery(queryVec);

    // Ensure memory has space for scores buffer
    this.memoryManager.ensureCapacity(0);

    // Total vectors in memory
    const totalVectors = this.memoryManager.vectorCount;

    // Execute search
    const scoresOffset = this.memoryManager.scoresOffset;
    if (this.wasmExports) {
      this.wasmExports.search_all(
        this.memoryManager.queryOffset,
        this.memoryManager.dbOffset,
        scoresOffset,
        totalVectors,
        this.dimensions,
      );
    } else {
      // JS fallback: build the views only after ensureCapacity() so they
      // reference the current memory buffer.
      const buffer = this.memoryManager.memory.buffer;
      const queryView = new Float32Array(buffer, this.memoryManager.queryOffset, this.dimensions);
      const dbView = new Float32Array(
        buffer,
        this.memoryManager.dbOffset,
        totalVectors * this.dimensions,
      );
      const scoresView = new Float32Array(buffer, scoresOffset, totalVectors);
      searchAll(queryView, dbView, scoresView, totalVectors, this.dimensions);
    }

    // Read scores (make a copy so the buffer can be reused)
    const scores = new Float32Array(this.memoryManager.readScores());

    // Resolve key from slot index
    const slotToKey = this.slotToKey;
    const resolveKey = (slotIndex: number): string => {
      return slotToKey[slotIndex];
    };

    return ResultSet.fromScores(scores, resolveKey, k);
  }

  /**
   * Persist the current in-memory state to storage.
   *
   * Writes the raw vector bytes and the encoded key list as two separate files.
   *
   * @throws Error if the instance is closed.
   */
  async flush(): Promise<void> {
    this.assertOpen();

    const totalVectors = this.memoryManager.vectorCount;
    const byteLength = totalVectors * this.dimensions * Float32Array.BYTES_PER_ELEMENT;

    // Serialize vectors from WASM memory (copied, so the write does not
    // observe later mutations of the live memory).
    const vectorBytes = new Uint8Array(byteLength);
    if (totalVectors > 0) {
      const src = new Uint8Array(
        this.memoryManager.memory.buffer,
        this.memoryManager.dbOffset,
        byteLength,
      );
      vectorBytes.set(src);
    }

    // Serialize keys using lexicon format
    const keysBytes = encodeLexicon(this.slotToKey);

    await Promise.all([
      this.storage.write(VECTORS_FILE, vectorBytes),
      this.storage.write(KEYS_FILE, keysBytes),
    ]);
  }

  /**
   * Flush data to storage and release the instance.
   * The instance cannot be used after close.
   * Calling close on an already-closed instance is a no-op. If the flush
   * fails, the error propagates and the instance stays open.
   */
  async close(): Promise<void> {
    if (this.closed) return;
    await this.flush();
    this.closed = true;
  }

  /**
   * Clear all data from the database and storage.
   *
   * @throws Error if the instance is closed.
   */
  async clear(): Promise<void> {
    this.assertOpen();

    this.keyToSlot.clear();
    this.slotToKey.length = 0;
    this.memoryManager.reset();

    await this.storage.destroy();
  }

  /**
   * Normalize a vector using WASM (if available) or JS fallback.
   * The vector is modified in place.
   */
  private normalizeVector(vec: Float32Array): void {
    if (this.wasmExports) {
      // Use the query region as scratch space: copy in, normalize, copy out.
      const ptr = this.memoryManager.queryOffset;
      new Float32Array(this.memoryManager.memory.buffer, ptr, vec.length).set(vec);
      this.wasmExports.normalize(ptr, vec.length);
      const normalized = new Float32Array(this.memoryManager.memory.buffer, ptr, vec.length);
      vec.set(normalized);
    } else {
      normalize(vec);
    }
  }

  /** Throws if this instance has been closed. */
  private assertOpen(): void {
    if (this.closed) {
      throw new Error("VectorDB instance has been closed");
    }
  }
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WASM SIMD compute layer.
|
|
3
|
+
* Compiles the hand-written WAT module and provides typed wrappers
|
|
4
|
+
* that operate on shared WebAssembly.Memory.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Functions the WASM compute module is expected to export.
 * All pointer arguments are byte offsets into the shared WebAssembly.Memory.
 */
export interface WasmExports {
  /**
   * Scores a query vector against `dbSize` database vectors.
   * Callers in this package read the results back from `scoresPtr`.
   */
  search_all(
    queryPtr: number,
    dbPtr: number,
    scoresPtr: number,
    dbSize: number,
    dimensions: number,
  ): void;

  /**
   * Normalizes the `dimensions`-long vector located at `ptr`.
   * Callers in this package read the result back from the same offset.
   */
  normalize(ptr: number, dimensions: number): void;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Compiles a WAT string into a WASM module using the wabt library.
 * This is used at build time or test time; at runtime, the pre-compiled binary is used.
 *
 * @param watSource - WebAssembly text-format source (parsed with SIMD enabled).
 * @returns The compiled WASM binary.
 * @throws Whatever wabt throws when the source fails to parse or validate.
 */
export async function compileWatToWasm(watSource: string): Promise<Uint8Array> {
  const wabt = await import("wabt");
  const wabtModule = await wabt.default();
  const parsed = wabtModule.parseWat("simd.wat", watSource, {
    simd: true,
  });
  try {
    parsed.resolveNames();
    parsed.validate();
    const { buffer } = parsed.toBinary({});
    // Copy the bytes out before the wabt module is released.
    return new Uint8Array(buffer);
  } finally {
    // Always release the wabt module, including when resolve/validate/toBinary throws.
    parsed.destroy();
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Instantiates a WASM module with the given memory and returns typed exports.
 *
 * The memory is supplied to the module as the `env.memory` import.
 *
 * @param wasmBinary - Compiled WASM module bytes.
 * @param memory - Memory instance shared between the module and the caller.
 * @returns The module's exports, typed as {@link WasmExports}.
 * @throws TypeError if the module does not export `normalize` and `search_all`
 *   as functions. Compile and link errors from `WebAssembly.instantiate`
 *   propagate unchanged.
 */
export async function instantiateWasm(
  wasmBinary: Uint8Array,
  memory: WebAssembly.Memory,
): Promise<WasmExports> {
  // Instantiating from bytes resolves to { module, instance }.
  const { instance } = await WebAssembly.instantiate(wasmBinary, { env: { memory } });

  // Check the contract at runtime so an incompatible binary fails here (where
  // callers can fall back) instead of later with "x is not a function".
  const { normalize, search_all: searchAll } = instance.exports;
  if (typeof normalize !== "function" || typeof searchAll !== "function") {
    throw new TypeError("WASM module must export `normalize` and `search_all` functions");
  }

  // Safe after the check above: the required members exist and are callable.
  return instance.exports as unknown as WasmExports;
}
|