eigen-db 4.1.0 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +79 -27
- package/dist/eigen-db.js +317 -195
- package/dist/eigen-db.js.map +1 -1
- package/dist/eigen-db.umd.cjs +1 -1
- package/dist/eigen-db.umd.cjs.map +1 -1
- package/package.json +1 -1
- package/src/lib/__tests__/result-set.test.ts +19 -19
- package/src/lib/__tests__/vector-db.test.ts +429 -16
- package/src/lib/memory-manager.ts +8 -0
- package/src/lib/result-set.ts +16 -15
- package/src/lib/simd-binary.ts +1 -1
- package/src/lib/simd-optimized.wat +362 -0
- package/src/lib/simd.wat +42 -248
- package/src/lib/types.ts +4 -6
- package/src/lib/vector-db.ts +241 -9
package/CHANGELOG.md
CHANGED
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@ High-performance vector database for the web.
|
|
|
4
4
|
|
|
5
5
|
`eigen-db` stores and queries embedding vectors in-browser, using:
|
|
6
6
|
|
|
7
|
-
-
|
|
7
|
+
- Pluggable storage backends (in-memory by default, OPFS for browser persistence)
|
|
8
8
|
- WASM SIMD for fast compute when available
|
|
9
9
|
- JavaScript fallback when WASM SIMD is unavailable
|
|
10
10
|
|
|
@@ -21,13 +21,24 @@ npm install eigen-db
|
|
|
21
21
|
```ts
|
|
22
22
|
import { DB } from "eigen-db";
|
|
23
23
|
|
|
24
|
+
// In-memory (default) — no persistence, great for ephemeral sessions
|
|
24
25
|
const db = await DB.open({
|
|
25
|
-
name: "my-index", // optional, defaults to "default"
|
|
26
26
|
dimensions: 1536, // required
|
|
27
27
|
normalize: true, // optional, defaults to true
|
|
28
28
|
});
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
+
For browser persistence, mount an OPFS storage backend:
|
|
32
|
+
|
|
33
|
+
```ts
|
|
34
|
+
import { DB, OPFSStorageProvider } from "eigen-db";
|
|
35
|
+
|
|
36
|
+
const db = await DB.open({
|
|
37
|
+
dimensions: 1536,
|
|
38
|
+
storage: new OPFSStorageProvider("my-index"), // persistent OPFS directory
|
|
39
|
+
});
|
|
40
|
+
```
|
|
41
|
+
|
|
31
42
|
### 2) Insert vectors
|
|
32
43
|
|
|
33
44
|
```ts
|
|
@@ -50,11 +61,11 @@ Notes:
|
|
|
50
61
|
```ts
|
|
51
62
|
const queryVector = embeddingQuery;
|
|
52
63
|
|
|
53
|
-
// Returns a plain array of { key,
|
|
64
|
+
// Returns a plain array of { key, similarity } sorted by descending similarity
|
|
54
65
|
const results = db.query(queryVector, { topK: 10 });
|
|
55
66
|
|
|
56
|
-
for (const { key,
|
|
57
|
-
console.log(key,
|
|
67
|
+
for (const { key, similarity } of results) {
|
|
68
|
+
console.log(key, similarity);
|
|
58
69
|
}
|
|
59
70
|
```
|
|
60
71
|
|
|
@@ -64,23 +75,23 @@ For lazy iteration (useful for pagination or early stopping):
|
|
|
64
75
|
const results = db.query(queryVector, { topK: 100, iterable: true });
|
|
65
76
|
|
|
66
77
|
// Iterate and break early — keys are resolved on demand
|
|
67
|
-
for (const { key,
|
|
68
|
-
if (
|
|
69
|
-
console.log(key,
|
|
78
|
+
for (const { key, similarity } of results) {
|
|
79
|
+
if (similarity < 0.5) break;
|
|
80
|
+
console.log(key, similarity);
|
|
70
81
|
}
|
|
71
82
|
|
|
72
83
|
// Or spread into an array when you need all results
|
|
73
84
|
const all = [...results];
|
|
74
85
|
```
|
|
75
86
|
|
|
76
|
-
Use `
|
|
87
|
+
Use `minSimilarity` to automatically cut off results below a threshold:
|
|
77
88
|
|
|
78
89
|
```ts
|
|
79
|
-
// Only return results
|
|
80
|
-
const results = db.query(queryVector, {
|
|
90
|
+
// Only return results with similarity ≥ 0.7 (inclusive)
|
|
91
|
+
const results = db.query(queryVector, { minSimilarity: 0.7 });
|
|
81
92
|
|
|
82
93
|
// Works with iterable mode too — iteration stops early at the threshold
|
|
83
|
-
const results = db.query(queryVector, {
|
|
94
|
+
const results = db.query(queryVector, { minSimilarity: 0.7, iterable: true });
|
|
84
95
|
```
|
|
85
96
|
|
|
86
97
|
### 4) Persist and lifecycle
|
|
@@ -96,20 +107,54 @@ To delete all vectors and storage:
|
|
|
96
107
|
await db.clear();
|
|
97
108
|
```
|
|
98
109
|
|
|
99
|
-
|
|
110
|
+
### 5) Export and import
|
|
111
|
+
|
|
112
|
+
Export the entire database as a streaming binary file:
|
|
113
|
+
|
|
114
|
+
```ts
|
|
115
|
+
const stream = await db.export(); // ReadableStream<Uint8Array>
|
|
116
|
+
|
|
117
|
+
// In a browser — download as a file
|
|
118
|
+
const response = new Response(stream);
|
|
119
|
+
const blob = await response.blob();
|
|
120
|
+
const url = URL.createObjectURL(blob);
|
|
121
|
+
const a = document.createElement("a");
|
|
122
|
+
a.href = url;
|
|
123
|
+
a.download = "database.bin";
|
|
124
|
+
a.click();
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Import from a stream, replacing all existing data:
|
|
128
|
+
|
|
129
|
+
```ts
|
|
130
|
+
// From a File (e.g., <input type="file">)
|
|
131
|
+
await db.import(file.stream());
|
|
132
|
+
|
|
133
|
+
// From a fetch response
|
|
134
|
+
const res = await fetch("/path/to/database.bin");
|
|
135
|
+
await db.import(res.body!);
|
|
136
|
+
```
|
|
100
137
|
|
|
101
|
-
|
|
138
|
+
Notes:
|
|
102
139
|
|
|
103
|
-
-
|
|
104
|
-
-
|
|
140
|
+
- `import()` replaces all existing data in the target database.
|
|
141
|
+
- A dimension check is performed on import: the stream must contain data exported from a database with the same `dimensions` setting.
|
|
142
|
+
- Both methods use the Web Streams API to avoid large heap allocations — vectors are streamed in 64KB chunks.
|
|
143
|
+
|
|
144
|
+
## Similarity metric
|
|
145
|
+
|
|
146
|
+
Similarity is the dot product of the query and stored vectors.
|
|
147
|
+
|
|
148
|
+
- **With normalization enabled** (the default): vectors are L2-normalized before storage and query, so the dot product equals cosine similarity. Similarity ranges from **1** (identical) to **-1** (opposite), with **0** indicating orthogonal vectors.
|
|
149
|
+
- **With normalization disabled** (`normalize: false`): the dot product is computed on raw vectors. The range depends on the magnitude of your vectors. Use this mode when your vectors are already normalized or when you want raw dot-product semantics.
|
|
105
150
|
|
|
106
151
|
**When to normalize:**
|
|
107
152
|
|
|
108
153
|
| Scenario | Normalize? | Notes |
|
|
109
154
|
| --- | --- | --- |
|
|
110
|
-
| Using embeddings from OpenAI, Cohere, etc. | `true` (default) | Embeddings may not be unit-length; normalization ensures cosine
|
|
155
|
+
| Using embeddings from OpenAI, Cohere, etc. | `true` (default) | Embeddings may not be unit-length; normalization ensures cosine similarity. |
|
|
111
156
|
| Vectors are already unit-length | Either | Setting `false` avoids redundant work. |
|
|
112
|
-
| You need raw dot-product semantics | `false` |
|
|
157
|
+
| You need raw dot-product semantics | `false` | Similarity will be the raw dot product; range depends on vector magnitudes. |
|
|
113
158
|
|
|
114
159
|
## Full API Reference
|
|
115
160
|
|
|
@@ -151,7 +196,7 @@ Opens (or creates) a database instance and loads persisted data.
|
|
|
151
196
|
- `getMany(keys: string[]): (number[] | undefined)[]`
|
|
152
197
|
- Batch lookup.
|
|
153
198
|
- `query(value: VectorInput, options?: QueryOptions): ResultItem[]`
|
|
154
|
-
- Returns results sorted by
|
|
199
|
+
- Returns results sorted by descending similarity as a plain array.
|
|
155
200
|
- Throws on dimension mismatch.
|
|
156
201
|
- `query(value: VectorInput, options: QueryOptions & { iterable: true }): Iterable<ResultItem>`
|
|
157
202
|
- With `{ iterable: true }`, returns a lazy iterable. Keys are resolved
|
|
@@ -164,17 +209,22 @@ Opens (or creates) a database instance and loads persisted data.
|
|
|
164
209
|
- Subsequent operations throw.
|
|
165
210
|
- `clear(): Promise<void>`
|
|
166
211
|
- Clears in-memory state and destroys storage for this DB.
|
|
212
|
+
- `export(): Promise<ReadableStream<Uint8Array>>`
|
|
213
|
+
- Exports the entire database as a streaming binary. Vectors are streamed in 64KB chunks.
|
|
214
|
+
- `import(stream: ReadableStream<Uint8Array>): Promise<void>`
|
|
215
|
+
- Imports data from a stream, replacing all existing data.
|
|
216
|
+
- Throws on dimension mismatch between the stream data and the database.
|
|
167
217
|
|
|
168
218
|
### `ResultItem`
|
|
169
219
|
|
|
170
220
|
```ts
|
|
171
221
|
interface ResultItem {
|
|
172
222
|
key: string;
|
|
173
|
-
|
|
223
|
+
similarity: number;
|
|
174
224
|
}
|
|
175
225
|
```
|
|
176
226
|
|
|
177
|
-
- `
|
|
227
|
+
- `similarity` — The dot product of the query and stored vectors. With normalization (default), this is cosine similarity: 1 = identical, -1 = opposite.
|
|
178
228
|
|
|
179
229
|
### Option types
|
|
180
230
|
|
|
@@ -182,9 +232,9 @@ interface ResultItem {
|
|
|
182
232
|
|
|
183
233
|
```ts
|
|
184
234
|
interface OpenOptions {
|
|
185
|
-
name?: string; // OPFS directory name, default: "default"
|
|
186
235
|
dimensions: number; // vector size
|
|
187
236
|
normalize?: boolean; // default: true
|
|
237
|
+
storage?: StorageProvider; // default: InMemoryStorageProvider
|
|
188
238
|
}
|
|
189
239
|
```
|
|
190
240
|
|
|
@@ -194,12 +244,10 @@ Advanced/testing override options.
|
|
|
194
244
|
|
|
195
245
|
```ts
|
|
196
246
|
interface OpenOptionsInternal extends OpenOptions {
|
|
197
|
-
storage?: StorageProvider;
|
|
198
247
|
wasmBinary?: Uint8Array | null;
|
|
199
248
|
}
|
|
200
249
|
```
|
|
201
250
|
|
|
202
|
-
- `storage`: provide custom storage implementation (for example, tests)
|
|
203
251
|
- `wasmBinary`:
|
|
204
252
|
- `Uint8Array`: use provided precompiled WASM
|
|
205
253
|
- `null`: force JavaScript-only compute
|
|
@@ -218,7 +266,7 @@ interface SetOptions {
|
|
|
218
266
|
```ts
|
|
219
267
|
interface QueryOptions {
|
|
220
268
|
topK?: number; // default: Infinity (all results)
|
|
221
|
-
|
|
269
|
+
minSimilarity?: number; // inclusive lower bound on similarity; results below this are excluded
|
|
222
270
|
normalize?: boolean;
|
|
223
271
|
iterable?: boolean; // when true, returns Iterable<ResultItem> instead of ResultItem[]
|
|
224
272
|
}
|
|
@@ -289,7 +337,11 @@ npm run dev
|
|
|
289
337
|
|
|
290
338
|
## Practical notes
|
|
291
339
|
|
|
292
|
-
-
|
|
293
|
-
- `topK` defaults to `Infinity`, returning all stored vectors sorted by
|
|
340
|
+
- Similarity is the dot product of query and stored vectors; with normalization enabled (default), this behaves like cosine similarity (1 = identical, -1 = opposite).
|
|
341
|
+
- `topK` defaults to `Infinity`, returning all stored vectors sorted by similarity. Use `minSimilarity` to limit results by proximity.
|
|
294
342
|
- Querying an empty database returns an empty array (`[]`).
|
|
295
343
|
- `flush()` writes deduplicated state, and reopening the database preserves the key-to-slot mapping.
|
|
344
|
+
|
|
345
|
+
## Related
|
|
346
|
+
|
|
347
|
+
- Just need cosine similarity? Try [fast-theta](https://github.com/chuanqisun/fast-theta).
|