eigen-db 4.4.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.md +38 -25
- package/dist/eigen-db.js +190 -187
- package/dist/eigen-db.js.map +1 -1
- package/dist/eigen-db.umd.cjs +1 -1
- package/dist/eigen-db.umd.cjs.map +1 -1
- package/dist/result-set.d.ts +18 -7
- package/dist/types.d.ts +5 -1
- package/package.json +1 -1
- package/src/lib/__tests__/result-set.test.ts +146 -27
- package/src/lib/__tests__/vector-db.test.ts +188 -4
- package/src/lib/result-set.ts +55 -24
- package/src/lib/types.ts +5 -1
- package/src/lib/vector-db.ts +8 -4
package/CHANGELOG.md
CHANGED
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Eigen DB
|
|
2
2
|
|
|
3
|
-
High-performance vector database for the web.
|
|
3
|
+
High-performance vector database for the web, powered by WebAssembly.
|
|
4
4
|
|
|
5
5
|
`eigen-db` stores and queries embedding vectors in-browser, using:
|
|
6
6
|
|
|
@@ -16,7 +16,7 @@ npm install eigen-db
|
|
|
16
16
|
|
|
17
17
|
## Guide: Set up and query
|
|
18
18
|
|
|
19
|
-
###
|
|
19
|
+
### Open a database
|
|
20
20
|
|
|
21
21
|
```ts
|
|
22
22
|
import { DB } from "eigen-db";
|
|
@@ -39,7 +39,7 @@ const db = await DB.open({
|
|
|
39
39
|
});
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
-
###
|
|
42
|
+
### Insert vectors
|
|
43
43
|
|
|
44
44
|
```ts
|
|
45
45
|
db.set("doc:1", embedding1);
|
|
@@ -56,7 +56,7 @@ Notes:
|
|
|
56
56
|
- Each vector must be a `number[]` (or `Float32Array`) with exactly `dimensions` elements.
|
|
57
57
|
- Duplicate keys use last-write-wins semantics.
|
|
58
58
|
|
|
59
|
-
###
|
|
59
|
+
### Look up, check, and remove vectors
|
|
60
60
|
|
|
61
61
|
```ts
|
|
62
62
|
db.get("doc:1"); // number[] | undefined
|
|
@@ -66,7 +66,7 @@ db.dimensions; // configured vector dimensions
|
|
|
66
66
|
db.size; // number of entries
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
-
###
|
|
69
|
+
### Iterate over the database
|
|
70
70
|
|
|
71
71
|
```ts
|
|
72
72
|
// Iterate over all keys
|
|
@@ -83,13 +83,13 @@ for (const [key, vector] of db.entries()) {
|
|
|
83
83
|
const all = [...db];
|
|
84
84
|
```
|
|
85
85
|
|
|
86
|
-
###
|
|
86
|
+
### Query nearest vectors
|
|
87
87
|
|
|
88
88
|
```ts
|
|
89
89
|
const queryVector = embeddingQuery;
|
|
90
90
|
|
|
91
91
|
// Returns a plain array of { key, similarity } sorted by descending similarity
|
|
92
|
-
const results = db.query(queryVector, {
|
|
92
|
+
const results = db.query(queryVector, { limit: 10 });
|
|
93
93
|
|
|
94
94
|
for (const { key, similarity } of results) {
|
|
95
95
|
console.log(key, similarity);
|
|
@@ -99,7 +99,7 @@ for (const { key, similarity } of results) {
|
|
|
99
99
|
For lazy iteration (useful for pagination or early stopping):
|
|
100
100
|
|
|
101
101
|
```ts
|
|
102
|
-
const results = db.query(queryVector, {
|
|
102
|
+
const results = db.query(queryVector, { limit: 100, iterable: true });
|
|
103
103
|
|
|
104
104
|
// Iterate and break early — keys are resolved on demand
|
|
105
105
|
for (const { key, similarity } of results) {
|
|
@@ -111,17 +111,27 @@ for (const { key, similarity } of results) {
|
|
|
111
111
|
const all = [...results];
|
|
112
112
|
```
|
|
113
113
|
|
|
114
|
-
Use `minSimilarity`
|
|
114
|
+
Use `minSimilarity` and `maxSimilarity` to filter results by a similarity range:
|
|
115
115
|
|
|
116
116
|
```ts
|
|
117
117
|
// Only return results with similarity ≥ 0.7 (inclusive)
|
|
118
118
|
const results = db.query(queryVector, { minSimilarity: 0.7 });
|
|
119
119
|
|
|
120
|
-
//
|
|
121
|
-
const results = db.query(queryVector, {
|
|
120
|
+
// Only return results with similarity ≤ 0.5 (inclusive)
|
|
121
|
+
const results = db.query(queryVector, { maxSimilarity: 0.5 });
|
|
122
|
+
|
|
123
|
+
// Combine both for a range
|
|
124
|
+
const results = db.query(queryVector, { minSimilarity: 0.3, maxSimilarity: 0.8 });
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Use `order: "ascend"` to get the least similar results first (bottom-K):
|
|
128
|
+
|
|
129
|
+
```ts
|
|
130
|
+
// Least similar results first
|
|
131
|
+
const bottomK = db.query(queryVector, { order: "ascend", limit: 10 });
|
|
122
132
|
```
|
|
123
133
|
|
|
124
|
-
###
|
|
134
|
+
### Persist and lifecycle
|
|
125
135
|
|
|
126
136
|
```ts
|
|
127
137
|
await db.flush(); // persist current state
|
|
@@ -134,7 +144,7 @@ To delete all vectors and storage:
|
|
|
134
144
|
await db.clear();
|
|
135
145
|
```
|
|
136
146
|
|
|
137
|
-
###
|
|
147
|
+
### Export and import
|
|
138
148
|
|
|
139
149
|
Export the entire database as a streaming binary file:
|
|
140
150
|
|
|
@@ -177,11 +187,11 @@ Similarity is the dot product of the query and stored vectors.
|
|
|
177
187
|
|
|
178
188
|
**When to normalize:**
|
|
179
189
|
|
|
180
|
-
| Scenario
|
|
181
|
-
|
|
|
190
|
+
| Scenario | Normalize? | Notes |
|
|
191
|
+
| ------------------------------------------ | ---------------- | --------------------------------------------------------------------------- |
|
|
182
192
|
| Using embeddings from OpenAI, Cohere, etc. | `true` (default) | Embeddings may not be unit-length; normalization ensures cosine similarity. |
|
|
183
|
-
| Vectors are already unit-length
|
|
184
|
-
| You need raw dot-product semantics
|
|
193
|
+
| Vectors are already unit-length | Either | Setting `false` avoids redundant work. |
|
|
194
|
+
| You need raw dot-product semantics | `false` | Similarity will be the raw dot product; range depends on vector magnitudes. |
|
|
185
195
|
|
|
186
196
|
## Full API Reference
|
|
187
197
|
|
|
@@ -303,8 +313,10 @@ interface SetOptions {
|
|
|
303
313
|
|
|
304
314
|
```ts
|
|
305
315
|
interface QueryOptions {
|
|
306
|
-
|
|
316
|
+
limit?: number; // default: Infinity (all results)
|
|
317
|
+
order?: "ascend" | "descend"; // default: "descend" (most similar first)
|
|
307
318
|
minSimilarity?: number; // inclusive lower bound on similarity; results below this are excluded
|
|
319
|
+
maxSimilarity?: number; // inclusive upper bound on similarity; results above this are excluded
|
|
308
320
|
normalize?: boolean;
|
|
309
321
|
iterable?: boolean; // when true, returns Iterable<ResultItem> instead of ResultItem[]
|
|
310
322
|
}
|
|
@@ -349,12 +361,12 @@ Thrown when memory growth would exceed WASM 32-bit memory limits for the configu
|
|
|
349
361
|
|
|
350
362
|
WASM SIMD vs pure JavaScript performance on 1536-dimensional vectors (OpenAI embedding size), measured with `vitest bench` (Node.js):
|
|
351
363
|
|
|
352
|
-
| Operation
|
|
353
|
-
|
|
|
354
|
-
| normalize (1536 dims)
|
|
355
|
-
| searchAll (100 vectors × 1536 dims)
|
|
356
|
-
| searchAll (1,000 vectors × 1536 dims)
|
|
357
|
-
| searchAll (10,000 vectors × 1536 dims) | 34
|
|
364
|
+
| Operation | JS (ops/s) | WASM SIMD (ops/s) | Speedup |
|
|
365
|
+
| -------------------------------------- | ---------- | ----------------- | -------- |
|
|
366
|
+
| normalize (1536 dims) | 223,117 | 2,226,734 | **~10×** |
|
|
367
|
+
| searchAll (100 vectors × 1536 dims) | 3,429 | 77,130 | **~22×** |
|
|
368
|
+
| searchAll (1,000 vectors × 1536 dims) | 344 | 8,009 | **~23×** |
|
|
369
|
+
| searchAll (10,000 vectors × 1536 dims) | 34 | 398 | **~12×** |
|
|
358
370
|
|
|
359
371
|
The WASM SIMD layer uses 2-vector outer loop unrolling (halving query memory reads) and 4× inner loop unrolling with multiple independent accumulators.
|
|
360
372
|
|
|
@@ -376,7 +388,8 @@ npm run dev
|
|
|
376
388
|
## Practical notes
|
|
377
389
|
|
|
378
390
|
- Similarity is the dot product of query and stored vectors; with normalization enabled (default), this behaves like cosine similarity (1 = identical, -1 = opposite).
|
|
379
|
-
- `
|
|
391
|
+
- `limit` defaults to `Infinity`, so every matching vector is returned, sorted by similarity. Use `minSimilarity` and `maxSimilarity` to restrict results to a similarity range.
|
|
392
|
+
- `order` defaults to `"descend"` (most similar first). Use `"ascend"` to get least similar first.
|
|
380
393
|
- Querying an empty database returns an empty array (`[]`).
|
|
381
394
|
- `flush()` writes deduplicated state, and reopening the database preserves the key-to-slot mapping.
|
|
382
395
|
|