eigen-db 3.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.md +33 -31
- package/package.json +1 -1
- package/src/lib/__tests__/result-set.test.ts +76 -51
- package/src/lib/__tests__/vector-db.test.ts +22 -19
- package/src/lib/index.ts +10 -4
- package/src/lib/result-set.ts +53 -70
- package/src/lib/types.ts +11 -0
- package/src/lib/vector-db.ts +23 -6
package/CHANGELOG.md
CHANGED
package/README.md
CHANGED
|
@@ -50,15 +50,27 @@ Notes:
|
|
|
50
50
|
```ts
|
|
51
51
|
const queryVector = embeddingQuery;
|
|
52
52
|
|
|
53
|
+
// Returns a plain array of { key, score } sorted by similarity
|
|
53
54
|
const results = db.query(queryVector, { topK: 10 });
|
|
54
55
|
|
|
55
|
-
for (
|
|
56
|
-
|
|
57
|
-
|
|
56
|
+
for (const { key, score } of results) {
|
|
57
|
+
console.log(key, score);
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
For lazy iteration (useful for pagination or early stopping):
|
|
62
|
+
|
|
63
|
+
```ts
|
|
64
|
+
const results = db.query(queryVector, { topK: 100, iterable: true });
|
|
65
|
+
|
|
66
|
+
// Iterate and break early — keys are resolved on demand
|
|
67
|
+
for (const { key, score } of results) {
|
|
68
|
+
if (score < 0.5) break;
|
|
69
|
+
console.log(key, score);
|
|
58
70
|
}
|
|
59
71
|
|
|
60
|
-
// Or
|
|
61
|
-
const
|
|
72
|
+
// Or spread into an array when you need all results
|
|
73
|
+
const all = [...results];
|
|
62
74
|
```
|
|
63
75
|
|
|
64
76
|
### 4) Persist and lifecycle
|
|
@@ -80,10 +92,9 @@ await db.clear();
|
|
|
80
92
|
|
|
81
93
|
```ts
|
|
82
94
|
export { DB };
|
|
83
|
-
export { ResultSet };
|
|
84
95
|
export type { ResultItem };
|
|
85
96
|
export { VectorCapacityExceededError };
|
|
86
|
-
export type { OpenOptions, OpenOptionsInternal, SetOptions, QueryOptions, VectorInput };
|
|
97
|
+
export type { OpenOptions, OpenOptionsInternal, SetOptions, QueryOptions, IterableQueryOptions, VectorInput };
|
|
87
98
|
export { InMemoryStorageProvider, OPFSStorageProvider };
|
|
88
99
|
export type { StorageProvider };
|
|
89
100
|
```
|
|
@@ -114,8 +125,12 @@ Opens (or creates) a database instance and loads persisted data.
|
|
|
114
125
|
- Batch insert/update.
|
|
115
126
|
- `getMany(keys: string[]): (number[] | undefined)[]`
|
|
116
127
|
- Batch lookup.
|
|
117
|
-
- `query(value: VectorInput, options?: QueryOptions):
|
|
118
|
-
- Returns similarity-ranked results.
|
|
128
|
+
- `query(value: VectorInput, options?: QueryOptions): ResultItem[]`
|
|
129
|
+
- Returns similarity-ranked results as a plain array.
|
|
130
|
+
- Throws on dimension mismatch.
|
|
131
|
+
- `query(value: VectorInput, options: IterableQueryOptions): Iterable<ResultItem>`
|
|
132
|
+
- With `{ iterable: true }`, returns a lazy iterable. Keys are resolved
|
|
133
|
+
only as each item is consumed, enabling early stopping and pagination.
|
|
119
134
|
- Throws on dimension mismatch.
|
|
120
135
|
- `flush(): Promise<void>`
|
|
121
136
|
- Persists in-memory state to storage.
|
|
@@ -125,27 +140,6 @@ Opens (or creates) a database instance and loads persisted data.
|
|
|
125
140
|
- `clear(): Promise<void>`
|
|
126
141
|
- Clears in-memory state and destroys storage for this DB.
|
|
127
142
|
|
|
128
|
-
### `ResultSet`
|
|
129
|
-
|
|
130
|
-
Represents a lazily resolved, score-sorted search result collection.
|
|
131
|
-
|
|
132
|
-
#### Properties
|
|
133
|
-
|
|
134
|
-
- `length: number` — number of results available (bounded by `topK`)
|
|
135
|
-
|
|
136
|
-
#### Methods
|
|
137
|
-
|
|
138
|
-
- `get(rank: number): ResultItem`
|
|
139
|
-
- Returns the item at rank (`0` is best match).
|
|
140
|
-
- Throws `RangeError` when out of bounds.
|
|
141
|
-
- `getPage(page: number, pageSize: number): ResultItem[]`
|
|
142
|
-
- Convenience pagination helper.
|
|
143
|
-
|
|
144
|
-
#### Static
|
|
145
|
-
|
|
146
|
-
- `fromScores(scores, resolveKey, topK): ResultSet`
|
|
147
|
-
- Constructs a sorted lazy result set from raw scores.
|
|
148
|
-
|
|
149
143
|
### `ResultItem`
|
|
150
144
|
|
|
151
145
|
```ts
|
|
@@ -201,6 +195,14 @@ interface QueryOptions {
|
|
|
201
195
|
}
|
|
202
196
|
```
|
|
203
197
|
|
|
198
|
+
#### `IterableQueryOptions`
|
|
199
|
+
|
|
200
|
+
```ts
|
|
201
|
+
interface IterableQueryOptions extends QueryOptions {
|
|
202
|
+
iterable: true; // returns Iterable<ResultItem> instead of ResultItem[]
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
204
206
|
### Storage
|
|
205
207
|
|
|
206
208
|
#### `StorageProvider`
|
|
@@ -239,5 +241,5 @@ Thrown when memory growth would exceed WASM 32-bit memory limits for the configu
|
|
|
239
241
|
## Practical notes
|
|
240
242
|
|
|
241
243
|
- Similarity is dot product; with normalization enabled (default), this behaves like cosine similarity.
|
|
242
|
-
- Querying an empty database returns
|
|
244
|
+
- Querying an empty database returns an empty array (`[]`).
|
|
243
245
|
- `flush()` writes deduplicated state, and reopen preserves key-to-slot mapping.
|
package/package.json
CHANGED
|
@@ -1,69 +1,76 @@
|
|
|
1
|
-
import { describe,
|
|
2
|
-
import {
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { iterableResults, topKResults } from "../result-set";
|
|
3
3
|
|
|
4
|
-
describe("
|
|
4
|
+
describe("topKResults", () => {
|
|
5
5
|
const keys = ["apple", "banana", "cherry", "date", "elderberry"];
|
|
6
6
|
const resolveKey = (index: number) => keys[index];
|
|
7
7
|
|
|
8
8
|
it("sorts results by descending score", () => {
|
|
9
9
|
const scores = new Float32Array([0.3, 0.9, 0.1, 0.7, 0.5]);
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
expect(
|
|
13
|
-
expect(
|
|
14
|
-
expect(
|
|
15
|
-
expect(
|
|
16
|
-
expect(
|
|
17
|
-
expect(
|
|
18
|
-
expect(
|
|
19
|
-
expect(
|
|
10
|
+
const results = topKResults(scores, resolveKey, 5);
|
|
11
|
+
|
|
12
|
+
expect(results).toHaveLength(5);
|
|
13
|
+
expect(results[0].key).toBe("banana");
|
|
14
|
+
expect(results[0].score).toBeCloseTo(0.9, 4);
|
|
15
|
+
expect(results[1].key).toBe("date");
|
|
16
|
+
expect(results[1].score).toBeCloseTo(0.7, 4);
|
|
17
|
+
expect(results[2].key).toBe("elderberry");
|
|
18
|
+
expect(results[3].key).toBe("apple");
|
|
19
|
+
expect(results[4].key).toBe("cherry");
|
|
20
20
|
});
|
|
21
21
|
|
|
22
22
|
it("respects topK limit", () => {
|
|
23
23
|
const scores = new Float32Array([0.3, 0.9, 0.1, 0.7, 0.5]);
|
|
24
|
-
const
|
|
24
|
+
const results = topKResults(scores, resolveKey, 3);
|
|
25
25
|
|
|
26
|
-
expect(
|
|
27
|
-
expect(
|
|
28
|
-
expect(
|
|
29
|
-
expect(
|
|
26
|
+
expect(results).toHaveLength(3);
|
|
27
|
+
expect(results[0].key).toBe("banana");
|
|
28
|
+
expect(results[0].score).toBeCloseTo(0.9, 4);
|
|
29
|
+
expect(results[2].key).toBe("elderberry");
|
|
30
30
|
});
|
|
31
31
|
|
|
32
|
-
it("
|
|
33
|
-
const scores = new Float32Array(
|
|
34
|
-
const
|
|
32
|
+
it("handles empty scores", () => {
|
|
33
|
+
const scores = new Float32Array(0);
|
|
34
|
+
const results = topKResults(scores, resolveKey, 10);
|
|
35
|
+
expect(results).toEqual([]);
|
|
36
|
+
});
|
|
35
37
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
+
it("handles topK larger than result count", () => {
|
|
39
|
+
const scores = new Float32Array([0.5, 0.8]);
|
|
40
|
+
const results = topKResults(scores, resolveKey, 100);
|
|
41
|
+
expect(results).toHaveLength(2);
|
|
38
42
|
});
|
|
43
|
+
});
|
|
39
44
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
45
|
+
describe("iterableResults", () => {
|
|
46
|
+
const keys = ["apple", "banana", "cherry", "date", "elderberry"];
|
|
47
|
+
const resolveKey = (index: number) => keys[index];
|
|
43
48
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
expect(page0[1].key).toBe("date"); // score 0.4
|
|
49
|
+
it("sorts results by descending score", () => {
|
|
50
|
+
const scores = new Float32Array([0.3, 0.9, 0.1, 0.7, 0.5]);
|
|
51
|
+
const results = [...iterableResults(scores, resolveKey, 5)];
|
|
48
52
|
|
|
49
|
-
|
|
50
|
-
expect(
|
|
51
|
-
expect(
|
|
52
|
-
expect(
|
|
53
|
+
expect(results).toHaveLength(5);
|
|
54
|
+
expect(results[0].key).toBe("banana");
|
|
55
|
+
expect(results[0].score).toBeCloseTo(0.9, 4);
|
|
56
|
+
expect(results[1].key).toBe("date");
|
|
57
|
+
expect(results[4].key).toBe("cherry");
|
|
58
|
+
});
|
|
53
59
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
60
|
+
it("respects topK limit", () => {
|
|
61
|
+
const scores = new Float32Array([0.3, 0.9, 0.1, 0.7, 0.5]);
|
|
62
|
+
const results = [...iterableResults(scores, resolveKey, 3)];
|
|
63
|
+
|
|
64
|
+
expect(results).toHaveLength(3);
|
|
65
|
+
expect(results[0].key).toBe("banana");
|
|
57
66
|
});
|
|
58
67
|
|
|
59
|
-
it("handles empty
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
expect(rs.length).toBe(0);
|
|
63
|
-
expect(rs.getPage(0, 10)).toEqual([]);
|
|
68
|
+
it("handles empty scores", () => {
|
|
69
|
+
const results = [...iterableResults(new Float32Array(0), resolveKey, 10)];
|
|
70
|
+
expect(results).toEqual([]);
|
|
64
71
|
});
|
|
65
72
|
|
|
66
|
-
it("only resolves keys lazily (on
|
|
73
|
+
it("only resolves keys lazily (on consumption)", () => {
|
|
67
74
|
let callCount = 0;
|
|
68
75
|
const lazyResolver = (index: number) => {
|
|
69
76
|
callCount++;
|
|
@@ -71,20 +78,38 @@ describe("ResultSet", () => {
|
|
|
71
78
|
};
|
|
72
79
|
|
|
73
80
|
const scores = new Float32Array([0.3, 0.9, 0.1]);
|
|
74
|
-
const
|
|
81
|
+
const iterable = iterableResults(scores, lazyResolver, 3);
|
|
75
82
|
|
|
76
83
|
expect(callCount).toBe(0); // no key resolved yet
|
|
77
84
|
|
|
78
|
-
|
|
85
|
+
const iter = iterable[Symbol.iterator]();
|
|
86
|
+
iter.next();
|
|
79
87
|
expect(callCount).toBe(1); // resolved only 1
|
|
80
88
|
|
|
81
|
-
|
|
82
|
-
expect(callCount).toBe(
|
|
89
|
+
iter.next();
|
|
90
|
+
expect(callCount).toBe(2);
|
|
83
91
|
});
|
|
84
92
|
|
|
85
|
-
it("
|
|
86
|
-
const scores = new Float32Array([0.
|
|
87
|
-
const
|
|
88
|
-
|
|
93
|
+
it("is re-iterable", () => {
|
|
94
|
+
const scores = new Float32Array([0.3, 0.9, 0.1]);
|
|
95
|
+
const iterable = iterableResults(scores, resolveKey, 3);
|
|
96
|
+
|
|
97
|
+
const first = [...iterable];
|
|
98
|
+
const second = [...iterable];
|
|
99
|
+
expect(first).toEqual(second);
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
it("supports partial iteration (early break)", () => {
|
|
103
|
+
const scores = new Float32Array([0.1, 0.2, 0.3, 0.4, 0.5]);
|
|
104
|
+
const iterable = iterableResults(scores, resolveKey, 5);
|
|
105
|
+
|
|
106
|
+
const partial: string[] = [];
|
|
107
|
+
for (const item of iterable) {
|
|
108
|
+
partial.push(item.key);
|
|
109
|
+
if (partial.length === 2) break;
|
|
110
|
+
}
|
|
111
|
+
expect(partial).toHaveLength(2);
|
|
112
|
+
expect(partial[0]).toBe("elderberry"); // score 0.5
|
|
113
|
+
expect(partial[1]).toBe("date"); // score 0.4
|
|
89
114
|
});
|
|
90
115
|
});
|
|
@@ -181,16 +181,16 @@ describe("VectorDB", () => {
|
|
|
181
181
|
expect(results.length).toBe(3);
|
|
182
182
|
|
|
183
183
|
// x-axis should be the best match (identical direction)
|
|
184
|
-
expect(results
|
|
185
|
-
expect(results
|
|
184
|
+
expect(results[0].key).toBe("x-axis");
|
|
185
|
+
expect(results[0].score).toBeCloseTo(1.0, 2);
|
|
186
186
|
|
|
187
187
|
// xy-axis should be second (partially aligned)
|
|
188
|
-
expect(results
|
|
189
|
-
expect(results
|
|
188
|
+
expect(results[1].key).toBe("xy-axis");
|
|
189
|
+
expect(results[1].score).toBeGreaterThan(0);
|
|
190
190
|
|
|
191
191
|
// y-axis should be last (orthogonal)
|
|
192
|
-
expect(results
|
|
193
|
-
expect(results
|
|
192
|
+
expect(results[2].key).toBe("y-axis");
|
|
193
|
+
expect(results[2].score).toBeCloseTo(0.0, 2);
|
|
194
194
|
});
|
|
195
195
|
|
|
196
196
|
it("query respects topK option", async () => {
|
|
@@ -208,7 +208,7 @@ describe("VectorDB", () => {
|
|
|
208
208
|
expect(results.length).toBe(2);
|
|
209
209
|
});
|
|
210
210
|
|
|
211
|
-
it("query on empty database returns empty
|
|
211
|
+
it("query on empty database returns empty array", async () => {
|
|
212
212
|
const db = await VectorDB.open({
|
|
213
213
|
dimensions: 4,
|
|
214
214
|
storage,
|
|
@@ -216,7 +216,7 @@ describe("VectorDB", () => {
|
|
|
216
216
|
});
|
|
217
217
|
|
|
218
218
|
const results = db.query([1, 0, 0, 0]);
|
|
219
|
-
expect(results
|
|
219
|
+
expect(results).toEqual([]);
|
|
220
220
|
});
|
|
221
221
|
|
|
222
222
|
it("query validates vector dimensions", async () => {
|
|
@@ -230,7 +230,7 @@ describe("VectorDB", () => {
|
|
|
230
230
|
expect(() => db.query([1, 2, 3, 4, 5, 6, 7, 8])).toThrow("dimension mismatch");
|
|
231
231
|
});
|
|
232
232
|
|
|
233
|
-
it("query results support pagination", async () => {
|
|
233
|
+
it("query results support iterable mode for pagination", async () => {
|
|
234
234
|
const db = await VectorDB.open({
|
|
235
235
|
dimensions: 4,
|
|
236
236
|
normalize: false,
|
|
@@ -244,16 +244,19 @@ describe("VectorDB", () => {
|
|
|
244
244
|
db.set(`t${i}`, vec);
|
|
245
245
|
}
|
|
246
246
|
|
|
247
|
-
const results = db.query([1, 0, 0, 0], { normalize: false });
|
|
247
|
+
const results = db.query([1, 0, 0, 0], { normalize: false, iterable: true });
|
|
248
248
|
|
|
249
|
-
|
|
250
|
-
|
|
249
|
+
// Spread into array
|
|
250
|
+
const all = [...results];
|
|
251
|
+
expect(all).toHaveLength(5);
|
|
251
252
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
253
|
+
// Partial iteration (simulate pagination)
|
|
254
|
+
const page: { key: string; score: number }[] = [];
|
|
255
|
+
for (const item of results) {
|
|
256
|
+
page.push(item);
|
|
257
|
+
if (page.length === 2) break;
|
|
258
|
+
}
|
|
259
|
+
expect(page).toHaveLength(2);
|
|
257
260
|
});
|
|
258
261
|
|
|
259
262
|
it("query after overwrite uses updated vector", async () => {
|
|
@@ -271,8 +274,8 @@ describe("VectorDB", () => {
|
|
|
271
274
|
|
|
272
275
|
const results = db.query([0, 1, 0, 0]);
|
|
273
276
|
// Both 'point' and 'other' are now along y-axis, so both should score high
|
|
274
|
-
expect(results
|
|
275
|
-
expect(results
|
|
277
|
+
expect(results[0].score).toBeCloseTo(1.0, 2);
|
|
278
|
+
expect(results[1].score).toBeCloseTo(1.0, 2);
|
|
276
279
|
expect(db.size).toBe(2);
|
|
277
280
|
});
|
|
278
281
|
|
package/src/lib/index.ts
CHANGED
|
@@ -1,14 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* eigen-db: High-Performance In-Browser Vector Database
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Stores and queries embedding vectors entirely on the client side,
|
|
5
5
|
* utilizing OPFS for persistent storage and WASM SIMD for computation.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
export { VectorCapacityExceededError } from "./errors";
|
|
9
|
-
export { ResultSet } from "./result-set";
|
|
10
9
|
export type { ResultItem } from "./result-set";
|
|
11
10
|
export { InMemoryStorageProvider, OPFSStorageProvider } from "./storage";
|
|
12
11
|
export type { StorageProvider } from "./storage";
|
|
13
|
-
export type {
|
|
12
|
+
export type {
|
|
13
|
+
IterableQueryOptions,
|
|
14
|
+
OpenOptions,
|
|
15
|
+
OpenOptionsInternal,
|
|
16
|
+
QueryOptions,
|
|
17
|
+
SetOptions,
|
|
18
|
+
VectorInput,
|
|
19
|
+
} from "./types";
|
|
14
20
|
export { VectorDB as DB } from "./vector-db";
|
package/src/lib/result-set.ts
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* RESULT HELPERS
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Utility functions for sorting scores and producing query results.
|
|
5
|
+
* Two modes:
|
|
6
|
+
* 1. topKResults — eagerly materializes a ResultItem[] (default query path)
|
|
7
|
+
* 2. iterableResults — returns a lazy Iterable<ResultItem> where keys are
|
|
8
|
+
* resolved only as each item is consumed (for pagination / streaming)
|
|
6
9
|
*/
|
|
7
10
|
|
|
8
11
|
export interface ResultItem {
|
|
@@ -12,78 +15,58 @@ export interface ResultItem {
|
|
|
12
15
|
|
|
13
16
|
export type KeyResolver = (index: number) => string;
|
|
14
17
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
*/
|
|
23
|
-
private readonly sortedIndices: Uint32Array;
|
|
24
|
-
|
|
25
|
-
/** Raw scores array (not sorted, indexed by original DB position) */
|
|
26
|
-
private readonly scores: Float32Array;
|
|
18
|
+
/**
 * Sort scores descending and return the top K results as a plain array.
 * All keys are resolved eagerly, before the function returns.
 *
 * @param scores - Raw similarity scores; position `i` is the index handed to `resolveKey`.
 * @param resolveKey - Maps a position in `scores` to the key reported in the result.
 * @param topK - Maximum number of results to return. Values larger than
 *   `scores.length` are capped, fractional values are truncated, and
 *   negative or NaN values produce an empty array.
 * @returns At most `topK` items ordered from highest to lowest score.
 */
export function topKResults(scores: Float32Array, resolveKey: KeyResolver, topK: number): ResultItem[] {
  const n = scores.length;

  // Clamp to an integer in [0, n]. Without this, `new Array(k)` throws a
  // RangeError for negative, fractional, or NaN lengths.
  const k = Number.isNaN(topK) ? 0 : Math.max(0, Math.min(Math.floor(topK), n));
  if (k === 0) return [];

  // Sort positions rather than the scores themselves so each score stays
  // associated with the index that `resolveKey` expects.
  const indices = new Uint32Array(n);
  for (let i = 0; i < n; i++) indices[i] = i;
  indices.sort((a, b) => scores[b] - scores[a]);

  const results: ResultItem[] = new Array(k);
  for (let i = 0; i < k; i++) {
    const idx = indices[i];
    results[i] = { key: resolveKey(idx), score: scores[idx] };
  }
  return results;
}
|
|
42
38
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
): ResultSet {
|
|
55
|
-
const n = scores.length;
|
|
56
|
-
|
|
57
|
-
// Create index array for sorting
|
|
58
|
-
const indices = new Uint32Array(n);
|
|
59
|
-
for (let i = 0; i < n; i++) indices[i] = i;
|
|
60
|
-
|
|
61
|
-
// Sort indices by descending score
|
|
62
|
-
indices.sort((a, b) => scores[b] - scores[a]);
|
|
39
|
+
/**
 * Sort scores descending and return a lazy iterable over the top K results.
 * Sorting happens up front; `resolveKey` is invoked only when an item is
 * pulled from an iterator, so partial consumption (e.g. pagination or an
 * early `break`) resolves only the keys actually visited.
 *
 * The returned iterable is re-iterable: each call to `[Symbol.iterator]()`
 * starts a fresh cursor over the same pre-sorted indices.
 *
 * Note: `scores` and `resolveKey` are read at consumption time, not copied,
 * so the caller must keep them unchanged for as long as the iterable is used.
 *
 * @param scores - Raw similarity scores; position `i` is the index handed to `resolveKey`.
 * @param resolveKey - Maps a position in `scores` to the key reported in the result.
 * @param topK - Maximum number of items to yield. Values larger than
 *   `scores.length` are capped, fractional values are truncated, and
 *   negative or NaN values produce an empty iterable.
 * @returns An iterable yielding at most `topK` items from highest to lowest score.
 */
export function iterableResults(scores: Float32Array, resolveKey: KeyResolver, topK: number): Iterable<ResultItem> {
  const n = scores.length;

  // Clamp to an integer in [0, n]. A NaN limit would otherwise make the
  // `cursor >= k` stop condition permanently false and the iterator endless.
  const k = Number.isNaN(topK) ? 0 : Math.max(0, Math.min(Math.floor(topK), n));
  if (k === 0) return [];

  // Sort positions rather than the scores themselves so each score stays
  // associated with the index that `resolveKey` expects.
  const indices = new Uint32Array(n);
  for (let i = 0; i < n; i++) indices[i] = i;
  indices.sort((a, b) => scores[b] - scores[a]);

  return {
    [Symbol.iterator](): Iterator<ResultItem> {
      // Per-iterator cursor: keeps separate iterations independent.
      let cursor = 0;
      return {
        next(): IteratorResult<ResultItem> {
          if (cursor >= k) return { done: true, value: undefined };
          const idx = indices[cursor++];
          return {
            done: false,
            value: { key: resolveKey(idx), score: scores[idx] },
          };
        },
      };
    },
  };
}
|
package/src/lib/types.ts
CHANGED
|
@@ -36,6 +36,7 @@ export interface SetOptions {
|
|
|
36
36
|
|
|
37
37
|
/**
|
|
38
38
|
* Options for query operations.
|
|
39
|
+
* Returns a plain ResultItem[] array by default.
|
|
39
40
|
*/
|
|
40
41
|
export interface QueryOptions {
|
|
41
42
|
/** Maximum number of results to return. Defaults to all. */
|
|
@@ -43,3 +44,13 @@ export interface QueryOptions {
|
|
|
43
44
|
/** Override normalization for this call. */
|
|
44
45
|
normalize?: boolean;
|
|
45
46
|
}
|
|
47
|
+
|
|
48
|
+
/**
 * Variant of the query options that opts into lazy results.
 *
 * Passing these options makes a query hand back an `Iterable<ResultItem>`
 * rather than a materialized `ResultItem[]`. Each key is resolved at the
 * moment its item is consumed, which avoids allocations for callers that
 * stop before reading the full result set.
 */
export interface IterableQueryOptions extends QueryOptions {
  /** Must be the literal `true`; selects the `Iterable<ResultItem>` return shape over `ResultItem[]`. */
  iterable: true;
}
|
package/src/lib/vector-db.ts
CHANGED
|
@@ -15,11 +15,19 @@ import { normalize, searchAll } from "./compute";
|
|
|
15
15
|
import { VectorCapacityExceededError } from "./errors";
|
|
16
16
|
import { decodeLexicon, encodeLexicon } from "./lexicon";
|
|
17
17
|
import { MemoryManager } from "./memory-manager";
|
|
18
|
-
import {
|
|
18
|
+
import type { ResultItem } from "./result-set";
|
|
19
|
+
import { iterableResults, topKResults } from "./result-set";
|
|
19
20
|
import { getSimdWasmBinary } from "./simd-binary";
|
|
20
21
|
import type { StorageProvider } from "./storage";
|
|
21
22
|
import { OPFSStorageProvider } from "./storage";
|
|
22
|
-
import type {
|
|
23
|
+
import type {
|
|
24
|
+
IterableQueryOptions,
|
|
25
|
+
OpenOptions,
|
|
26
|
+
OpenOptionsInternal,
|
|
27
|
+
QueryOptions,
|
|
28
|
+
SetOptions,
|
|
29
|
+
VectorInput,
|
|
30
|
+
} from "./types";
|
|
23
31
|
import { instantiateWasm, type WasmExports } from "./wasm-compute";
|
|
24
32
|
|
|
25
33
|
const VECTORS_FILE = "vectors.bin";
|
|
@@ -183,15 +191,21 @@ export class VectorDB {
|
|
|
183
191
|
|
|
184
192
|
/**
|
|
185
193
|
* Search for the most similar vectors to the given query vector.
|
|
186
|
-
*
|
|
194
|
+
*
|
|
195
|
+
* Default: returns a plain ResultItem[] sorted by descending similarity.
|
|
196
|
+
* With `{ iterable: true }`: returns a lazy Iterable<ResultItem> where keys
|
|
197
|
+
* are resolved only as each item is consumed.
|
|
187
198
|
*/
|
|
188
|
-
query(value: VectorInput, options
|
|
199
|
+
query(value: VectorInput, options: IterableQueryOptions): Iterable<ResultItem>;
|
|
200
|
+
query(value: VectorInput, options?: QueryOptions): ResultItem[];
|
|
201
|
+
query(value: VectorInput, options?: QueryOptions | IterableQueryOptions): ResultItem[] | Iterable<ResultItem> {
|
|
189
202
|
this.assertOpen();
|
|
190
203
|
|
|
191
204
|
const k = options?.topK ?? this.size;
|
|
205
|
+
const iterable = options && "iterable" in options && options.iterable;
|
|
192
206
|
|
|
193
207
|
if (this.size === 0) {
|
|
194
|
-
return
|
|
208
|
+
return [];
|
|
195
209
|
}
|
|
196
210
|
|
|
197
211
|
if (value.length !== this.dimensions) {
|
|
@@ -248,7 +262,10 @@ export class VectorDB {
|
|
|
248
262
|
return slotToKey[slotIndex];
|
|
249
263
|
};
|
|
250
264
|
|
|
251
|
-
|
|
265
|
+
if (iterable) {
|
|
266
|
+
return iterableResults(scores, resolveKey, k);
|
|
267
|
+
}
|
|
268
|
+
return topKResults(scores, resolveKey, k);
|
|
252
269
|
}
|
|
253
270
|
|
|
254
271
|
/**
|