@omendb/omendb 0.0.23 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +320 -18
- package/index.d.ts +72 -9
- package/index.js +95 -5
- package/package.json +7 -7
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@ Fast embedded vector database with HNSW indexing for Node.js and Bun.
|
|
|
8
8
|
npm install omendb
|
|
9
9
|
```
|
|
10
10
|
|
|
11
|
-
##
|
|
11
|
+
## Quick Start
|
|
12
12
|
|
|
13
13
|
```typescript
|
|
14
14
|
import { open } from "omendb";
|
|
@@ -26,57 +26,359 @@ db.set([
|
|
|
26
26
|
{
|
|
27
27
|
id: "doc2",
|
|
28
28
|
vector: new Float32Array(384).fill(0.2),
|
|
29
|
-
metadata: {
|
|
29
|
+
metadata: { category: "news" },
|
|
30
30
|
},
|
|
31
31
|
]);
|
|
32
32
|
|
|
33
33
|
// Search
|
|
34
|
-
const results = db.search(new Float32Array(384).fill(0.15),
|
|
35
|
-
console.log(results);
|
|
34
|
+
const results = db.search(new Float32Array(384).fill(0.15), 5);
|
|
36
35
|
// [{ id: 'doc1', distance: 0.05, metadata: { title: 'Hello' } }, ...]
|
|
37
36
|
|
|
38
37
|
// Batch search (async, parallel)
|
|
39
|
-
const batchResults = await db.searchBatch(queries,
|
|
38
|
+
const batchResults = await db.searchBatch(queries, 10);
|
|
39
|
+
|
|
40
|
+
// Close when done (releases file locks)
|
|
41
|
+
db.close();
|
|
40
42
|
```
|
|
41
43
|
|
|
42
44
|
## Features
|
|
43
45
|
|
|
44
46
|
- HNSW indexing for fast approximate nearest neighbor search
|
|
45
47
|
- ACORN-1 filtered search
|
|
46
|
-
-
|
|
48
|
+
- SQ8 quantization (4x compression, ~99% recall)
|
|
49
|
+
- Hybrid search (vector + BM25 text)
|
|
47
50
|
- Collections for multi-tenancy
|
|
48
51
|
- Persistent storage with auto-save
|
|
49
52
|
- Works with Node.js 18+ and Bun
|
|
50
53
|
|
|
51
54
|
## API
|
|
52
55
|
|
|
53
|
-
###
|
|
56
|
+
### Opening a Database
|
|
57
|
+
|
|
58
|
+
```typescript
|
|
59
|
+
import { open } from "omendb";
|
|
60
|
+
|
|
61
|
+
// Basic
|
|
62
|
+
const db = open("./vectors", { dimensions: 384 });
|
|
63
|
+
|
|
64
|
+
// In-memory
|
|
65
|
+
const memDb = open(":memory:", { dimensions: 128 });
|
|
54
66
|
|
|
55
|
-
|
|
67
|
+
// Full options
|
|
68
|
+
const db = open("./vectors", {
|
|
69
|
+
dimensions: 768,
|
|
70
|
+
m: 16, // HNSW connections per node (default: 16)
|
|
71
|
+
efConstruction: 100, // Build quality (default: 100)
|
|
72
|
+
efSearch: 100, // Search quality (default: 100)
|
|
73
|
+
quantization: true, // SQ8: 4x compression, ~99% recall
|
|
74
|
+
metric: "cosine", // "l2", "cosine", or "dot"
|
|
75
|
+
});
|
|
76
|
+
```
|
|
56
77
|
|
|
57
|
-
|
|
58
|
-
- `options.dimensions`: Vector dimensionality (default: 128)
|
|
78
|
+
### Core Operations
|
|
59
79
|
|
|
60
|
-
|
|
80
|
+
#### `db.set(items)`
|
|
61
81
|
|
|
62
82
|
Insert or update vectors.
|
|
63
83
|
|
|
64
|
-
|
|
84
|
+
```typescript
|
|
85
|
+
db.set([
|
|
86
|
+
{ id: "doc1", vector: Float32Array, metadata?: object },
|
|
87
|
+
{ id: "doc2", vector: Float32Array, metadata?: object },
|
|
88
|
+
]);
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
#### `db.get(id)`
|
|
92
|
+
|
|
93
|
+
Get a vector by ID.
|
|
94
|
+
|
|
95
|
+
```typescript
|
|
96
|
+
const item = db.get("doc1");
|
|
97
|
+
// { id: "doc1", vector: Float32Array, metadata: {...} } or null
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
#### `db.getBatch(ids)`
|
|
101
|
+
|
|
102
|
+
Get multiple vectors by ID.
|
|
103
|
+
|
|
104
|
+
```typescript
|
|
105
|
+
const items = db.getBatch(["doc1", "doc2"]);
|
|
106
|
+
// [{ id, vector, metadata } | null, ...]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
#### `db.update(id, options)`
|
|
110
|
+
|
|
111
|
+
Update a vector's data and/or metadata.
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
114
|
+
db.update("doc1", {
|
|
115
|
+
vector: newVector, // Optional
|
|
116
|
+
metadata: { title: "New" }, // Optional
|
|
117
|
+
});
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
#### `db.delete(ids)`
|
|
121
|
+
|
|
122
|
+
Delete vectors by ID.
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
const deleted = db.delete(["doc1", "doc2"]);
|
|
126
|
+
// Returns number deleted
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
#### `db.deleteByFilter(filter)`
|
|
130
|
+
|
|
131
|
+
Delete vectors matching a filter.
|
|
132
|
+
|
|
133
|
+
```typescript
|
|
134
|
+
const deleted = db.deleteByFilter({ category: "old" });
|
|
135
|
+
const deleted = db.deleteByFilter({
|
|
136
|
+
$and: [{ type: "draft" }, { age: { $gt: 30 } }],
|
|
137
|
+
});
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
### Search
|
|
141
|
+
|
|
142
|
+
#### `db.search(query, k, options?)`
|
|
65
143
|
|
|
66
144
|
Search for k nearest neighbors (sync).
|
|
67
145
|
|
|
68
|
-
|
|
146
|
+
```typescript
|
|
147
|
+
const results = db.search(queryVector, 10); // Basic
|
|
148
|
+
const results = db.search(queryVector, 10, {
|
|
149
|
+
ef: 200, // Search quality (higher = better recall)
|
|
150
|
+
filter: { category: "news" }, // Metadata filter
|
|
151
|
+
maxDistance: 0.5, // Distance threshold
|
|
152
|
+
});
|
|
153
|
+
// [{ id, distance, metadata }, ...]
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### `db.searchBatch(queries, k, ef?)`
|
|
69
157
|
|
|
70
158
|
Batch search with parallel execution (async).
|
|
71
159
|
|
|
72
|
-
|
|
160
|
+
```typescript
|
|
161
|
+
const results = await db.searchBatch(queries, 10, 100);
|
|
162
|
+
// [[{ id, distance, metadata }, ...], ...]
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Text & Hybrid Search
|
|
73
166
|
|
|
74
|
-
|
|
167
|
+
#### `db.enableTextSearch(bufferMb?)`
|
|
75
168
|
|
|
76
|
-
|
|
169
|
+
Enable text indexing for hybrid search.
|
|
77
170
|
|
|
78
|
-
|
|
171
|
+
```typescript
|
|
172
|
+
db.enableTextSearch(); // Default 64MB buffer
|
|
173
|
+
db.enableTextSearch(128); // Custom buffer size
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
#### `db.hasTextSearch`
|
|
177
|
+
|
|
178
|
+
Check if text search is enabled.
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
if (db.hasTextSearch) { ... }
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
#### `db.setWithText(items)`
|
|
185
|
+
|
|
186
|
+
Insert vectors with text content.
|
|
187
|
+
|
|
188
|
+
```typescript
|
|
189
|
+
db.setWithText([
|
|
190
|
+
{ id: "doc1", vector: vec, text: "Machine learning tutorial", metadata: {...} }
|
|
191
|
+
]);
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
#### `db.textSearch(query, k)`
|
|
195
|
+
|
|
196
|
+
BM25 text-only search.
|
|
197
|
+
|
|
198
|
+
```typescript
|
|
199
|
+
const results = db.textSearch("machine learning", 10);
|
|
200
|
+
// [{ id, score, metadata }, ...]
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
#### `db.hybridSearch(options)`
|
|
204
|
+
|
|
205
|
+
Combined vector + text search.
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
const results = db.hybridSearch({
|
|
209
|
+
vector: queryVector,
|
|
210
|
+
text: "machine learning",
|
|
211
|
+
k: 10,
|
|
212
|
+
alpha: 0.7, // 0=text only, 1=vector only (default: 0.5)
|
|
213
|
+
subscores: true, // Include separate scores
|
|
214
|
+
});
|
|
215
|
+
// [{ id, score, metadata, keywordScore?, semanticScore? }, ...]
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
### Collections
|
|
219
|
+
|
|
220
|
+
#### `db.collection(name)`
|
|
221
|
+
|
|
222
|
+
Get or create a named collection.
|
|
223
|
+
|
|
224
|
+
```typescript
|
|
225
|
+
const users = db.collection("users");
|
|
226
|
+
users.set([...]);
|
|
227
|
+
users.search(query, 5);
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
#### `db.collections()`
|
|
231
|
+
|
|
232
|
+
List all collections.
|
|
233
|
+
|
|
234
|
+
```typescript
|
|
235
|
+
const names = db.collections();
|
|
236
|
+
// ["users", "products", ...]
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
#### `db.deleteCollection(name)`
|
|
240
|
+
|
|
241
|
+
Delete a collection.
|
|
242
|
+
|
|
243
|
+
```typescript
|
|
244
|
+
db.deleteCollection("old_collection");
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Properties
|
|
248
|
+
|
|
249
|
+
```typescript
|
|
250
|
+
db.length; // Number of vectors
|
|
251
|
+
db.dimensions; // Vector dimensionality
|
|
252
|
+
db.efSearch; // Get/set search quality parameter
|
|
253
|
+
|
|
254
|
+
db.efSearch = 200; // Tune for better recall
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
### Utility Methods
|
|
258
|
+
|
|
259
|
+
#### `db.count(filter?)`
|
|
260
|
+
|
|
261
|
+
Count vectors, optionally with filter.
|
|
262
|
+
|
|
263
|
+
```typescript
|
|
264
|
+
const total = db.count();
|
|
265
|
+
const filtered = db.count({ category: "news" });
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
#### `db.isEmpty()`
|
|
269
|
+
|
|
270
|
+
Check if database is empty.
|
|
271
|
+
|
|
272
|
+
#### `db.exists(id)`
|
|
273
|
+
|
|
274
|
+
Check if an ID exists.
|
|
275
|
+
|
|
276
|
+
```typescript
|
|
277
|
+
if (db.exists("doc1")) { ... }
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
#### `db.ids()`
|
|
281
|
+
|
|
282
|
+
Get all vector IDs.
|
|
283
|
+
|
|
284
|
+
```typescript
|
|
285
|
+
const allIds = db.ids();
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
#### `db.items()`
|
|
289
|
+
|
|
290
|
+
Get all vectors with metadata.
|
|
291
|
+
|
|
292
|
+
```typescript
|
|
293
|
+
const allItems = db.items();
|
|
294
|
+
// [{ id, vector, metadata }, ...]
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
#### `db.stats()`
|
|
298
|
+
|
|
299
|
+
Get index statistics.
|
|
300
|
+
|
|
301
|
+
```typescript
|
|
302
|
+
const stats = db.stats();
|
|
303
|
+
// { numVectors, dimensions, maxLevel, avgNeighborsL0, ... }
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
### Persistence
|
|
307
|
+
|
|
308
|
+
#### `db.flush()`
|
|
309
|
+
|
|
310
|
+
Force write pending changes to disk.
|
|
311
|
+
|
|
312
|
+
```typescript
|
|
313
|
+
db.flush();
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
#### `db.compact()`
|
|
317
|
+
|
|
318
|
+
Remove deleted records and reclaim space.
|
|
319
|
+
|
|
320
|
+
```typescript
|
|
321
|
+
const removed = db.compact();
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
#### `db.optimize()`
|
|
325
|
+
|
|
326
|
+
Reorder graph for better cache locality (6-40% speedup).
|
|
327
|
+
|
|
328
|
+
```typescript
|
|
329
|
+
const reordered = db.optimize();
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
#### `db.close()`
|
|
333
|
+
|
|
334
|
+
Close database and release file locks.
|
|
335
|
+
|
|
336
|
+
```typescript
|
|
337
|
+
db.close();
|
|
338
|
+
// Can now reopen the same path
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
#### `db.mergeFrom(other)`
|
|
342
|
+
|
|
343
|
+
Merge another database into this one.
|
|
344
|
+
|
|
345
|
+
```typescript
|
|
346
|
+
const merged = db.mergeFrom(otherDb);
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
### Filter Operators
|
|
350
|
+
|
|
351
|
+
```typescript
|
|
352
|
+
// Equality
|
|
353
|
+
{ field: "value" } // Shorthand
|
|
354
|
+
{ field: { $eq: "value" } } // Explicit
|
|
355
|
+
|
|
356
|
+
// Comparison
|
|
357
|
+
{ field: { $ne: "value" } } // Not equal
|
|
358
|
+
{ field: { $gt: 10 } } // Greater than
|
|
359
|
+
{ field: { $gte: 10 } } // Greater or equal
|
|
360
|
+
{ field: { $lt: 10 } } // Less than
|
|
361
|
+
{ field: { $lte: 10 } } // Less or equal
|
|
362
|
+
|
|
363
|
+
// Membership
|
|
364
|
+
{ field: { $in: ["a", "b"] } } // In list
|
|
365
|
+
{ field: { $nin: ["a", "b"] } } // Not in list
|
|
366
|
+
|
|
367
|
+
// Logical
|
|
368
|
+
{ $and: [{...}, {...}] } // AND
|
|
369
|
+
{ $or: [{...}, {...}] } // OR
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
## Performance
|
|
373
|
+
|
|
374
|
+
**10K vectors, 128D, M=16, ef=100. Measured 2026-01-20 (Apple M3 Max):**
|
|
375
|
+
|
|
376
|
+
| Metric | Value |
|
|
377
|
+
| ---------- | ------------ |
|
|
378
|
+
| Search QPS | 11,542 |
|
|
379
|
+
| Build | 30,826 vec/s |
|
|
380
|
+
| Recall@10 | 89.7% |
|
|
79
381
|
|
|
80
382
|
## License
|
|
81
383
|
|
|
82
|
-
|
|
384
|
+
[Elastic License 2.0](../LICENSE)
|
package/index.d.ts
CHANGED
|
@@ -4,9 +4,14 @@ export declare class VectorDatabase {
|
|
|
4
4
|
/**
|
|
5
5
|
* Insert or update vectors.
|
|
6
6
|
*
|
|
7
|
-
*
|
|
7
|
+
* Works for both single-vector and multi-vector stores:
|
|
8
|
+
* - Single-vector: items have `vector` field
|
|
9
|
+
* - Multi-vector: items have `vectors` field (array of vectors)
|
|
10
|
+
*
|
|
11
|
+
* @param items - Array of {id, vector, metadata?} or {id, vectors, metadata?}
|
|
12
|
+
* @returns Array of internal indices
|
|
8
13
|
*/
|
|
9
|
-
set(items: Array<
|
|
14
|
+
set(items: Array<SetItem>): Array<number>
|
|
10
15
|
/**
|
|
11
16
|
* Search for k nearest neighbors.
|
|
12
17
|
*
|
|
@@ -18,6 +23,18 @@ export declare class VectorDatabase {
|
|
|
18
23
|
* @returns Array of {id, distance, metadata}
|
|
19
24
|
*/
|
|
20
25
|
search(query: Array<number> | Float32Array, k: number, ef?: number | undefined | null, filter?: Record<string, unknown> | undefined, maxDistance?: number | undefined | null): Array<SearchResult>
|
|
26
|
+
/**
|
|
27
|
+
* Search multi-vector store with query tokens.
|
|
28
|
+
*
|
|
29
|
+
* Internal method used by unified search() for multi-vector stores.
|
|
30
|
+
*
|
|
31
|
+
* @param query - Query tokens (number[][] or Float32Array[])
|
|
32
|
+
* @param k - Number of results to return
|
|
33
|
+
* @param rerank - Enable MaxSim reranking for better quality (default: true)
|
|
34
|
+
* @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
|
|
35
|
+
* @returns Array of {id, distance, metadata}
|
|
36
|
+
*/
|
|
37
|
+
searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
|
|
21
38
|
/**
|
|
22
39
|
* Batch search with parallel execution (async).
|
|
23
40
|
*
|
|
@@ -83,6 +100,8 @@ export declare class VectorDatabase {
|
|
|
83
100
|
get length(): number
|
|
84
101
|
/** Get vector dimensions of this database. */
|
|
85
102
|
get dimensions(): number
|
|
103
|
+
/** Check if this is a multi-vector store. */
|
|
104
|
+
get isMultiVector(): boolean
|
|
86
105
|
/** Check if database is empty. */
|
|
87
106
|
isEmpty(): boolean
|
|
88
107
|
/** Get database statistics. */
|
|
@@ -146,6 +165,24 @@ export declare class VectorDatabase {
|
|
|
146
165
|
* For hybrid search, this commits text index changes.
|
|
147
166
|
*/
|
|
148
167
|
flush(): void
|
|
168
|
+
/**
|
|
169
|
+
* Compact the database by removing deleted records and reclaiming space.
|
|
170
|
+
*
|
|
171
|
+
* This operation removes tombstoned records, reassigns indices to be
|
|
172
|
+
* contiguous, and rebuilds the search index. Call after bulk deletes
|
|
173
|
+
* to reclaim memory and improve search performance.
|
|
174
|
+
*
|
|
175
|
+
* @returns Number of deleted records that were removed
|
|
176
|
+
*
|
|
177
|
+
* @example
|
|
178
|
+
* ```typescript
|
|
179
|
+
* // After bulk delete
|
|
180
|
+
* db.delete(staleIds);
|
|
181
|
+
* const removed = db.compact();
|
|
182
|
+
* console.log(`Removed ${removed} deleted records`);
|
|
183
|
+
* ```
|
|
184
|
+
*/
|
|
185
|
+
compact(): number
|
|
149
186
|
/**
|
|
150
187
|
* Close the database and release file locks.
|
|
151
188
|
*
|
|
@@ -216,6 +253,14 @@ export interface HybridSearchResult {
|
|
|
216
253
|
semanticScore?: number
|
|
217
254
|
}
|
|
218
255
|
|
|
256
|
+
export interface MultiVectorItem {
|
|
257
|
+
id: string
|
|
258
|
+
/** Multi-vector data as array of Float32Arrays */
|
|
259
|
+
vectors: Float32Array[]
|
|
260
|
+
/** Optional metadata */
|
|
261
|
+
metadata?: Record<string, unknown> | undefined
|
|
262
|
+
}
|
|
263
|
+
|
|
219
264
|
/**
|
|
220
265
|
* Open or create a vector database.
|
|
221
266
|
*
|
|
@@ -236,16 +281,16 @@ export interface HybridSearchResult {
|
|
|
236
281
|
* efSearch: 150
|
|
237
282
|
* });
|
|
238
283
|
*
|
|
239
|
-
* // With
|
|
284
|
+
* // With SQ8 quantization (4x memory reduction, ~99% recall)
|
|
240
285
|
* const db = omendb.open("./mydb", {
|
|
241
286
|
* dimensions: 128,
|
|
242
|
-
* quantization:
|
|
287
|
+
* quantization: true // or "sq8"
|
|
243
288
|
* });
|
|
244
289
|
*
|
|
245
290
|
* // Quantization with custom rescore settings
|
|
246
291
|
* const db = omendb.open("./mydb", {
|
|
247
292
|
* dimensions: 128,
|
|
248
|
-
* quantization:
|
|
293
|
+
* quantization: true,
|
|
249
294
|
* rescore: false, // Disable rescore for max speed
|
|
250
295
|
* oversample: 5.0 // Or increase oversample for better recall
|
|
251
296
|
* });
|
|
@@ -261,7 +306,7 @@ export declare function open(path: string, options?: OpenOptions | undefined | n
|
|
|
261
306
|
* - m: 16 (HNSW neighbors per node, higher = better recall, more memory)
|
|
262
307
|
* - efConstruction: 100 (build quality, higher = better graph, slower build)
|
|
263
308
|
* - efSearch: 100 (search quality, higher = better recall, slower search)
|
|
264
|
-
* - quantization: null (
|
|
309
|
+
* - quantization: null (true/"sq8" for 4x compression, ~99% recall)
|
|
265
310
|
* - rescore: true when quantization enabled (rerank candidates with exact distance)
|
|
266
311
|
* - oversample: 3.0 (fetch k*oversample candidates when rescoring)
|
|
267
312
|
* - metric: "l2" (distance metric: "l2", "euclidean", "cosine", "dot", "ip")
|
|
@@ -278,9 +323,7 @@ export interface OpenOptions {
|
|
|
278
323
|
/**
|
|
279
324
|
* Quantization mode (default: null = no quantization)
|
|
280
325
|
* - true or "sq8": SQ8 4x compression, ~99% recall (RECOMMENDED)
|
|
281
|
-
* -
|
|
282
|
-
* - "binary": Binary 32x compression, ~95% recall
|
|
283
|
-
* - 2, 4, 8: RaBitQ with specific bits (legacy)
|
|
326
|
+
* - false/null: Full precision (no quantization)
|
|
284
327
|
*/
|
|
285
328
|
quantization?: boolean | string | number | null | undefined
|
|
286
329
|
/**
|
|
@@ -295,6 +338,14 @@ export interface OpenOptions {
|
|
|
295
338
|
oversample?: number
|
|
296
339
|
/** Distance metric: "l2"/"euclidean" (default), "cosine", "dot"/"ip" */
|
|
297
340
|
metric?: string
|
|
341
|
+
/**
|
|
342
|
+
* Enable multi-vector mode for ColBERT-style retrieval
|
|
343
|
+
* - true: Enable with default config (repetitions=8, partitionBits=4, dProj=16)
|
|
344
|
+
* - { repetitions?, partitionBits?, seed?, dProj? }: Custom config
|
|
345
|
+
* - dProj: Dimension projection (16 = 8x smaller FDE, null = full token dim)
|
|
346
|
+
* - false/null: Disabled (default, single-vector mode)
|
|
347
|
+
*/
|
|
348
|
+
multiVector?: boolean | { repetitions?: number; partitionBits?: number; seed?: number; dProj?: number | null } | null | undefined
|
|
298
349
|
}
|
|
299
350
|
|
|
300
351
|
export interface SearchResult {
|
|
@@ -304,6 +355,18 @@ export interface SearchResult {
|
|
|
304
355
|
metadata: Record<string, unknown>
|
|
305
356
|
}
|
|
306
357
|
|
|
358
|
+
export interface SetItem {
|
|
359
|
+
id: string
|
|
360
|
+
/** Single vector data (for regular stores) */
|
|
361
|
+
vector?: Float32Array
|
|
362
|
+
/** Multi-vector data (for multi-vector stores) */
|
|
363
|
+
vectors?: Float32Array[] | undefined
|
|
364
|
+
/** Optional metadata */
|
|
365
|
+
metadata?: Record<string, unknown> | undefined
|
|
366
|
+
/** Optional document text (stored in metadata.document) */
|
|
367
|
+
document?: string
|
|
368
|
+
}
|
|
369
|
+
|
|
307
370
|
export interface StatsResult {
|
|
308
371
|
dimensions: number
|
|
309
372
|
count: number
|
package/index.js
CHANGED
|
@@ -111,12 +111,41 @@ function toFloat32Array(arr) {
|
|
|
111
111
|
|
|
112
112
|
// Convert VectorItem to use Float32Array
|
|
113
113
|
function convertVectorItem(item) {
|
|
114
|
+
if (item.vector === undefined || item.vector === null) {
|
|
115
|
+
if (Array.isArray(item.vectors)) {
|
|
116
|
+
throw new Error(
|
|
117
|
+
`Item '${item.id}' has 'vectors' field but store is single-vector. Use multiVector: true when opening the database.`,
|
|
118
|
+
);
|
|
119
|
+
}
|
|
120
|
+
throw new Error(`Item '${item.id}' missing required 'vector' field`);
|
|
121
|
+
}
|
|
114
122
|
return {
|
|
115
123
|
...item,
|
|
116
124
|
vector: toFloat32Array(item.vector),
|
|
117
125
|
};
|
|
118
126
|
}
|
|
119
127
|
|
|
128
|
+
// Convert MultiVectorItem to use Float32Arrays
|
|
129
|
+
function convertMultiVectorItem(item) {
|
|
130
|
+
if (!Array.isArray(item.vectors)) {
|
|
131
|
+
if (item.vector !== undefined) {
|
|
132
|
+
throw new Error(
|
|
133
|
+
`Item '${item.id}' has 'vector' field but store is multi-vector. Use 'vectors' field (array of vectors) instead.`,
|
|
134
|
+
);
|
|
135
|
+
}
|
|
136
|
+
throw new Error(`Item '${item.id}' missing required 'vectors' field`);
|
|
137
|
+
}
|
|
138
|
+
return {
|
|
139
|
+
...item,
|
|
140
|
+
vectors: item.vectors.map(toFloat32Array),
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// Check if items contain multi-vector data (vectors field must be an array)
|
|
145
|
+
function isMultiVectorItem(item) {
|
|
146
|
+
return Array.isArray(item.vectors);
|
|
147
|
+
}
|
|
148
|
+
|
|
120
149
|
// Wrap VectorDatabase to handle array conversion
|
|
121
150
|
const NativeVectorDatabase = nativeBinding.VectorDatabase;
|
|
122
151
|
|
|
@@ -125,12 +154,69 @@ class VectorDatabase {
|
|
|
125
154
|
this._native = nativeDb;
|
|
126
155
|
}
|
|
127
156
|
|
|
157
|
+
/**
|
|
158
|
+
* Insert or update vectors.
|
|
159
|
+
*
|
|
160
|
+
* Works for both single-vector and multi-vector stores:
|
|
161
|
+
* - Single-vector: items have `vector` field
|
|
162
|
+
* - Multi-vector: items have `vectors` field (array of vectors)
|
|
163
|
+
*
|
|
164
|
+
* @param {Array<{id: string, vector?: Float32Array|number[], vectors?: Float32Array[]|number[][], metadata?: object}>} items
|
|
165
|
+
* @returns {number[]} Array of internal indices
|
|
166
|
+
*/
|
|
128
167
|
set(items) {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
168
|
+
if (!Array.isArray(items)) {
|
|
169
|
+
throw new Error("set() requires an array of items");
|
|
170
|
+
}
|
|
171
|
+
if (items.length === 0) {
|
|
172
|
+
return [];
|
|
173
|
+
}
|
|
174
|
+
// Unified set() handles both single and multi-vector via native set()
|
|
175
|
+
return this._native.set(items.map(this._native.isMultiVector ? convertMultiVectorItem : convertVectorItem));
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
/**
|
|
179
|
+
* Search for k nearest neighbors.
|
|
180
|
+
*
|
|
181
|
+
* Works for both single-vector and multi-vector stores:
|
|
182
|
+
* - Single-vector: query is number[] or Float32Array
|
|
183
|
+
* - Multi-vector: query is number[][] or Float32Array[]
|
|
184
|
+
*
|
|
185
|
+
* @param {number[]|Float32Array|number[][]|Float32Array[]} query - Query vector(s)
|
|
186
|
+
* @param {number} k - Number of results to return
|
|
187
|
+
* @param {object} [options] - Search options
|
|
188
|
+
* @param {number} [options.ef] - Search width override (single-vector only)
|
|
189
|
+
* @param {object} [options.filter] - Metadata filter (single-vector only)
|
|
190
|
+
* @param {number} [options.maxDistance] - Max distance threshold (single-vector only)
|
|
191
|
+
* @param {boolean} [options.rerank] - Enable MaxSim reranking (multi-vector, default: true)
|
|
192
|
+
* @param {number} [options.rerankFactor] - Rerank candidate multiplier (multi-vector, default: 32)
|
|
193
|
+
* @returns {Array<{id: string, distance: number, metadata: object}>}
|
|
194
|
+
*/
|
|
195
|
+
search(query, k, options) {
|
|
196
|
+
if (this._native.isMultiVector) {
|
|
197
|
+
// Multi-vector store
|
|
198
|
+
const rerank = options?.rerank;
|
|
199
|
+
const rerankFactor = options?.rerankFactor;
|
|
200
|
+
return this._native.searchMulti(query, k, rerank, rerankFactor);
|
|
201
|
+
} else {
|
|
202
|
+
// Single-vector store - support both old positional args and new options object
|
|
203
|
+
if (typeof options === "object" && options !== null && !Array.isArray(options)) {
|
|
204
|
+
// New options object style
|
|
205
|
+
return this._native.search(
|
|
206
|
+
query,
|
|
207
|
+
k,
|
|
208
|
+
options.ef,
|
|
209
|
+
options.filter,
|
|
210
|
+
options.maxDistance,
|
|
211
|
+
);
|
|
212
|
+
} else {
|
|
213
|
+
// Legacy positional args: search(query, k, ef, filter, maxDistance)
|
|
214
|
+
const ef = options; // 3rd arg was ef in old API
|
|
215
|
+
const filter = arguments[3];
|
|
216
|
+
const maxDistance = arguments[4];
|
|
217
|
+
return this._native.search(query, k, ef, filter, maxDistance);
|
|
218
|
+
}
|
|
219
|
+
}
|
|
134
220
|
}
|
|
135
221
|
|
|
136
222
|
searchBatch(queries, k, ef) {
|
|
@@ -165,6 +251,10 @@ class VectorDatabase {
|
|
|
165
251
|
return this._native.dimensions;
|
|
166
252
|
}
|
|
167
253
|
|
|
254
|
+
get isMultiVector() {
|
|
255
|
+
return this._native.isMultiVector;
|
|
256
|
+
}
|
|
257
|
+
|
|
168
258
|
isEmpty() {
|
|
169
259
|
return this._native.isEmpty();
|
|
170
260
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@omendb/omendb",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.25",
|
|
4
4
|
"description": "Fast embedded vector database with HNSW + ACORN-1 filtered search",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"types": "index.d.ts",
|
|
@@ -41,7 +41,7 @@
|
|
|
41
41
|
"test": "vitest run"
|
|
42
42
|
},
|
|
43
43
|
"devDependencies": {
|
|
44
|
-
"@napi-rs/cli": "^3.5.
|
|
44
|
+
"@napi-rs/cli": "^3.5.1",
|
|
45
45
|
"vitest": "^2.1.0"
|
|
46
46
|
},
|
|
47
47
|
"files": [
|
|
@@ -50,10 +50,10 @@
|
|
|
50
50
|
"omendb.node"
|
|
51
51
|
],
|
|
52
52
|
"optionalDependencies": {
|
|
53
|
-
"@omendb/omendb-darwin-x64": "0.0.
|
|
54
|
-
"@omendb/omendb-darwin-arm64": "0.0.
|
|
55
|
-
"@omendb/omendb-linux-x64-gnu": "0.0.
|
|
56
|
-
"@omendb/omendb-linux-arm64-gnu": "0.0.
|
|
57
|
-
"@omendb/omendb-win32-x64-msvc": "0.0.
|
|
53
|
+
"@omendb/omendb-darwin-x64": "0.0.25",
|
|
54
|
+
"@omendb/omendb-darwin-arm64": "0.0.25",
|
|
55
|
+
"@omendb/omendb-linux-x64-gnu": "0.0.25",
|
|
56
|
+
"@omendb/omendb-linux-arm64-gnu": "0.0.25",
|
|
57
|
+
"@omendb/omendb-win32-x64-msvc": "0.0.25"
|
|
58
58
|
}
|
|
59
59
|
}
|