@omendb/omendb 0.0.26 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. package/README.md +13 -10
  2. package/index.d.ts +94 -159
  3. package/index.js +12 -30
  4. package/package.json +6 -6
package/README.md CHANGED
@@ -5,7 +5,7 @@ Fast embedded vector database with HNSW indexing for Node.js and Bun.
5
5
  ## Installation
6
6
 
7
7
  ```bash
8
- npm install omendb
8
+ npm install @omendb/omendb
9
9
  ```
10
10
 
11
11
  ## Quick Start
@@ -191,28 +191,31 @@ db.setWithText([
191
191
  ]);
192
192
  ```
193
193
 
194
- #### `db.textSearch(query, k)`
194
+ #### `db.searchText(query, k)`
195
195
 
196
196
  BM25 text-only search.
197
197
 
198
198
  ```typescript
199
- const results = db.textSearch("machine learning", 10);
199
+ const results = db.searchText("machine learning", 10);
200
200
  // [{ id, score, metadata }, ...]
201
201
  ```
202
202
 
203
- #### `db.hybridSearch(options)`
203
+ #### `db.searchHybrid(queryVector, queryText, k, options?)`
204
204
 
205
- Combined vector + text search.
205
+ Combined vector + text search using Reciprocal Rank Fusion.
206
206
 
207
207
  ```typescript
208
- const results = db.hybridSearch({
209
- vector: queryVector,
210
- text: "machine learning",
211
- k: 10,
208
+ // Basic
209
+ const results = db.searchHybrid(queryVector, "machine learning", 10);
210
+
211
+ // With options
212
+ const results = db.searchHybrid(queryVector, "machine learning", 10, {
212
213
  alpha: 0.7, // 0=text only, 1=vector only (default: 0.5)
214
+ rrfK: 60, // RRF constant (default: 60)
215
+ filter: { category: "ml" },
213
216
  subscores: true, // Include separate scores
214
217
  });
215
- // [{ id, score, metadata, keyword_score?, semantic_score? }, ...]
218
+ // [{ id, score, metadata, keywordScore?, semanticScore? }, ...]
216
219
  ```
217
220
 
218
221
  ### Collections
package/index.d.ts CHANGED
@@ -1,6 +1,17 @@
1
1
  /* auto-generated by NAPI-RS */
2
2
  /* eslint-disable */
3
3
  export declare class VectorDatabase {
4
+ /**
5
+ * Get or create a named collection.
6
+ *
7
+ * Collection handles share state - changes made through one handle
8
+ * are immediately visible through another (no flush required).
9
+ */
10
+ collection(name: string, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
11
+ /** List all collections. */
12
+ collections(): Array<string>
13
+ /** Delete a collection. */
14
+ deleteCollection(name: string): void
4
15
  /**
5
16
  * Insert or update vectors.
6
17
  *
@@ -14,45 +25,7 @@ export declare class VectorDatabase {
14
25
  * @param items - Array of {id, vector, metadata?, text?} or {id, vectors, metadata?}
15
26
  * @returns Number of vectors inserted/updated
16
27
  */
17
- set(items: Array<SetItem>): number
18
- /**
19
- * Search for k nearest neighbors.
20
- *
21
- * @param query - Query vector (number[] or Float32Array)
22
- * @param k - Number of results to return
23
- * @param options - Optional search options: {filter?, ef?, maxDistance?}
24
- * @returns Array of {id, distance, score, metadata}
25
- *
26
- * @example
27
- * ```javascript
28
- * // Basic search
29
- * db.search([1, 0, 0, 0], 10);
30
- *
31
- * // With options
32
- * db.search([1, 0, 0, 0], 10, { filter: { category: "A" }, ef: 200 });
33
- * db.search([1, 0, 0, 0], 10, { maxDistance: 0.5 });
34
- * ```
35
- */
36
- search(query: Array<number> | Float32Array, k: number, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): Array<SearchResult>
37
- /**
38
- * Search multi-vector store with query tokens.
39
- *
40
- * Internal method used by unified search() for multi-vector stores.
41
- *
42
- * @param query - Query tokens (number[][] or Float32Array[])
43
- * @param k - Number of results to return
44
- * @param rerank - Enable MaxSim reranking for better quality (default: true)
45
- * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
46
- * @returns Array of {id, distance, metadata}
47
- */
48
- searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
49
- /**
50
- * Batch search with parallel execution (async).
51
- *
52
- * Runs searches in parallel using rayon on a blocking thread pool,
53
- * keeping the Node.js event loop free.
54
- */
55
- searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
28
+ set(items: Array<SetItem>): Promise<number>
56
29
  /** Get a vector by ID. */
57
30
  get(id: string): GetResult | null
58
31
  /**
@@ -145,6 +118,8 @@ export declare class VectorDatabase {
145
118
  get dimensions(): number
146
119
  /** Check if this is a multi-vector store. */
147
120
  get isMultiVector(): boolean
121
+ /** Check if an embedding function is configured. */
122
+ get hasEmbeddingFn(): boolean
148
123
  /** Check if database is empty. */
149
124
  isEmpty(): boolean
150
125
  /** Get database statistics. */
@@ -153,63 +128,6 @@ export declare class VectorDatabase {
153
128
  get efSearch(): number
154
129
  /** Set ef_search value. */
155
130
  set efSearch(efSearch: number)
156
- /**
157
- * Get or create a named collection.
158
- *
159
- * Collection handles share state - changes made through one handle
160
- * are immediately visible through another (no flush required).
161
- */
162
- collection(name: string): VectorDatabase
163
- /** List all collections. */
164
- collections(): Array<string>
165
- /** Delete a collection. */
166
- deleteCollection(name: string): void
167
- /**
168
- * Check if text search is enabled.
169
- *
170
- * Text search is automatically enabled when using set() with text field.
171
- */
172
- get hasTextSearch(): boolean
173
- /**
174
- * Search using text only (BM25 scoring).
175
- *
176
- * @param query - Text query
177
- * @param k - Number of results
178
- * @returns Array of {id, score, metadata}
179
- */
180
- searchText(query: string, k: number): Array<TextSearchResult>
181
- /**
182
- * Hybrid search combining vector similarity and text relevance.
183
- *
184
- * Uses Reciprocal Rank Fusion (RRF) to combine HNSW and BM25 results.
185
- *
186
- * @param queryVector - Query embedding
187
- * @param queryText - Text query for BM25
188
- * @param k - Number of results
189
- * @param options - Optional: {filter?, alpha?, rrfK?, subscores?}
190
- * @returns Array of {id, score, metadata, keywordScore?, semanticScore?}
191
- *
192
- * @example
193
- * ```javascript
194
- * // Basic hybrid search
195
- * db.searchHybrid([1, 0, 0, 0], "machine learning", 10);
196
- *
197
- * // With options
198
- * db.searchHybrid([1, 0, 0, 0], "query", 10, {
199
- * filter: { type: "ml" },
200
- * alpha: 0.7,
201
- * rrfK: 60,
202
- * subscores: true
203
- * });
204
- * ```
205
- */
206
- searchHybrid(queryVector: Array<number> | Float32Array, queryText: string, k: number, options?: { filter?: Record<string, unknown>; alpha?: number; rrfK?: number; subscores?: boolean } | undefined): Array<HybridSearchResult>
207
- /**
208
- * Flush pending changes to disk.
209
- *
210
- * For hybrid search, this commits text index changes.
211
- */
212
- flush(): void
213
131
  /**
214
132
  * Compact the database by removing deleted records and reclaiming space.
215
133
  *
@@ -272,39 +190,98 @@ export declare class VectorDatabase {
272
190
  */
273
191
  exists(id: string): boolean
274
192
  /**
275
- * Alias for exists() - check if an ID exists in the database.
193
+ * Get multiple vectors by ID.
276
194
  *
277
- * @param id - Vector ID to check
278
- * @returns true if ID exists and is not deleted
195
+ * Batch version of get(). More efficient than calling get() in a loop.
196
+ *
197
+ * @param ids - Array of vector IDs to retrieve
198
+ * @returns Array of results in same order as input, null for missing IDs
199
+ */
200
+ getBatch(ids: Array<string>): Array<GetResult | undefined | null>
201
+ /**
202
+ * Check if text search is enabled.
203
+ *
204
+ * Text search is automatically enabled when using set() with text field.
205
+ */
206
+ get hasTextSearch(): boolean
207
+ /**
208
+ * Search using text only (BM25 scoring).
209
+ *
210
+ * @param query - Text query
211
+ * @param k - Number of results
212
+ * @returns Array of {id, score, metadata}
213
+ */
214
+ searchText(query: string, k: number): Array<TextSearchResult>
215
+ /**
216
+ * Hybrid search combining vector similarity and text relevance.
217
+ *
218
+ * Uses Reciprocal Rank Fusion (RRF) to combine HNSW and BM25 results.
219
+ *
220
+ * @param queryVector - Query embedding
221
+ * @param queryText - Text query for BM25
222
+ * @param k - Number of results
223
+ * @param options - Optional: {filter?, alpha?, rrfK?, subscores?}
224
+ * @returns Array of {id, score, metadata, keywordScore?, semanticScore?}
225
+ *
226
+ * @example
227
+ * ```javascript
228
+ * // Basic hybrid search
229
+ * db.searchHybrid([1, 0, 0, 0], "machine learning", 10);
230
+ *
231
+ * // With options
232
+ * db.searchHybrid([1, 0, 0, 0], "query", 10, {
233
+ * filter: { type: "ml" },
234
+ * alpha: 0.7,
235
+ * rrfK: 60,
236
+ * subscores: true
237
+ * });
238
+ * ```
279
239
  */
280
- has(id: string): boolean
240
+ searchHybrid(queryVector: Array<number> | Float32Array | string, queryText: string | undefined | null, k: number, options?: { filter?: Record<string, unknown>; alpha?: number; rrfK?: number; subscores?: boolean } | undefined): Promise<Array<HybridSearchResult>>
281
241
  /**
282
- * Search for the single nearest neighbor.
242
+ * Flush pending changes to disk.
283
243
  *
284
- * Convenience method that returns the top result or null if no matches.
244
+ * For hybrid search, this commits text index changes.
245
+ */
246
+ flush(): void
247
+ /**
248
+ * Search for k nearest neighbors.
285
249
  *
286
250
  * @param query - Query vector (number[] or Float32Array)
251
+ * @param k - Number of results to return
287
252
  * @param options - Optional search options: {filter?, ef?, maxDistance?}
288
- * @returns Single result or null
253
+ * @returns Array of {id, distance, score, metadata}
289
254
  *
290
255
  * @example
291
256
  * ```javascript
292
- * const nearest = db.searchOne([1, 0, 0, 0]);
293
- * if (nearest) {
294
- * console.log(`Found: ${nearest.id} at distance ${nearest.distance}`);
295
- * }
257
+ * // Basic search
258
+ * db.search([1, 0, 0, 0], 10);
259
+ *
260
+ * // With options
261
+ * db.search([1, 0, 0, 0], 10, { filter: { category: "A" }, ef: 200 });
262
+ * db.search([1, 0, 0, 0], 10, { maxDistance: 0.5 });
296
263
  * ```
297
264
  */
298
- searchOne(query: Array<number> | Float32Array, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): SearchResult | null
265
+ search(query: Array<number> | Float32Array | string, k: number, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): Promise<Array<SearchResult>>
299
266
  /**
300
- * Get multiple vectors by ID.
267
+ * Search multi-vector store with query tokens.
301
268
  *
302
- * Batch version of get(). More efficient than calling get() in a loop.
269
+ * Internal method used by unified search() for multi-vector stores.
303
270
  *
304
- * @param ids - Array of vector IDs to retrieve
305
- * @returns Array of results in same order as input, null for missing IDs
271
+ * @param query - Query tokens (number[][] or Float32Array[])
272
+ * @param k - Number of results to return
273
+ * @param rerank - Enable MaxSim reranking for better quality (default: true)
274
+ * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
275
+ * @returns Array of {id, distance, metadata}
306
276
  */
307
- getBatch(ids: Array<string>): Array<GetResult | undefined | null>
277
+ searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
278
+ /**
279
+ * Batch search with parallel execution (async).
280
+ *
281
+ * Runs searches in parallel using rayon on a blocking thread pool,
282
+ * keeping the Node.js event loop free.
283
+ */
284
+ searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
308
285
  }
309
286
 
310
287
  export interface GetResult {
@@ -323,14 +300,6 @@ export interface HybridSearchResult {
323
300
  semanticScore?: number
324
301
  }
325
302
 
326
- export interface MultiVectorItem {
327
- id: string
328
- /** Multi-vector data as array of Float32Arrays */
329
- vectors: Float32Array[]
330
- /** Optional metadata */
331
- metadata?: Record<string, unknown> | undefined
332
- }
333
-
334
303
  /**
335
304
  * Open or create a vector database.
336
305
  *
@@ -357,16 +326,9 @@ export interface MultiVectorItem {
357
326
  * quantization: true // or "sq8"
358
327
  * });
359
328
  *
360
- * // Quantization with custom rescore settings
361
- * const db = omendb.open("./mydb", {
362
- * dimensions: 128,
363
- * quantization: true,
364
- * rescore: false, // Disable rescore for max speed
365
- * oversample: 5.0 // Or increase oversample for better recall
366
- * });
367
329
  * ```
368
330
  */
369
- export declare function open(path: string, options?: OpenOptions | undefined | null): VectorDatabase
331
+ export declare function open(path: string, options?: OpenOptions | undefined | null, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
370
332
 
371
333
  /**
372
334
  * Configuration options for opening a vector database.
@@ -376,9 +338,7 @@ export declare function open(path: string, options?: OpenOptions | undefined | n
376
338
  * - m: 16 (HNSW neighbors per node, higher = better recall, more memory)
377
339
  * - efConstruction: 100 (build quality, higher = better graph, slower build)
378
340
  * - efSearch: 100 (search quality, higher = better recall, slower search)
379
- * - quantization: null (true/"sq8" for 4x compression, ~99% recall)
380
- * - rescore: true when quantization enabled (rerank candidates with exact distance)
381
- * - oversample: 3.0 (fetch k*oversample candidates when rescoring)
341
+ * - quantization: null (true/"sq8" for 4x compression)
382
342
  * - metric: "l2" (distance metric: "l2", "euclidean", "cosine", "dot", "ip")
383
343
  */
384
344
  export interface OpenOptions {
@@ -396,16 +356,6 @@ export interface OpenOptions {
396
356
  * - false/null: Full precision (no quantization)
397
357
  */
398
358
  quantization?: boolean | string | number | null | undefined
399
- /**
400
- * Rescore candidates with exact distance (default: true when quantization enabled)
401
- * Set to false for maximum speed at the cost of ~20% recall
402
- */
403
- rescore?: boolean
404
- /**
405
- * Oversampling factor for rescoring (default: 3.0)
406
- * Fetches k*oversample candidates then reranks to return top k
407
- */
408
- oversample?: number
409
359
  /** Distance metric: "l2"/"euclidean" (default), "cosine", "dot"/"ip" */
410
360
  metric?: string
411
361
  /**
@@ -437,6 +387,8 @@ export interface SetItem {
437
387
  metadata?: Record<string, unknown> | undefined
438
388
  /** Optional text for hybrid search (auto-enables text search, stored in metadata.text) */
439
389
  text?: string
390
+ /** Optional document for auto-embedding via embeddingFn */
391
+ document?: string
440
392
  }
441
393
 
442
394
  export interface StatsResult {
@@ -450,20 +402,3 @@ export interface TextSearchResult {
450
402
  score: number
451
403
  metadata: Record<string, unknown>
452
404
  }
453
-
454
- export interface VectorItem {
455
- id: string
456
- /** Vector data as Float32Array */
457
- vector: Float32Array
458
- /** Optional metadata */
459
- metadata?: Record<string, unknown> | undefined
460
- /** Optional document text (stored in metadata.document) */
461
- document?: string
462
- }
463
-
464
- export interface VectorItemWithText {
465
- id: string
466
- vector: Float32Array
467
- text: string
468
- metadata?: Record<string, unknown> | undefined
469
- }
package/index.js CHANGED
@@ -111,6 +111,10 @@ function toFloat32Array(arr) {
111
111
 
112
112
  // Convert VectorItem to use Float32Array
113
113
  function convertVectorItem(item) {
114
+ // Items with document field are handled by native embedding
115
+ if (item.document !== undefined && item.document !== null) {
116
+ return item;
117
+ }
114
118
  if (item.vector === undefined || item.vector === null) {
115
119
  if (Array.isArray(item.vectors)) {
116
120
  throw new Error(
@@ -141,11 +145,6 @@ function convertMultiVectorItem(item) {
141
145
  };
142
146
  }
143
147
 
144
- // Check if items contain multi-vector data (vectors field must be an array)
145
- function isMultiVectorItem(item) {
146
- return Array.isArray(item.vectors);
147
- }
148
-
149
148
  // Wrap VectorDatabase to handle array conversion
150
149
  const NativeVectorDatabase = nativeBinding.VectorDatabase;
151
150
 
@@ -201,17 +200,6 @@ class VectorDatabase {
201
200
  }
202
201
  }
203
202
 
204
- /**
205
- * Search for the single nearest neighbor.
206
- *
207
- * @param {number[]|Float32Array} query - Query vector
208
- * @param {object} [options] - Search options: {filter?, ef?, maxDistance?}
209
- * @returns {{id: string, distance: number, score: number, metadata: object}|null}
210
- */
211
- searchOne(query, options) {
212
- return this._native.searchOne(query, options);
213
- }
214
-
215
203
  searchBatch(queries, k, ef) {
216
204
  return this._native.searchBatch(queries, k, ef);
217
205
  }
@@ -254,6 +242,10 @@ class VectorDatabase {
254
242
  return this._native.isMultiVector;
255
243
  }
256
244
 
245
+ get hasEmbeddingFn() {
246
+ return this._native.hasEmbeddingFn;
247
+ }
248
+
257
249
  isEmpty() {
258
250
  return this._native.isEmpty();
259
251
  }
@@ -270,8 +262,8 @@ class VectorDatabase {
270
262
  this._native.efSearch = value;
271
263
  }
272
264
 
273
- collection(name) {
274
- return new VectorDatabase(this._native.collection(name));
265
+ collection(name, embeddingFn) {
266
+ return new VectorDatabase(this._native.collection(name, embeddingFn));
275
267
  }
276
268
 
277
269
  collections() {
@@ -338,16 +330,6 @@ class VectorDatabase {
338
330
  return this._native.exists(id);
339
331
  }
340
332
 
341
- /**
342
- * Alias for exists() - check if an ID exists in the database.
343
- *
344
- * @param {string} id - Vector ID to check
345
- * @returns {boolean}
346
- */
347
- has(id) {
348
- return this._native.has(id);
349
- }
350
-
351
333
  getBatch(ids) {
352
334
  return this._native.getBatch(ids);
353
335
  }
@@ -357,8 +339,8 @@ class VectorDatabase {
357
339
  }
358
340
  }
359
341
 
360
- function open(path, options) {
361
- return new VectorDatabase(nativeBinding.open(path, options));
342
+ function open(path, options, embeddingFn) {
343
+ return new VectorDatabase(nativeBinding.open(path, options, embeddingFn));
362
344
  }
363
345
 
364
346
  module.exports.VectorDatabase = VectorDatabase;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@omendb/omendb",
3
- "version": "0.0.26",
3
+ "version": "0.0.27",
4
4
  "description": "Fast embedded vector database with HNSW + ACORN-1 filtered search",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -50,10 +50,10 @@
50
50
  "omendb.node"
51
51
  ],
52
52
  "optionalDependencies": {
53
- "@omendb/omendb-darwin-x64": "0.0.26",
54
- "@omendb/omendb-darwin-arm64": "0.0.26",
55
- "@omendb/omendb-linux-x64-gnu": "0.0.26",
56
- "@omendb/omendb-linux-arm64-gnu": "0.0.26",
57
- "@omendb/omendb-win32-x64-msvc": "0.0.26"
53
+ "@omendb/omendb-darwin-x64": "0.0.27",
54
+ "@omendb/omendb-darwin-arm64": "0.0.27",
55
+ "@omendb/omendb-linux-x64-gnu": "0.0.27",
56
+ "@omendb/omendb-linux-arm64-gnu": "0.0.27",
57
+ "@omendb/omendb-win32-x64-msvc": "0.0.27"
58
58
  }
59
59
  }