@omendb/omendb 0.0.22 → 0.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +318 -16
  2. package/index.d.ts +23 -7
  3. package/package.json +6 -6
package/README.md CHANGED
@@ -8,7 +8,7 @@ Fast embedded vector database with HNSW indexing for Node.js and Bun.
8
8
  npm install omendb
9
9
  ```
10
10
 
11
- ## Usage
11
+ ## Quick Start
12
12
 
13
13
  ```typescript
14
14
  import { open } from "omendb";
@@ -26,57 +26,359 @@ db.set([
26
26
  {
27
27
  id: "doc2",
28
28
  vector: new Float32Array(384).fill(0.2),
29
- metadata: { title: "World" },
29
+ metadata: { category: "news" },
30
30
  },
31
31
  ]);
32
32
 
33
33
  // Search
34
34
  const results = db.search(new Float32Array(384).fill(0.15), { k: 5 });
35
- console.log(results);
36
35
  // [{ id: 'doc1', distance: 0.05, metadata: { title: 'Hello' } }, ...]
37
36
 
38
37
  // Batch search (async, parallel)
39
38
  const batchResults = await db.searchBatch(queries, { k: 10 });
39
+
40
+ // Close when done (releases file locks)
41
+ db.close();
40
42
  ```
41
43
 
42
44
  ## Features
43
45
 
44
46
  - HNSW indexing for fast approximate nearest neighbor search
45
47
  - ACORN-1 filtered search
46
- - RaBitQ compression (2/4/8-bit quantization)
48
+ - SQ8 quantization (4x compression, ~99% recall)
49
+ - Hybrid search (vector + BM25 text)
47
50
  - Collections for multi-tenancy
48
51
  - Persistent storage with auto-save
49
52
  - Works with Node.js 18+ and Bun
50
53
 
51
54
  ## API
52
55
 
53
- ### `open(path, options?)`
56
+ ### Opening a Database
57
+
58
+ ```typescript
59
+ import { open } from "omendb";
60
+
61
+ // Basic
62
+ const db = open("./vectors", { dimensions: 384 });
63
+
64
+ // In-memory
65
+ const memDb = open(":memory:", { dimensions: 128 });
54
66
 
55
- Open or create a vector database.
67
+ // Full options
68
+ const tunedDb = open("./vectors", {
69
+ dimensions: 768,
70
+ m: 16, // HNSW connections per node (default: 16)
71
+ efConstruction: 100, // Build quality (default: 100)
72
+ efSearch: 100, // Search quality (default: 100)
73
+ quantization: true, // SQ8: 4x compression, ~99% recall
74
+ metric: "cosine", // "l2", "cosine", or "dot"
75
+ });
76
+ ```
56
77
 
57
- - `path`: Database directory path
58
- - `options.dimensions`: Vector dimensionality (default: 128)
78
+ ### Core Operations
59
79
 
60
- ### `db.set(items)`
80
+ #### `db.set(items)`
61
81
 
62
82
  Insert or update vectors.
63
83
 
64
- ### `db.search(query, options)`
84
+ ```typescript
85
+ db.set([
86
+ { id: "doc1", vector: Float32Array, metadata?: object },
87
+ { id: "doc2", vector: Float32Array, metadata?: object },
88
+ ]);
89
+ ```
90
+
91
+ #### `db.get(id)`
92
+
93
+ Get a vector by ID.
94
+
95
+ ```typescript
96
+ const item = db.get("doc1");
97
+ // { id: "doc1", vector: Float32Array, metadata: {...} } or null
98
+ ```
99
+
100
+ #### `db.getBatch(ids)`
101
+
102
+ Get multiple vectors by ID.
103
+
104
+ ```typescript
105
+ const items = db.getBatch(["doc1", "doc2"]);
106
+ // [{ id, vector, metadata } | null, ...]
107
+ ```
108
+
109
+ #### `db.update(id, options)`
110
+
111
+ Update a vector's data and/or metadata.
112
+
113
+ ```typescript
114
+ db.update("doc1", {
115
+ vector: newVector, // Optional
116
+ metadata: { title: "New" }, // Optional
117
+ });
118
+ ```
119
+
120
+ #### `db.delete(ids)`
121
+
122
+ Delete vectors by ID.
123
+
124
+ ```typescript
125
+ const deleted = db.delete(["doc1", "doc2"]);
126
+ // Returns number deleted
127
+ ```
128
+
129
+ #### `db.deleteByFilter(filter)`
130
+
131
+ Delete vectors matching a filter.
132
+
133
+ ```typescript
134
+ const deleted = db.deleteByFilter({ category: "old" });
135
+ const deletedDrafts = db.deleteByFilter({
136
+ $and: [{ type: "draft" }, { age: { $gt: 30 } }],
137
+ });
138
+ ```
139
+
140
+ ### Search
141
+
142
+ #### `db.search(query, options)`
65
143
 
66
144
  Search for k nearest neighbors (sync).
67
145
 
68
- ### `db.searchBatch(queries, options)`
146
+ ```typescript
147
+ const results = db.search(queryVector, {
148
+ k: 10, // Number of results
149
+ ef: 200, // Search quality (higher = better recall)
150
+ filter: { category: "news" }, // Metadata filter
151
+ maxDistance: 0.5, // Distance threshold
152
+ });
153
+ // [{ id, distance, metadata }, ...]
154
+ ```
155
+
156
+ #### `db.searchBatch(queries, options)`
69
157
 
70
158
  Batch search with parallel execution (async).
71
159
 
72
- ### `db.get(id)`
160
+ ```typescript
161
+ const results = await db.searchBatch(queries, { k: 10, ef: 100 });
162
+ // [[{ id, distance, metadata }, ...], ...]
163
+ ```
164
+
165
+ ### Text & Hybrid Search
73
166
 
74
- Get a vector by ID.
167
+ #### `db.enableTextSearch(bufferMb?)`
75
168
 
76
- ### `db.delete(ids)`
169
+ Enable text indexing for hybrid search.
77
170
 
78
- Delete vectors by ID.
171
+ ```typescript
172
+ db.enableTextSearch(); // Default 64MB buffer
173
+ db.enableTextSearch(128); // Custom buffer size
174
+ ```
175
+
176
+ #### `db.hasTextSearch`
177
+
178
+ Check if text search is enabled.
179
+
180
+ ```typescript
181
+ if (db.hasTextSearch) { ... }
182
+ ```
183
+
184
+ #### `db.setWithText(items)`
185
+
186
+ Insert vectors with text content.
187
+
188
+ ```typescript
189
+ db.setWithText([
190
+ { id: "doc1", vector: vec, text: "Machine learning tutorial", metadata: {...} }
191
+ ]);
192
+ ```
193
+
194
+ #### `db.textSearch(query, k)`
195
+
196
+ BM25 text-only search.
197
+
198
+ ```typescript
199
+ const results = db.textSearch("machine learning", 10);
200
+ // [{ id, score, metadata }, ...]
201
+ ```
202
+
203
+ #### `db.hybridSearch(options)`
204
+
205
+ Combined vector + text search.
206
+
207
+ ```typescript
208
+ const results = db.hybridSearch({
209
+ vector: queryVector,
210
+ text: "machine learning",
211
+ k: 10,
212
+ alpha: 0.7, // 0=text only, 1=vector only (default: 0.5)
213
+ subscores: true, // Include separate scores
214
+ });
215
+ // [{ id, score, metadata, keyword_score?, semantic_score? }, ...]
216
+ ```
217
+
218
+ ### Collections
219
+
220
+ #### `db.collection(name)`
221
+
222
+ Get or create a named collection.
223
+
224
+ ```typescript
225
+ const users = db.collection("users");
226
+ users.set([...]);
227
+ users.search(query, { k: 5 });
228
+ ```
229
+
230
+ #### `db.collections()`
231
+
232
+ List all collections.
233
+
234
+ ```typescript
235
+ const names = db.collections();
236
+ // ["users", "products", ...]
237
+ ```
238
+
239
+ #### `db.deleteCollection(name)`
240
+
241
+ Delete a collection.
242
+
243
+ ```typescript
244
+ db.deleteCollection("old_collection");
245
+ ```
246
+
247
+ ### Properties
248
+
249
+ ```typescript
250
+ db.length; // Number of vectors
251
+ db.dimensions; // Vector dimensionality
252
+ db.efSearch; // Get/set search quality parameter
253
+
254
+ db.efSearch = 200; // Tune for better recall
255
+ ```
256
+
257
+ ### Utility Methods
258
+
259
+ #### `db.count(filter?)`
260
+
261
+ Count vectors, optionally with filter.
262
+
263
+ ```typescript
264
+ const total = db.count();
265
+ const filtered = db.count({ category: "news" });
266
+ ```
267
+
268
+ #### `db.isEmpty()`
269
+
270
+ Check if database is empty.
271
+
272
+ #### `db.exists(id)`
273
+
274
+ Check if an ID exists.
275
+
276
+ ```typescript
277
+ if (db.exists("doc1")) { ... }
278
+ ```
279
+
280
+ #### `db.ids()`
281
+
282
+ Get all vector IDs.
283
+
284
+ ```typescript
285
+ const allIds = db.ids();
286
+ ```
287
+
288
+ #### `db.items()`
289
+
290
+ Get all vectors with metadata.
291
+
292
+ ```typescript
293
+ const allItems = db.items();
294
+ // [{ id, vector, metadata }, ...]
295
+ ```
296
+
297
+ #### `db.stats()`
298
+
299
+ Get index statistics.
300
+
301
+ ```typescript
302
+ const stats = db.stats();
303
+ // { numVectors, dimensions, maxLevel, avgNeighborsL0, ... }
304
+ ```
305
+
306
+ ### Persistence
307
+
308
+ #### `db.flush()`
309
+
310
+ Force write pending changes to disk.
311
+
312
+ ```typescript
313
+ db.flush();
314
+ ```
315
+
316
+ #### `db.compact()`
317
+
318
+ Remove deleted records and reclaim space.
319
+
320
+ ```typescript
321
+ const removed = db.compact();
322
+ ```
323
+
324
+ #### `db.optimize()`
325
+
326
+ Reorder graph for better cache locality (6-40% speedup).
327
+
328
+ ```typescript
329
+ const reordered = db.optimize();
330
+ ```
331
+
332
+ #### `db.close()`
333
+
334
+ Close database and release file locks.
335
+
336
+ ```typescript
337
+ db.close();
338
+ // Can now reopen the same path
339
+ ```
340
+
341
+ #### `db.mergeFrom(other)`
342
+
343
+ Merge another database into this one.
344
+
345
+ ```typescript
346
+ const merged = db.mergeFrom(otherDb);
347
+ ```
348
+
349
+ ### Filter Operators
350
+
351
+ ```typescript
352
+ // Equality
353
+ { field: "value" } // Shorthand
354
+ { field: { $eq: "value" } } // Explicit
355
+
356
+ // Comparison
357
+ { field: { $ne: "value" } } // Not equal
358
+ { field: { $gt: 10 } } // Greater than
359
+ { field: { $gte: 10 } } // Greater or equal
360
+ { field: { $lt: 10 } } // Less than
361
+ { field: { $lte: 10 } } // Less or equal
362
+
363
+ // Membership
364
+ { field: { $in: ["a", "b"] } } // In list
365
+ { field: { $nin: ["a", "b"] } } // Not in list
366
+
367
+ // Logical
368
+ { $and: [{...}, {...}] } // AND
369
+ { $or: [{...}, {...}] } // OR
370
+ ```
371
+
372
+ ## Performance
373
+
374
+ **10K vectors, 128D, M=16, ef=100. Measured 2026-01-20 (Apple M3 Max):**
375
+
376
+ | Metric | Value |
377
+ | ---------- | --------- |
378
+ | Search QPS | 11,542 |
379
+ | Build | 30,826 vec/s |
380
+ | Recall@10 | 89.7% |
79
381
 
80
382
  ## License
81
383
 
82
- Apache-2.0
384
+ [Elastic License 2.0](../LICENSE)
package/index.d.ts CHANGED
@@ -146,6 +146,24 @@ export declare class VectorDatabase {
146
146
  * For hybrid search, this commits text index changes.
147
147
  */
148
148
  flush(): void
149
+ /**
150
+ * Compact the database by removing deleted records and reclaiming space.
151
+ *
152
+ * This operation removes tombstoned records, reassigns indices to be
153
+ * contiguous, and rebuilds the search index. Call after bulk deletes
154
+ * to reclaim memory and improve search performance.
155
+ *
156
+ * @returns Number of deleted records that were removed
157
+ *
158
+ * @example
159
+ * ```typescript
160
+ * // After bulk delete
161
+ * db.delete(staleIds);
162
+ * const removed = db.compact();
163
+ * console.log(`Removed ${removed} deleted records`);
164
+ * ```
165
+ */
166
+ compact(): number
149
167
  /**
150
168
  * Close the database and release file locks.
151
169
  *
@@ -236,16 +254,16 @@ export interface HybridSearchResult {
236
254
  * efSearch: 150
237
255
  * });
238
256
  *
239
- * // With RaBitQ quantization (8x memory reduction)
257
+ * // With SQ8 quantization (4x memory reduction, ~99% recall)
240
258
  * const db = omendb.open("./mydb", {
241
259
  * dimensions: 128,
242
- * quantization: 4 // 4-bit quantization
260
+ * quantization: true // or "sq8"
243
261
  * });
244
262
  *
245
263
  * // Quantization with custom rescore settings
246
264
  * const db = omendb.open("./mydb", {
247
265
  * dimensions: 128,
248
- * quantization: 4,
266
+ * quantization: true,
249
267
  * rescore: false, // Disable rescore for max speed
250
268
  * oversample: 5.0 // Or increase oversample for better recall
251
269
  * });
@@ -261,7 +279,7 @@ export declare function open(path: string, options?: OpenOptions | undefined | n
261
279
  * - m: 16 (HNSW neighbors per node, higher = better recall, more memory)
262
280
  * - efConstruction: 100 (build quality, higher = better graph, slower build)
263
281
  * - efSearch: 100 (search quality, higher = better recall, slower search)
264
- * - quantization: null (RaBitQ bit width: 2, 4, or 8 for compression)
282
+ * - quantization: null (true/"sq8" for 4x compression, ~99% recall)
265
283
  * - rescore: true when quantization enabled (rerank candidates with exact distance)
266
284
  * - oversample: 3.0 (fetch k*oversample candidates when rescoring)
267
285
  * - metric: "l2" (distance metric: "l2", "euclidean", "cosine", "dot", "ip")
@@ -278,9 +296,7 @@ export interface OpenOptions {
278
296
  /**
279
297
  * Quantization mode (default: null = no quantization)
280
298
  * - true or "sq8": SQ8 4x compression, ~99% recall (RECOMMENDED)
281
- * - "rabitq": RaBitQ 8x compression, ~98% recall
282
- * - "binary": Binary 32x compression, ~95% recall
283
- * - 2, 4, 8: RaBitQ with specific bits (legacy)
299
+ * - false/null: Full precision (no quantization)
284
300
  */
285
301
  quantization?: boolean | string | number | null | undefined
286
302
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@omendb/omendb",
3
- "version": "0.0.22",
3
+ "version": "0.0.24",
4
4
  "description": "Fast embedded vector database with HNSW + ACORN-1 filtered search",
5
5
  "main": "index.js",
6
6
  "types": "index.d.ts",
@@ -50,10 +50,10 @@
50
50
  "omendb.node"
51
51
  ],
52
52
  "optionalDependencies": {
53
- "@omendb/omendb-darwin-x64": "0.0.22",
54
- "@omendb/omendb-darwin-arm64": "0.0.22",
55
- "@omendb/omendb-linux-x64-gnu": "0.0.22",
56
- "@omendb/omendb-linux-arm64-gnu": "0.0.22",
57
- "@omendb/omendb-win32-x64-msvc": "0.0.22"
53
+ "@omendb/omendb-darwin-x64": "0.0.24",
54
+ "@omendb/omendb-darwin-arm64": "0.0.24",
55
+ "@omendb/omendb-linux-x64-gnu": "0.0.24",
56
+ "@omendb/omendb-linux-arm64-gnu": "0.0.24",
57
+ "@omendb/omendb-win32-x64-msvc": "0.0.24"
58
58
  }
59
59
  }