npm - @omendb/omendb - Versions diffs - 0.0.22 → 0.0.24 - Mend

@omendb/omendb 0.0.22 → 0.0.24

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -8,7 +8,7 @@ Fast embedded vector database with HNSW indexing for Node.js and Bun.
 npm install omendb
 ```
-## Usage
+## Quick Start
 ```typescript
 import { open } from "omendb";
@@ -26,57 +26,359 @@ db.set([
   {
     id: "doc2",
     vector: new Float32Array(384).fill(0.2),
-    metadata: { title: "World" },
+    metadata: { category: "news" },
   },
 ]);
 // Search
 const results = db.search(new Float32Array(384).fill(0.15), { k: 5 });
-console.log(results);
 // [{ id: 'doc1', distance: 0.05, metadata: { title: 'Hello' } }, ...]
 // Batch search (async, parallel)
 const batchResults = await db.searchBatch(queries, { k: 10 });
+// Close when done (releases file locks)
+db.close();
 ```
 ## Features
 - HNSW indexing for fast approximate nearest neighbor search
 - ACORN-1 filtered search
-- RaBitQ compression (2/4/8-bit quantization)
+- SQ8 quantization (4x compression, ~99% recall)
+- Hybrid search (vector + BM25 text)
 - Collections for multi-tenancy
 - Persistent storage with auto-save
 - Works with Node.js 18+ and Bun
 ## API
-### `open(path, options?)`
+### Opening a Database
+```typescript
+import { open } from "omendb";
+// Basic
+const db = open("./vectors", { dimensions: 384 });
+// In-memory
+const memDb = open(":memory:", { dimensions: 128 });
-Open or create a vector database.
+// Full options
+const db = open("./vectors", {
+  dimensions: 768,
+  m: 16, // HNSW connections per node (default: 16)
+  efConstruction: 100, // Build quality (default: 100)
+  efSearch: 100, // Search quality (default: 100)
+  quantization: true, // SQ8: 4x compression, ~99% recall
+  metric: "cosine", // "l2", "cosine", or "dot"
+});
+```
-- `path`: Database directory path
-- `options.dimensions`: Vector dimensionality (default: 128)
+### Core Operations
-### `db.set(items)`
+#### `db.set(items)`
 Insert or update vectors.
-### `db.search(query, options)`
+```typescript
+db.set([
+  { id: "doc1", vector: Float32Array, metadata?: object },
+  { id: "doc2", vector: Float32Array, metadata?: object },
+]);
+```
+#### `db.get(id)`
+Get a vector by ID.
+```typescript
+const item = db.get("doc1");
+// { id: "doc1", vector: Float32Array, metadata: {...} } or null
+```
+#### `db.getBatch(ids)`
+Get multiple vectors by ID.
+```typescript
+const items = db.getBatch(["doc1", "doc2"]);
+// [{ id, vector, metadata } | null, ...]
+```
+#### `db.update(id, options)`
+Update a vector's data and/or metadata.
+```typescript
+db.update("doc1", {
+  vector: newVector, // Optional
+  metadata: { title: "New" }, // Optional
+});
+```
+#### `db.delete(ids)`
+Delete vectors by ID.
+```typescript
+const deleted = db.delete(["doc1", "doc2"]);
+// Returns number deleted
+```
+#### `db.deleteByFilter(filter)`
+Delete vectors matching a filter.
+```typescript
+const deleted = db.deleteByFilter({ category: "old" });
+const deleted = db.deleteByFilter({
+  $and: [{ type: "draft" }, { age: { $gt: 30 } }],
+});
+```
+### Search
+#### `db.search(query, options)`
 Search for k nearest neighbors (sync).
-### `db.searchBatch(queries, options)`
+```typescript
+const results = db.search(queryVector, {
+  k: 10, // Number of results
+  ef: 200, // Search quality (higher = better recall)
+  filter: { category: "news" }, // Metadata filter
+  maxDistance: 0.5, // Distance threshold
+});
+// [{ id, distance, metadata }, ...]
+```
+#### `db.searchBatch(queries, options)`
 Batch search with parallel execution (async).
-### `db.get(id)`
+```typescript
+const results = await db.searchBatch(queries, { k: 10, ef: 100 });
+// [[{ id, distance, metadata }, ...], ...]
+```
+### Text & Hybrid Search
-Get a vector by ID.
+#### `db.enableTextSearch(bufferMb?)`
-### `db.delete(ids)`
+Enable text indexing for hybrid search.
-Delete vectors by ID.
+```typescript
+db.enableTextSearch(); // Default 64MB buffer
+db.enableTextSearch(128); // Custom buffer size
+```
+#### `db.hasTextSearch`
+Check if text search is enabled.
+```typescript
+if (db.hasTextSearch) { ... }
+```
+#### `db.setWithText(items)`
+Insert vectors with text content.
+```typescript
+db.setWithText([
+  { id: "doc1", vector: vec, text: "Machine learning tutorial", metadata: {...} }
+]);
+```
+#### `db.textSearch(query, k)`
+BM25 text-only search.
+```typescript
+const results = db.textSearch("machine learning", 10);
+// [{ id, score, metadata }, ...]
+```
+#### `db.hybridSearch(options)`
+Combined vector + text search.
+```typescript
+const results = db.hybridSearch({
+  vector: queryVector,
+  text: "machine learning",
+  k: 10,
+  alpha: 0.7, // 0=text only, 1=vector only (default: 0.5)
+  subscores: true, // Include separate scores
+});
+// [{ id, score, metadata, keyword_score?, semantic_score? }, ...]
+```
+### Collections
+#### `db.collection(name)`
+Get or create a named collection.
+```typescript
+const users = db.collection("users");
+users.set([...]);
+users.search(query, { k: 5 });
+```
+#### `db.collections()`
+List all collections.
+```typescript
+const names = db.collections();
+// ["users", "products", ...]
+```
+#### `db.deleteCollection(name)`
+Delete a collection.
+```typescript
+db.deleteCollection("old_collection");
+```
+### Properties
+```typescript
+db.length; // Number of vectors
+db.dimensions; // Vector dimensionality
+db.efSearch; // Get/set search quality parameter
+db.efSearch = 200; // Tune for better recall
+```
+### Utility Methods
+#### `db.count(filter?)`
+Count vectors, optionally with filter.
+```typescript
+const total = db.count();
+const filtered = db.count({ category: "news" });
+```
+#### `db.isEmpty()`
+Check if database is empty.
+#### `db.exists(id)`
+Check if an ID exists.
+```typescript
+if (db.exists("doc1")) { ... }
+```
+#### `db.ids()`
+Get all vector IDs.
+```typescript
+const allIds = db.ids();
+```
+#### `db.items()`
+Get all vectors with metadata.
+```typescript
+const allItems = db.items();
+// [{ id, vector, metadata }, ...]
+```
+#### `db.stats()`
+Get index statistics.
+```typescript
+const stats = db.stats();
+// { numVectors, dimensions, maxLevel, avgNeighborsL0, ... }
+```
+### Persistence
+#### `db.flush()`
+Force write pending changes to disk.
+```typescript
+db.flush();
+```
+#### `db.compact()`
+Remove deleted records and reclaim space.
+```typescript
+const removed = db.compact();
+```
+#### `db.optimize()`
+Reorder graph for better cache locality (6-40% speedup).
+```typescript
+const reordered = db.optimize();
+```
+#### `db.close()`
+Close database and release file locks.
+```typescript
+db.close();
+// Can now reopen the same path
+```
+#### `db.mergeFrom(other)`
+Merge another database into this one.
+```typescript
+const merged = db.mergeFrom(otherDb);
+```
+### Filter Operators
+```typescript
+// Equality
+{ field: "value" }                    // Shorthand
+{ field: { $eq: "value" } }           // Explicit
+// Comparison
+{ field: { $ne: "value" } }           // Not equal
+{ field: { $gt: 10 } }                // Greater than
+{ field: { $gte: 10 } }               // Greater or equal
+{ field: { $lt: 10 } }                // Less than
+{ field: { $lte: 10 } }               // Less or equal
+// Membership
+{ field: { $in: ["a", "b"] } }        // In list
+{ field: { $nin: ["a", "b"] } }       // Not in list
+// Logical
+{ $and: [{...}, {...}] }              // AND
+{ $or: [{...}, {...}] }               // OR
+```
+## Performance
+**10K vectors, 128D, M=16, ef=100. Measured 2026-01-20 (Apple M3 Max):**
+| Metric     | Value     |
+| ---------- | --------- |
+| Search QPS | 11,542    |
+| Build      | 30,826 vec/s |
+| Recall@10  | 89.7%     |
 ## License
-Apache-2.0
+[Elastic License 2.0](../LICENSE)

package/index.d.ts CHANGED Viewed

@@ -146,6 +146,24 @@ export declare class VectorDatabase {
    * For hybrid search, this commits text index changes.
    */
   flush(): void
+  /**
+   * Compact the database by removing deleted records and reclaiming space.
+   *
+   * This operation removes tombstoned records, reassigns indices to be
+   * contiguous, and rebuilds the search index. Call after bulk deletes
+   * to reclaim memory and improve search performance.
+   *
+   * @returns Number of deleted records that were removed
+   *
+   * @example
+   * ```typescript
+   * // After bulk delete
+   * db.delete(staleIds);
+   * const removed = db.compact();
+   * console.log(`Removed ${removed} deleted records`);
+   * ```
+   */
+  compact(): number
   /**
    * Close the database and release file locks.
    *
@@ -236,16 +254,16 @@ export interface HybridSearchResult {
  *   efSearch: 150
  * });
  *
- * // With RaBitQ quantization (8x memory reduction)
+ * // With SQ8 quantization (4x memory reduction, ~99% recall)
  * const db = omendb.open("./mydb", {
  *   dimensions: 128,
- *   quantization: 4  // 4-bit quantization
+ *   quantization: true  // or "sq8"
  * });
  *
  * // Quantization with custom rescore settings
  * const db = omendb.open("./mydb", {
  *   dimensions: 128,
- *   quantization: 4,
+ *   quantization: true,
  *   rescore: false,    // Disable rescore for max speed
  *   oversample: 5.0    // Or increase oversample for better recall
  * });
@@ -261,7 +279,7 @@ export declare function open(path: string, options?: OpenOptions | undefined | n
  * - m: 16 (HNSW neighbors per node, higher = better recall, more memory)
  * - efConstruction: 100 (build quality, higher = better graph, slower build)
  * - efSearch: 100 (search quality, higher = better recall, slower search)
- * - quantization: null (RaBitQ bit width: 2, 4, or 8 for compression)
+ * - quantization: null (true/"sq8" for 4x compression, ~99% recall)
  * - rescore: true when quantization enabled (rerank candidates with exact distance)
  * - oversample: 3.0 (fetch k*oversample candidates when rescoring)
  * - metric: "l2" (distance metric: "l2", "euclidean", "cosine", "dot", "ip")
@@ -278,9 +296,7 @@ export interface OpenOptions {
   /**
    * Quantization mode (default: null = no quantization)
    * - true or "sq8": SQ8 4x compression, ~99% recall (RECOMMENDED)
-   * - "rabitq": RaBitQ 8x compression, ~98% recall
-   * - "binary": Binary 32x compression, ~95% recall
-   * - 2, 4, 8: RaBitQ with specific bits (legacy)
+   * - false/null: Full precision (no quantization)
    */
   quantization?: boolean | string | number | null | undefined
   /**

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@omendb/omendb",
-  "version": "0.0.22",
+  "version": "0.0.24",
   "description": "Fast embedded vector database with HNSW + ACORN-1 filtered search",
   "main": "index.js",
   "types": "index.d.ts",
@@ -50,10 +50,10 @@
     "omendb.node"
   ],
   "optionalDependencies": {
-    "@omendb/omendb-darwin-x64": "0.0.22",
-    "@omendb/omendb-darwin-arm64": "0.0.22",
-    "@omendb/omendb-linux-x64-gnu": "0.0.22",
-    "@omendb/omendb-linux-arm64-gnu": "0.0.22",
-    "@omendb/omendb-win32-x64-msvc": "0.0.22"
+    "@omendb/omendb-darwin-x64": "0.0.24",
+    "@omendb/omendb-darwin-arm64": "0.0.24",
+    "@omendb/omendb-linux-x64-gnu": "0.0.24",
+    "@omendb/omendb-linux-arm64-gnu": "0.0.24",
+    "@omendb/omendb-win32-x64-msvc": "0.0.24"
   }
 }