npm - @omendb/omendb - Versions diffs - 0.0.26 → 0.0.28 - Mend

@omendb/omendb 0.0.26 → 0.0.28

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED Viewed

@@ -5,7 +5,7 @@ Fast embedded vector database with HNSW indexing for Node.js and Bun.
 ## Installation
 ```bash
-npm install omendb
+npm install @omendb/omendb
 ```
 ## Quick Start
@@ -191,28 +191,31 @@ db.setWithText([
 ]);
 ```
-#### `db.textSearch(query, k)`
+#### `db.searchText(query, k)`
 BM25 text-only search.
 ```typescript
-const results = db.textSearch("machine learning", 10);
+const results = db.searchText("machine learning", 10);
 // [{ id, score, metadata }, ...]
 ```
-#### `db.hybridSearch(options)`
+#### `db.searchHybrid(queryVector, queryText, k, options?)`
-Combined vector + text search.
+Combined vector + text search using Reciprocal Rank Fusion.
 ```typescript
-const results = db.hybridSearch({
-  vector: queryVector,
-  text: "machine learning",
-  k: 10,
+// Basic
+const results = db.searchHybrid(queryVector, "machine learning", 10);
+// With options
+const results = db.searchHybrid(queryVector, "machine learning", 10, {
   alpha: 0.7, // 0=text only, 1=vector only (default: 0.5)
+  rrfK: 60, // RRF constant (default: 60)
+  filter: { category: "ml" },
   subscores: true, // Include separate scores
 });
-// [{ id, score, metadata, keyword_score?, semantic_score? }, ...]
+// [{ id, score, metadata, keywordScore?, semanticScore? }, ...]
 ```
 ### Collections

package/index.d.ts CHANGED Viewed

@@ -1,6 +1,17 @@
 /* auto-generated by NAPI-RS */
 /* eslint-disable */
 export declare class VectorDatabase {
+  /**
+   * Get or create a named collection.
+   *
+   * Collection handles share state - changes made through one handle
+   * are immediately visible through another (no flush required).
+   */
+  collection(name: string, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
+  /** List all collections. */
+  collections(): Array<string>
+  /** Delete a collection. */
+  deleteCollection(name: string): void
   /**
    * Insert or update vectors.
    *
@@ -14,45 +25,7 @@ export declare class VectorDatabase {
    * @param items - Array of {id, vector, metadata?, text?} or {id, vectors, metadata?}
    * @returns Number of vectors inserted/updated
    */
-  set(items: Array<SetItem>): number
-  /**
-   * Search for k nearest neighbors.
-   *
-   * @param query - Query vector (number[] or Float32Array)
-   * @param k - Number of results to return
-   * @param options - Optional search options: {filter?, ef?, maxDistance?}
-   * @returns Array of {id, distance, score, metadata}
-   *
-   * @example
-   * ```javascript
-   * // Basic search
-   * db.search([1, 0, 0, 0], 10);
-   *
-   * // With options
-   * db.search([1, 0, 0, 0], 10, { filter: { category: "A" }, ef: 200 });
-   * db.search([1, 0, 0, 0], 10, { maxDistance: 0.5 });
-   * ```
-   */
-  search(query: Array<number> | Float32Array, k: number, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): Array<SearchResult>
-  /**
-   * Search multi-vector store with query tokens.
-   *
-   * Internal method used by unified search() for multi-vector stores.
-   *
-   * @param query - Query tokens (number[][] or Float32Array[])
-   * @param k - Number of results to return
-   * @param rerank - Enable MaxSim reranking for better quality (default: true)
-   * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
-   * @returns Array of {id, distance, metadata}
-   */
-  searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
-  /**
-   * Batch search with parallel execution (async).
-   *
-   * Runs searches in parallel using rayon on a blocking thread pool,
-   * keeping the Node.js event loop free.
-   */
-  searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
+  set(items: Array<SetItem>): Promise<number>
   /** Get a vector by ID. */
   get(id: string): GetResult | null
   /**
@@ -145,6 +118,8 @@ export declare class VectorDatabase {
   get dimensions(): number
   /** Check if this is a multi-vector store. */
   get isMultiVector(): boolean
+  /** Check if an embedding function is configured. */
+  get hasEmbeddingFn(): boolean
   /** Check if database is empty. */
   isEmpty(): boolean
   /** Get database statistics. */
@@ -154,16 +129,81 @@ export declare class VectorDatabase {
   /** Set ef_search value. */
   set efSearch(efSearch: number)
   /**
-   * Get or create a named collection.
+   * Compact the database by removing deleted records and reclaiming space.
    *
-   * Collection handles share state - changes made through one handle
-   * are immediately visible through another (no flush required).
+   * This operation removes tombstoned records, reassigns indices to be
+   * contiguous, and rebuilds the search index. Call after bulk deletes
+   * to reclaim memory and improve search performance.
+   *
+   * @returns Number of deleted records that were removed
+   *
+   * @example
+   * ```typescript
+   * // After bulk delete
+   * db.delete(staleIds);
+   * const removed = db.compact();
+   * console.log(`Removed ${removed} deleted records`);
+   * ```
    */
-  collection(name: string): VectorDatabase
-  /** List all collections. */
-  collections(): Array<string>
-  /** Delete a collection. */
-  deleteCollection(name: string): void
+  compact(): number
+  /**
+   * Close the database and release file locks.
+   *
+   * After calling close(), the database is no longer usable.
+   * Any subsequent operations will fail or return empty results.
+   *
+   * This is useful when you need to reopen the same database path
+   * in the same process, since JavaScript doesn't have deterministic
+   * object destruction like Python's `del`.
+   */
+  close(): void
+  /**
+   * Optimize index for cache-efficient search.
+   *
+   * Reorders nodes for better memory locality, improving search performance by 6-40%.
+   * Call after inserting a large batch of vectors.
+   *
+   * @returns Number of nodes reordered
+   */
+  optimize(): number
+  /**
+   * Merge another database into this one.
+   *
+   * @param other - Source database to merge from
+   * @param keyPrefix - Optional prefix for all source IDs (e.g., "subdir/")
+   * @returns Number of vectors merged
+   */
+  mergeFrom(other: VectorDatabase, keyPrefix?: string | undefined | null): number
+  /**
+   * List all vector IDs (without loading vector data).
+   *
+   * Efficient way to get all IDs for iteration, export, or debugging.
+   * @returns Array of all vector IDs in the database
+   */
+  ids(): Array<string>
+  /**
+   * Get all items as array of {id, vector, metadata}.
+   *
+   * Returns all vectors with their IDs and metadata.
+   * For large datasets, consider using ids() and get() in batches.
+   */
+  items(): Array<GetResult>
+  /**
+   * Check if an ID exists in the database.
+   *
+   * @param id - Vector ID to check
+   * @returns true if ID exists and is not deleted
+   */
+  exists(id: string): boolean
+  /**
+   * Get multiple vectors by ID.
+   *
+   * Batch version of get(). More efficient than calling get() in a loop.
+   *
+   * @param ids - Array of vector IDs to retrieve
+   * @returns Array of results in same order as input, null for missing IDs
+   */
+  getBatch(ids: Array<string>): Array<GetResult | undefined | null>
   /**
    * Check if text search is enabled.
    *
@@ -203,7 +243,7 @@ export declare class VectorDatabase {
    * });
    * ```
    */
-  searchHybrid(queryVector: Array<number> | Float32Array, queryText: string, k: number, options?: { filter?: Record<string, unknown>; alpha?: number; rrfK?: number; subscores?: boolean } | undefined): Array<HybridSearchResult>
+  searchHybrid(queryVector: Array<number> | Float32Array | string, queryText: string | undefined | null, k: number, options?: { filter?: Record<string, unknown>; alpha?: number; rrfK?: number; subscores?: boolean } | undefined): Promise<Array<HybridSearchResult>>
   /**
    * Flush pending changes to disk.
    *
@@ -211,100 +251,110 @@ export declare class VectorDatabase {
    */
   flush(): void
   /**
-   * Compact the database by removing deleted records and reclaiming space.
-   *
-   * This operation removes tombstoned records, reassigns indices to be
-   * contiguous, and rebuilds the search index. Call after bulk deletes
-   * to reclaim memory and improve search performance.
+   * Search for k nearest neighbors.
    *
-   * @returns Number of deleted records that were removed
+   * @param query - Query vector (number[] or Float32Array)
+   * @param k - Number of results to return
+   * @param options - Optional search options: {filter?, ef?, maxDistance?}
+   * @returns Array of {id, distance, score, metadata}
    *
    * @example
-   * ```typescript
-   * // After bulk delete
-   * db.delete(staleIds);
-   * const removed = db.compact();
-   * console.log(`Removed ${removed} deleted records`);
+   * ```javascript
+   * // Basic search
+   * db.search([1, 0, 0, 0], 10);
+   *
+   * // With options
+   * db.search([1, 0, 0, 0], 10, { filter: { category: "A" }, ef: 200 });
+   * db.search([1, 0, 0, 0], 10, { maxDistance: 0.5 });
    * ```
    */
-  compact(): number
+  search(query: Array<number> | Float32Array | string, k: number, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): Promise<Array<SearchResult>>
   /**
-   * Close the database and release file locks.
+   * Search multi-vector store with query tokens.
    *
-   * After calling close(), the database is no longer usable.
-   * Any subsequent operations will fail or return empty results.
+   * Internal method used by unified search() for multi-vector stores.
    *
-   * This is useful when you need to reopen the same database path
-   * in the same process, since JavaScript doesn't have deterministic
-   * object destruction like Python's `del`.
+   * @param query - Query tokens (number[][] or Float32Array[])
+   * @param k - Number of results to return
+   * @param rerank - Enable MaxSim reranking for better quality (default: true)
+   * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
+   * @returns Array of {id, distance, metadata}
    */
-  close(): void
+  searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
   /**
-   * Optimize index for cache-efficient search.
-   *
-   * Reorders nodes for better memory locality, improving search performance by 6-40%.
-   * Call after inserting a large batch of vectors.
+   * Batch search with parallel execution (async).
    *
-   * @returns Number of nodes reordered
+   * Runs searches in parallel using rayon on a blocking thread pool,
+   * keeping the Node.js event loop free.
    */
-  optimize(): number
-  /** Merge another database into this one. */
-  mergeFrom(other: VectorDatabase): number
+  searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
   /**
-   * List all vector IDs (without loading vector data).
+   * Enable sparse vector indexing for SPLADE-style retrieval.
    *
-   * Efficient way to get all IDs for iteration, export, or debugging.
-   * @returns Array of all vector IDs in the database
+   * Called automatically by setSparse() and setHybridSparse().
+   * Call explicitly before sparseSearch() on an empty index.
    */
-  ids(): Array<string>
+  enableSparse(): void
+  /** Check if sparse indexing is enabled. */
+  get hasSparse(): boolean
   /**
-   * Get all items as array of {id, vector, metadata}.
+   * Insert or update a sparse vector.
    *
-   * Returns all vectors with their IDs and metadata.
-   * For large datasets, consider using ids() and get() in batches.
-   */
-  items(): Array<GetResult>
-  /**
-   * Check if an ID exists in the database.
+   * @param id - Unique identifier
+   * @param sparse - Sparse vector as {indices: number[], values: number[]} or {dim: weight}
+   * @param metadata - Optional metadata
    *
-   * @param id - Vector ID to check
-   * @returns true if ID exists and is not deleted
+   * @example
+   * ```javascript
+   * db.setSparse("doc1", {indices: [10, 42], values: [0.5, 1.2]}, {title: "Hello"});
+   * db.setSparse("doc2", {"10": 0.5, "42": 1.2}, {title: "World"});
+   * ```
    */
-  exists(id: string): boolean
+  setSparse(id: string, sparse: { indices: number[]; values: number[] } | Record<string, number>, metadata?: Record<string, unknown> | undefined): void
   /**
-   * Alias for exists() - check if an ID exists in the database.
+   * Insert or update both dense and sparse vectors together.
    *
-   * @param id - Vector ID to check
-   * @returns true if ID exists and is not deleted
+   * @param id - Unique identifier
+   * @param vector - Dense vector
+   * @param sparse - Sparse vector
+   * @param metadata - Optional metadata
    */
-  has(id: string): boolean
+  setHybridSparse(id: string, vector: Array<number> | Float32Array, sparse: { indices: number[]; values: number[] } | Record<string, number>, metadata?: Record<string, unknown> | undefined): void
   /**
-   * Search for the single nearest neighbor.
-   *
-   * Convenience method that returns the top result or null if no matches.
+   * Search sparse vectors by dot product similarity.
    *
-   * @param query - Query vector (number[] or Float32Array)
-   * @param options - Optional search options: {filter?, ef?, maxDistance?}
-   * @returns Single result or null
+   * @param query - Sparse query vector
+   * @param k - Number of results
+   * @param options - Optional: {filter?}
+   * @returns Array of {id, score, metadata} sorted by score descending
    *
    * @example
    * ```javascript
-   * const nearest = db.searchOne([1, 0, 0, 0]);
-   * if (nearest) {
-   *   console.log(`Found: ${nearest.id} at distance ${nearest.distance}`);
-   * }
+   * const results = db.sparseSearch({indices: [10, 42], values: [1.0, 0.5]}, 5);
+   * const results = db.sparseSearch({"10": 1.0, "42": 0.5}, 5);
    * ```
    */
-  searchOne(query: Array<number> | Float32Array, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): SearchResult | null
+  sparseSearch(query: { indices: number[]; values: number[] } | Record<string, number>, k: number, options?: { filter?: Record<string, unknown> } | undefined): Array<SparseSearchResult>
   /**
-   * Get multiple vectors by ID.
+   * Hybrid dense + sparse search with Reciprocal Rank Fusion (RRF).
    *
-   * Batch version of get(). More efficient than calling get() in a loop.
+   * @param queryVector - Dense query vector
+   * @param sparseQuery - Sparse query vector
+   * @param k - Number of results
+   * @param options - Optional: {alpha?, filter?}
+   * @returns Array of {id, score, metadata}
    *
-   * @param ids - Array of vector IDs to retrieve
-   * @returns Array of results in same order as input, null for missing IDs
+   * @example
+   * ```javascript
+   * const results = db.hybridSparseSearch(
+   *   [1, 0, 0],
+   *   {indices: [10, 42], values: [1.0, 0.5]},
+   *   10,
+   *   { alpha: 0.5 }
+   * );
+   * ```
    */
-  getBatch(ids: Array<string>): Array<GetResult | undefined | null>
+  hybridSparseSearch(queryVector: Array<number> | Float32Array, sparseQuery: { indices: number[]; values: number[] } | Record<string, number>, k: number, options?: { alpha?: number; filter?: Record<string, unknown> } | undefined): Array<SparseSearchResult>
 }
 export interface GetResult {
@@ -323,14 +373,6 @@ export interface HybridSearchResult {
   semanticScore?: number
 }
-export interface MultiVectorItem {
-  id: string
-  /** Multi-vector data as array of Float32Arrays */
-  vectors: Float32Array[]
-  /** Optional metadata */
-  metadata?: Record<string, unknown> | undefined
-}
 /**
  * Open or create a vector database.
  *
@@ -357,16 +399,9 @@ export interface MultiVectorItem {
  *   quantization: true  // or "sq8"
  * });
  *
- * // Quantization with custom rescore settings
- * const db = omendb.open("./mydb", {
- *   dimensions: 128,
- *   quantization: true,
- *   rescore: false,    // Disable rescore for max speed
- *   oversample: 5.0    // Or increase oversample for better recall
- * });
  * ```
  */
-export declare function open(path: string, options?: OpenOptions | undefined | null): VectorDatabase
+export declare function open(path: string, options?: OpenOptions | undefined | null, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
 /**
  * Configuration options for opening a vector database.
@@ -376,9 +411,7 @@ export declare function open(path: string, options?: OpenOptions | undefined | n
  * - m: 16 (HNSW neighbors per node, higher = better recall, more memory)
  * - efConstruction: 100 (build quality, higher = better graph, slower build)
  * - efSearch: 100 (search quality, higher = better recall, slower search)
- * - quantization: null (true/"sq8" for 4x compression, ~99% recall)
- * - rescore: true when quantization enabled (rerank candidates with exact distance)
- * - oversample: 3.0 (fetch k*oversample candidates when rescoring)
+ * - quantization: null (true/"sq8" for 4x compression)
  * - metric: "l2" (distance metric: "l2", "euclidean", "cosine", "dot", "ip")
  */
 export interface OpenOptions {
@@ -396,16 +429,6 @@ export interface OpenOptions {
    * - false/null: Full precision (no quantization)
    */
   quantization?: boolean | string | number | null | undefined
-  /**
-   * Rescore candidates with exact distance (default: true when quantization enabled)
-   * Set to false for maximum speed at the cost of ~20% recall
-   */
-  rescore?: boolean
-  /**
-   * Oversampling factor for rescoring (default: 3.0)
-   * Fetches k*oversample candidates then reranks to return top k
-   */
-  oversample?: number
   /** Distance metric: "l2"/"euclidean" (default), "cosine", "dot"/"ip" */
   metric?: string
   /**
@@ -416,6 +439,10 @@ export interface OpenOptions {
    * - false/null: Disabled (default, single-vector mode)
    */
   multiVector?: boolean | { repetitions?: number; partitionBits?: number; seed?: number; dProj?: number | null } | null | undefined
+  /** SQ8 refiner: rescore with full precision (default: true when quantized) */
+  rescore?: boolean
+  /** Candidate multiplier for rescoring (default: 3.0) */
+  oversample?: number
 }
 export interface SearchResult {
@@ -437,6 +464,16 @@ export interface SetItem {
   metadata?: Record<string, unknown> | undefined
   /** Optional text for hybrid search (auto-enables text search, stored in metadata.text) */
   text?: string
+  /** Optional document for auto-embedding via embeddingFn */
+  document?: string
+}
+/** Sparse search result returned from sparseSearch / hybridSparseSearch. */
+export interface SparseSearchResult {
+  id: string
+  /** Dot product score (higher = more similar) */
+  score: number
+  metadata: Record<string, unknown>
 }
 export interface StatsResult {
@@ -450,20 +487,3 @@ export interface TextSearchResult {
   score: number
   metadata: Record<string, unknown>
 }
-export interface VectorItem {
-  id: string
-  /** Vector data as Float32Array */
-  vector: Float32Array
-  /** Optional metadata */
-  metadata?: Record<string, unknown> | undefined
-  /** Optional document text (stored in metadata.document) */
-  document?: string
-}
-export interface VectorItemWithText {
-  id: string
-  vector: Float32Array
-  text: string
-  metadata?: Record<string, unknown> | undefined
-}

package/index.js CHANGED Viewed

@@ -111,6 +111,10 @@ function toFloat32Array(arr) {
 // Convert VectorItem to use Float32Array
 function convertVectorItem(item) {
+	// Items with document field are handled by native embedding
+	if (item.document !== undefined && item.document !== null) {
+		return item;
+	}
 	if (item.vector === undefined || item.vector === null) {
 		if (Array.isArray(item.vectors)) {
 			throw new Error(
@@ -141,11 +145,6 @@ function convertMultiVectorItem(item) {
 	};
 }
-// Check if items contain multi-vector data (vectors field must be an array)
-function isMultiVectorItem(item) {
-	return Array.isArray(item.vectors);
-}
 // Wrap VectorDatabase to handle array conversion
 const NativeVectorDatabase = nativeBinding.VectorDatabase;
@@ -201,17 +200,6 @@ class VectorDatabase {
 		}
 	}
-	/**
-	 * Search for the single nearest neighbor.
-	 *
-	 * @param {number[]|Float32Array} query - Query vector
-	 * @param {object} [options] - Search options: {filter?, ef?, maxDistance?}
-	 * @returns {{id: string, distance: number, score: number, metadata: object}|null}
-	 */
-	searchOne(query, options) {
-		return this._native.searchOne(query, options);
-	}
 	searchBatch(queries, k, ef) {
 		return this._native.searchBatch(queries, k, ef);
 	}
@@ -254,6 +242,10 @@ class VectorDatabase {
 		return this._native.isMultiVector;
 	}
+	get hasEmbeddingFn() {
+		return this._native.hasEmbeddingFn;
+	}
 	isEmpty() {
 		return this._native.isEmpty();
 	}
@@ -270,8 +262,8 @@ class VectorDatabase {
 		this._native.efSearch = value;
 	}
-	collection(name) {
-		return new VectorDatabase(this._native.collection(name));
+	collection(name, embeddingFn) {
+		return new VectorDatabase(this._native.collection(name, embeddingFn));
 	}
 	collections() {
@@ -338,16 +330,6 @@ class VectorDatabase {
 		return this._native.exists(id);
 	}
-	/**
-	 * Alias for exists() - check if an ID exists in the database.
-	 *
-	 * @param {string} id - Vector ID to check
-	 * @returns {boolean}
-	 */
-	has(id) {
-		return this._native.has(id);
-	}
 	getBatch(ids) {
 		return this._native.getBatch(ids);
 	}
@@ -355,10 +337,34 @@ class VectorDatabase {
 	compact() {
 		return this._native.compact();
 	}
+	enableSparse() {
+		return this._native.enableSparse();
+	}
+	get hasSparse() {
+		return this._native.hasSparse;
+	}
+	setSparse(id, sparse, metadata) {
+		return this._native.setSparse(id, sparse, metadata);
+	}
+	setHybridSparse(id, vector, sparse, metadata) {
+		return this._native.setHybridSparse(id, vector, sparse, metadata);
+	}
+	sparseSearch(query, k, options) {
+		return this._native.sparseSearch(query, k, options);
+	}
+	hybridSparseSearch(queryVector, sparseQuery, k, options) {
+		return this._native.hybridSparseSearch(queryVector, sparseQuery, k, options);
+	}
 }
-function open(path, options) {
-	return new VectorDatabase(nativeBinding.open(path, options));
+function open(path, options, embeddingFn) {
+	return new VectorDatabase(nativeBinding.open(path, options, embeddingFn));
 }
 module.exports.VectorDatabase = VectorDatabase;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@omendb/omendb",
-  "version": "0.0.26",
+  "version": "0.0.28",
   "description": "Fast embedded vector database with HNSW + ACORN-1 filtered search",
   "main": "index.js",
   "types": "index.d.ts",
@@ -50,10 +50,10 @@
     "omendb.node"
   ],
   "optionalDependencies": {
-    "@omendb/omendb-darwin-x64": "0.0.26",
-    "@omendb/omendb-darwin-arm64": "0.0.26",
-    "@omendb/omendb-linux-x64-gnu": "0.0.26",
-    "@omendb/omendb-linux-arm64-gnu": "0.0.26",
-    "@omendb/omendb-win32-x64-msvc": "0.0.26"
+    "@omendb/omendb-darwin-x64": "0.0.28",
+    "@omendb/omendb-darwin-arm64": "0.0.28",
+    "@omendb/omendb-linux-x64-gnu": "0.0.28",
+    "@omendb/omendb-linux-arm64-gnu": "0.0.28",
+    "@omendb/omendb-win32-x64-msvc": "0.0.28"
   }
 }