npm - @omendb/omendb - Versions diffs - 0.0.25 → 0.0.27 - Mend

@omendb/omendb 0.0.25 → 0.0.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md CHANGED

@@ -5,7 +5,7 @@ Fast embedded vector database with HNSW indexing for Node.js and Bun.
 ## Installation
 ```bash
-npm install omendb
+npm install @omendb/omendb
 ```
 ## Quick Start
@@ -191,28 +191,31 @@ db.setWithText([
 ]);
 ```
-#### `db.textSearch(query, k)`
+#### `db.searchText(query, k)`
 BM25 text-only search.
 ```typescript
-const results = db.textSearch("machine learning", 10);
+const results = db.searchText("machine learning", 10);
 // [{ id, score, metadata }, ...]
 ```
-#### `db.hybridSearch(options)`
+#### `db.searchHybrid(queryVector, queryText, k, options?)`
-Combined vector + text search.
+Combined vector + text search using Reciprocal Rank Fusion.
 ```typescript
-const results = db.hybridSearch({
-  vector: queryVector,
-  text: "machine learning",
-  k: 10,
+// Basic
+const results = db.searchHybrid(queryVector, "machine learning", 10);
+// With options
+const results = db.searchHybrid(queryVector, "machine learning", 10, {
   alpha: 0.7, // 0=text only, 1=vector only (default: 0.5)
+  rrfK: 60, // RRF constant (default: 60)
+  filter: { category: "ml" },
   subscores: true, // Include separate scores
 });
-// [{ id, score, metadata, keyword_score?, semantic_score? }, ...]
+// [{ id, score, metadata, keywordScore?, semanticScore? }, ...]
 ```
 ### Collections

package/index.d.ts CHANGED

@@ -1,6 +1,17 @@
 /* auto-generated by NAPI-RS */
 /* eslint-disable */
 export declare class VectorDatabase {
+  /**
+   * Get or create a named collection.
+   *
+   * Collection handles share state - changes made through one handle
+   * are immediately visible through another (no flush required).
+   */
+  collection(name: string, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
+  /** List all collections. */
+  collections(): Array<string>
+  /** Delete a collection. */
+  deleteCollection(name: string): void
   /**
    * Insert or update vectors.
    *
@@ -8,48 +19,33 @@ export declare class VectorDatabase {
    * - Single-vector: items have `vector` field
    * - Multi-vector: items have `vectors` field (array of vectors)
    *
-   * @param items - Array of {id, vector, metadata?} or {id, vectors, metadata?}
-   * @returns Array of internal indices
-   */
-  set(items: Array<SetItem>): Array<number>
-  /**
-   * Search for k nearest neighbors.
-   *
-   * @param query - Query vector (number[] or Float32Array)
-   * @param k - Number of results to return
-   * @param ef - Optional search width override
-   * @param filter - Optional metadata filter (e.g., {category: "foo"} or {price: {$gt: 10}})
-   * @param maxDistance - Optional max distance threshold (filter out distant results)
-   * @returns Array of {id, distance, metadata}
-   */
-  search(query: Array<number> | Float32Array, k: number, ef?: number | undefined | null, filter?: Record<string, unknown> | undefined, maxDistance?: number | undefined | null): Array<SearchResult>
-  /**
-   * Search multi-vector store with query tokens.
-   *
-   * Internal method used by unified search() for multi-vector stores.
-   *
-   * @param query - Query tokens (number[][] or Float32Array[])
-   * @param k - Number of results to return
-   * @param rerank - Enable MaxSim reranking for better quality (default: true)
-   * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
-   * @returns Array of {id, distance, metadata}
-   */
-  searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
-  /**
-   * Batch search with parallel execution (async).
+   * When any item includes a `text` field, text search is automatically enabled.
+   * This allows immediate use of searchHybrid() without calling enableTextSearch().
    *
-   * Runs searches in parallel using rayon on a blocking thread pool,
-   * keeping the Node.js event loop free.
+   * @param items - Array of {id, vector, metadata?, text?} or {id, vectors, metadata?}
+   * @returns Number of vectors inserted/updated
    */
-  searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
+  set(items: Array<SetItem>): Promise<number>
   /** Get a vector by ID. */
   get(id: string): GetResult | null
   /**
    * Delete vectors by ID.
    *
+   * Accepts either a single ID string or an array of IDs.
+   *
+   * @param ids - Single ID string or array of IDs to delete
    * @returns Number of vectors deleted
+   *
+   * @example
+   * ```javascript
+   * // Delete single
+   * db.delete("doc1");
+   *
+   * // Delete multiple
+   * db.delete(["doc1", "doc2", "doc3"]);
+   * ```
    */
-  delete(ids: Array<string>): number
+  delete(ids: string | Array<string>): number
   /**
    * Delete vectors matching a metadata filter.
    *
@@ -94,14 +90,36 @@ export declare class VectorDatabase {
    * ```
    */
   count(filter?: Record<string, unknown> | undefined): number
-  /** Update a vector's data and/or metadata. */
-  update(id: string, vector: Array<number> | Float32Array, metadata?: Record<string, unknown> | undefined): void
+  /**
+   * Update a vector's data, metadata, and/or text.
+   *
+   * @param id - Vector ID to update
+   * @param options - Update options: {vector?, metadata?, text?}
+   *
+   * @example
+   * ```javascript
+   * // Update vector only
+   * db.update("doc1", { vector: [1, 0, 0, 0] });
+   *
+   * // Update metadata only
+   * db.update("doc1", { metadata: { status: "active" } });
+   *
+   * // Update text (re-indexed for BM25 search)
+   * db.update("doc1", { text: "Updated content for search" });
+   *
+   * // Update multiple fields
+   * db.update("doc1", { vector: [...], metadata: {...}, text: "..." });
+   * ```
+   */
+  update(id: string, options: { vector?: number[] | Float32Array; metadata?: Record<string, unknown>; text?: string }): void
   /** Get number of vectors in database. */
   get length(): number
   /** Get vector dimensions of this database. */
   get dimensions(): number
   /** Check if this is a multi-vector store. */
   get isMultiVector(): boolean
+  /** Check if an embedding function is configured. */
+  get hasEmbeddingFn(): boolean
   /** Check if database is empty. */
   isEmpty(): boolean
   /** Get database statistics. */
@@ -110,61 +128,6 @@ export declare class VectorDatabase {
   get efSearch(): number
   /** Set ef_search value. */
   set efSearch(efSearch: number)
-  /**
-   * Get or create a named collection.
-   *
-   * Collection handles share state - changes made through one handle
-   * are immediately visible through another (no flush required).
-   */
-  collection(name: string): VectorDatabase
-  /** List all collections. */
-  collections(): Array<string>
-  /** Delete a collection. */
-  deleteCollection(name: string): void
-  /**
-   * Enable text search for hybrid (vector + text) search.
-   *
-   * Must be called before using setWithText() or hybridSearch().
-   */
-  enableTextSearch(): void
-  /** Check if text search is enabled. */
-  get hasTextSearch(): boolean
-  /**
-   * Set vectors with associated text for hybrid search.
-   *
-   * @param items - Array of {id, vector, text, metadata?}
-   * @returns Array of internal indices
-   */
-  setWithText(items: Array<VectorItemWithText>): Array<number>
-  /**
-   * Search using text only (BM25 scoring).
-   *
-   * @param query - Text query
-   * @param k - Number of results
-   * @returns Array of {id, score, metadata}
-   */
-  textSearch(query: string, k: number): Array<TextSearchResult>
-  /**
-   * Hybrid search combining vector similarity and text relevance.
-   *
-   * Uses Reciprocal Rank Fusion (RRF) to combine HNSW and BM25 results.
-   *
-   * @param queryVector - Query embedding
-   * @param queryText - Text query for BM25
-   * @param k - Number of results
-   * @param filter - Optional metadata filter
-   * @param alpha - Weight for vector vs text (0.0=text only, 1.0=vector only, default=0.5)
-   * @param rrfK - RRF constant (default=60, higher reduces rank influence)
-   * @param subscores - Return separate keyword_score and semantic_score (default: false)
-   * @returns Array of {id, score, metadata, keyword_score?, semantic_score?}
-   */
-  hybridSearch(queryVector: Array<number> | Float32Array, queryText: string, k: number, filter?: Record<string, unknown> | undefined, alpha?: number | undefined | null, rrfK?: number | undefined | null, subscores?: boolean | undefined | null): Array<HybridSearchResult>
-  /**
-   * Flush pending changes to disk.
-   *
-   * For hybrid search, this commits text index changes.
-   */
-  flush(): void
   /**
    * Compact the database by removing deleted records and reclaiming space.
    *
@@ -235,6 +198,90 @@ export declare class VectorDatabase {
    * @returns Array of results in same order as input, null for missing IDs
    */
   getBatch(ids: Array<string>): Array<GetResult | undefined | null>
+  /**
+   * Check if text search is enabled.
+   *
+   * Text search is automatically enabled when using set() with text field.
+   */
+  get hasTextSearch(): boolean
+  /**
+   * Search using text only (BM25 scoring).
+   *
+   * @param query - Text query
+   * @param k - Number of results
+   * @returns Array of {id, score, metadata}
+   */
+  searchText(query: string, k: number): Array<TextSearchResult>
+  /**
+   * Hybrid search combining vector similarity and text relevance.
+   *
+   * Uses Reciprocal Rank Fusion (RRF) to combine HNSW and BM25 results.
+   *
+   * @param queryVector - Query embedding
+   * @param queryText - Text query for BM25
+   * @param k - Number of results
+   * @param options - Optional: {filter?, alpha?, rrfK?, subscores?}
+   * @returns Array of {id, score, metadata, keywordScore?, semanticScore?}
+   *
+   * @example
+   * ```javascript
+   * // Basic hybrid search
+   * db.searchHybrid([1, 0, 0, 0], "machine learning", 10);
+   *
+   * // With options
+   * db.searchHybrid([1, 0, 0, 0], "query", 10, {
+   *   filter: { type: "ml" },
+   *   alpha: 0.7,
+   *   rrfK: 60,
+   *   subscores: true
+   * });
+   * ```
+   */
+  searchHybrid(queryVector: Array<number> | Float32Array | string, queryText: string | undefined | null, k: number, options?: { filter?: Record<string, unknown>; alpha?: number; rrfK?: number; subscores?: boolean } | undefined): Promise<Array<HybridSearchResult>>
+  /**
+   * Flush pending changes to disk.
+   *
+   * For hybrid search, this commits text index changes.
+   */
+  flush(): void
+  /**
+   * Search for k nearest neighbors.
+   *
+   * @param query - Query vector (number[] or Float32Array)
+   * @param k - Number of results to return
+   * @param options - Optional search options: {filter?, ef?, maxDistance?}
+   * @returns Array of {id, distance, score, metadata}
+   *
+   * @example
+   * ```javascript
+   * // Basic search
+   * db.search([1, 0, 0, 0], 10);
+   *
+   * // With options
+   * db.search([1, 0, 0, 0], 10, { filter: { category: "A" }, ef: 200 });
+   * db.search([1, 0, 0, 0], 10, { maxDistance: 0.5 });
+   * ```
+   */
+  search(query: Array<number> | Float32Array | string, k: number, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): Promise<Array<SearchResult>>
+  /**
+   * Search multi-vector store with query tokens.
+   *
+   * Internal method used by unified search() for multi-vector stores.
+   *
+   * @param query - Query tokens (number[][] or Float32Array[])
+   * @param k - Number of results to return
+   * @param rerank - Enable MaxSim reranking for better quality (default: true)
+   * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
+   * @returns Array of {id, distance, metadata}
+   */
+  searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
+  /**
+   * Batch search with parallel execution (async).
+   *
+   * Runs searches in parallel using rayon on a blocking thread pool,
+   * keeping the Node.js event loop free.
+   */
+  searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
 }
 export interface GetResult {
@@ -253,14 +300,6 @@ export interface HybridSearchResult {
   semanticScore?: number
 }
-export interface MultiVectorItem {
-  id: string
-  /** Multi-vector data as array of Float32Arrays */
-  vectors: Float32Array[]
-  /** Optional metadata */
-  metadata?: Record<string, unknown> | undefined
-}
 /**
  * Open or create a vector database.
  *
@@ -287,16 +326,9 @@ export interface MultiVectorItem {
  *   quantization: true  // or "sq8"
  * });
  *
- * // Quantization with custom rescore settings
- * const db = omendb.open("./mydb", {
- *   dimensions: 128,
- *   quantization: true,
- *   rescore: false,    // Disable rescore for max speed
- *   oversample: 5.0    // Or increase oversample for better recall
- * });
  * ```
  */
-export declare function open(path: string, options?: OpenOptions | undefined | null): VectorDatabase
+export declare function open(path: string, options?: OpenOptions | undefined | null, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
 /**
  * Configuration options for opening a vector database.
@@ -306,9 +338,7 @@ export declare function open(path: string, options?: OpenOptions | undefined | n
  * - m: 16 (HNSW neighbors per node, higher = better recall, more memory)
  * - efConstruction: 100 (build quality, higher = better graph, slower build)
  * - efSearch: 100 (search quality, higher = better recall, slower search)
- * - quantization: null (true/"sq8" for 4x compression, ~99% recall)
- * - rescore: true when quantization enabled (rerank candidates with exact distance)
- * - oversample: 3.0 (fetch k*oversample candidates when rescoring)
+ * - quantization: null (true/"sq8" for 4x compression)
  * - metric: "l2" (distance metric: "l2", "euclidean", "cosine", "dot", "ip")
  */
 export interface OpenOptions {
@@ -326,16 +356,6 @@ export interface OpenOptions {
    * - false/null: Full precision (no quantization)
    */
   quantization?: boolean | string | number | null | undefined
-  /**
-   * Rescore candidates with exact distance (default: true when quantization enabled)
-   * Set to false for maximum speed at the cost of ~20% recall
-   */
-  rescore?: boolean
-  /**
-   * Oversampling factor for rescoring (default: 3.0)
-   * Fetches k*oversample candidates then reranks to return top k
-   */
-  oversample?: number
   /** Distance metric: "l2"/"euclidean" (default), "cosine", "dot"/"ip" */
   metric?: string
   /**
@@ -351,6 +371,8 @@ export interface OpenOptions {
 export interface SearchResult {
   id: string
   distance: number
+  /** Normalized similarity score (0-1, higher = more similar) */
+  score: number
   /** Metadata as JSON (using serde-json feature) */
   metadata: Record<string, unknown>
 }
@@ -363,7 +385,9 @@ export interface SetItem {
   vectors?: Float32Array[] | undefined
   /** Optional metadata */
   metadata?: Record<string, unknown> | undefined
-  /** Optional document text (stored in metadata.document) */
+  /** Optional text for hybrid search (auto-enables text search, stored in metadata.text) */
+  text?: string
+  /** Optional document for auto-embedding via embeddingFn */
   document?: string
 }
@@ -378,20 +402,3 @@ export interface TextSearchResult {
   score: number
   metadata: Record<string, unknown>
 }
-export interface VectorItem {
-  id: string
-  /** Vector data as Float32Array */
-  vector: Float32Array
-  /** Optional metadata */
-  metadata?: Record<string, unknown> | undefined
-  /** Optional document text (stored in metadata.document) */
-  document?: string
-}
-export interface VectorItemWithText {
-  id: string
-  vector: Float32Array
-  text: string
-  metadata?: Record<string, unknown> | undefined
-}

package/index.js CHANGED

@@ -111,6 +111,10 @@ function toFloat32Array(arr) {
 // Convert VectorItem to use Float32Array
 function convertVectorItem(item) {
+	// Items with document field are handled by native embedding
+	if (item.document !== undefined && item.document !== null) {
+		return item;
+	}
 	if (item.vector === undefined || item.vector === null) {
 		if (Array.isArray(item.vectors)) {
 			throw new Error(
@@ -141,11 +145,6 @@ function convertMultiVectorItem(item) {
 	};
 }
-// Check if items contain multi-vector data (vectors field must be an array)
-function isMultiVectorItem(item) {
-	return Array.isArray(item.vectors);
-}
 // Wrap VectorDatabase to handle array conversion
 const NativeVectorDatabase = nativeBinding.VectorDatabase;
@@ -161,15 +160,17 @@ class VectorDatabase {
 	 * - Single-vector: items have `vector` field
 	 * - Multi-vector: items have `vectors` field (array of vectors)
 	 *
-	 * @param {Array<{id: string, vector?: Float32Array|number[], vectors?: Float32Array[]|number[][], metadata?: object}>} items
-	 * @returns {number[]} Array of internal indices
+	 * When any item includes a `text` field, text search is automatically enabled.
+	 *
+	 * @param {Array<{id: string, vector?: Float32Array|number[], vectors?: Float32Array[]|number[][], metadata?: object, text?: string}>} items
+	 * @returns {number} Number of vectors inserted/updated
 	 */
 	set(items) {
 		if (!Array.isArray(items)) {
 			throw new Error("set() requires an array of items");
 		}
 		if (items.length === 0) {
-			return [];
+			return 0;
 		}
 		// Unified set() handles both single and multi-vector via native set()
 		return this._native.set(items.map(this._native.isMultiVector ? convertMultiVectorItem : convertVectorItem));
@@ -184,13 +185,8 @@ class VectorDatabase {
 	 *
 	 * @param {number[]|Float32Array|number[][]|Float32Array[]} query - Query vector(s)
 	 * @param {number} k - Number of results to return
-	 * @param {object} [options] - Search options
-	 * @param {number} [options.ef] - Search width override (single-vector only)
-	 * @param {object} [options.filter] - Metadata filter (single-vector only)
-	 * @param {number} [options.maxDistance] - Max distance threshold (single-vector only)
-	 * @param {boolean} [options.rerank] - Enable MaxSim reranking (multi-vector, default: true)
-	 * @param {number} [options.rerankFactor] - Rerank candidate multiplier (multi-vector, default: 4)
-	 * @returns {Array<{id: string, distance: number, metadata: object}>}
+	 * @param {object} [options] - Search options: {filter?, ef?, maxDistance?}
+	 * @returns {Array<{id: string, distance: number, score: number, metadata: object}>}
 	 */
 	search(query, k, options) {
 		if (this._native.isMultiVector) {
@@ -199,23 +195,8 @@ class VectorDatabase {
 			const rerankFactor = options?.rerankFactor;
 			return this._native.searchMulti(query, k, rerank, rerankFactor);
 		} else {
-			// Single-vector store - support both old positional args and new options object
-			if (typeof options === "object" && options !== null && !Array.isArray(options)) {
-				// New options object style
-				return this._native.search(
-					query,
-					k,
-					options.ef,
-					options.filter,
-					options.maxDistance,
-				);
-			} else {
-				// Legacy positional args: search(query, k, ef, filter, maxDistance)
-				const ef = options; // 3rd arg was ef in old API
-				const filter = arguments[3];
-				const maxDistance = arguments[4];
-				return this._native.search(query, k, ef, filter, maxDistance);
-			}
+			// Single-vector store - pass options object directly to native
+			return this._native.search(query, k, options);
 		}
 	}
@@ -239,8 +220,14 @@ class VectorDatabase {
 		return this._native.count(filter);
 	}
-	update(id, vector, metadata) {
-		return this._native.update(id, vector, metadata);
+	/**
+	 * Update a vector's data, metadata, and/or text.
+	 *
+	 * @param {string} id - Vector ID to update
+	 * @param {object} options - Update options: {vector?, metadata?, text?}
+	 */
+	update(id, options) {
+		return this._native.update(id, options);
 	}
 	get length() {
@@ -255,6 +242,10 @@ class VectorDatabase {
 		return this._native.isMultiVector;
 	}
+	get hasEmbeddingFn() {
+		return this._native.hasEmbeddingFn;
+	}
 	isEmpty() {
 		return this._native.isEmpty();
 	}
@@ -271,8 +262,8 @@ class VectorDatabase {
 		this._native.efSearch = value;
 	}
-	collection(name) {
-		return new VectorDatabase(this._native.collection(name));
+	collection(name, embeddingFn) {
+		return new VectorDatabase(this._native.collection(name, embeddingFn));
 	}
 	collections() {
@@ -283,32 +274,32 @@ class VectorDatabase {
 		return this._native.deleteCollection(name);
 	}
-	enableTextSearch() {
-		return this._native.enableTextSearch();
-	}
 	get hasTextSearch() {
 		return this._native.hasTextSearch;
 	}
-	setWithText(items) {
-		return this._native.setWithText(items.map(convertVectorItem));
-	}
-	textSearch(query, k) {
-		return this._native.textSearch(query, k);
+	/**
+	 * Search using text only (BM25 scoring).
+	 *
+	 * @param {string} query - Text query
+	 * @param {number} k - Number of results
+	 * @returns {Array<{id: string, score: number, metadata: object}>}
+	 */
+	searchText(query, k) {
+		return this._native.searchText(query, k);
 	}
-	hybridSearch(queryVector, queryText, k, filter, alpha, rrfK, subscores) {
-		return this._native.hybridSearch(
-			queryVector,
-			queryText,
-			k,
-			filter,
-			alpha,
-			rrfK,
-			subscores,
-		);
+	/**
+	 * Hybrid search combining vector similarity and text relevance.
+	 *
+	 * @param {number[]|Float32Array} queryVector - Query embedding
+	 * @param {string} queryText - Text query for BM25
+	 * @param {number} k - Number of results
+	 * @param {object} [options] - Options: {filter?, alpha?, rrfK?, subscores?}
+	 * @returns {Array<{id: string, score: number, metadata: object, keywordScore?: number, semanticScore?: number}>}
+	 */
+	searchHybrid(queryVector, queryText, k, options) {
+		return this._native.searchHybrid(queryVector, queryText, k, options);
 	}
 	flush() {
@@ -342,10 +333,14 @@ class VectorDatabase {
 	getBatch(ids) {
 		return this._native.getBatch(ids);
 	}
+	compact() {
+		return this._native.compact();
+	}
 }
-function open(path, options) {
-	return new VectorDatabase(nativeBinding.open(path, options));
+function open(path, options, embeddingFn) {
+	return new VectorDatabase(nativeBinding.open(path, options, embeddingFn));
 }
 module.exports.VectorDatabase = VectorDatabase;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@omendb/omendb",
-  "version": "0.0.25",
+  "version": "0.0.27",
   "description": "Fast embedded vector database with HNSW + ACORN-1 filtered search",
   "main": "index.js",
   "types": "index.d.ts",
@@ -50,10 +50,10 @@
     "omendb.node"
   ],
   "optionalDependencies": {
-    "@omendb/omendb-darwin-x64": "0.0.25",
-    "@omendb/omendb-darwin-arm64": "0.0.25",
-    "@omendb/omendb-linux-x64-gnu": "0.0.25",
-    "@omendb/omendb-linux-arm64-gnu": "0.0.25",
-    "@omendb/omendb-win32-x64-msvc": "0.0.25"
+    "@omendb/omendb-darwin-x64": "0.0.27",
+    "@omendb/omendb-darwin-arm64": "0.0.27",
+    "@omendb/omendb-linux-x64-gnu": "0.0.27",
+    "@omendb/omendb-linux-arm64-gnu": "0.0.27",
+    "@omendb/omendb-win32-x64-msvc": "0.0.27"
   }
 }