npm - @omendb/omendb - Versions diffs - 0.0.26 → 0.0.27 - Mend

@omendb/omendb 0.0.26 → 0.0.27

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md CHANGED Viewed

@@ -5,7 +5,7 @@ Fast embedded vector database with HNSW indexing for Node.js and Bun.
 ## Installation
 ```bash
-npm install omendb
+npm install @omendb/omendb
 ```
 ## Quick Start
@@ -191,28 +191,31 @@ db.setWithText([
 ]);
 ```
-#### `db.textSearch(query, k)`
+#### `db.searchText(query, k)`
 BM25 text-only search.
 ```typescript
-const results = db.textSearch("machine learning", 10);
+const results = db.searchText("machine learning", 10);
 // [{ id, score, metadata }, ...]
 ```
-#### `db.hybridSearch(options)`
+#### `db.searchHybrid(queryVector, queryText, k, options?)`
-Combined vector + text search.
+Combined vector + text search using Reciprocal Rank Fusion.
 ```typescript
-const results = db.hybridSearch({
-  vector: queryVector,
-  text: "machine learning",
-  k: 10,
+// Basic
+const results = db.searchHybrid(queryVector, "machine learning", 10);
+// With options
+const results = db.searchHybrid(queryVector, "machine learning", 10, {
   alpha: 0.7, // 0=text only, 1=vector only (default: 0.5)
+  rrfK: 60, // RRF constant (default: 60)
+  filter: { category: "ml" },
   subscores: true, // Include separate scores
 });
-// [{ id, score, metadata, keyword_score?, semantic_score? }, ...]
+// [{ id, score, metadata, keywordScore?, semanticScore? }, ...]
 ```
 ### Collections

package/index.d.ts CHANGED Viewed

@@ -1,6 +1,17 @@
 /* auto-generated by NAPI-RS */
 /* eslint-disable */
 export declare class VectorDatabase {
+  /**
+   * Get or create a named collection.
+   *
+   * Collection handles share state - changes made through one handle
+   * are immediately visible through another (no flush required).
+   */
+  collection(name: string, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
+  /** List all collections. */
+  collections(): Array<string>
+  /** Delete a collection. */
+  deleteCollection(name: string): void
   /**
    * Insert or update vectors.
    *
@@ -14,45 +25,7 @@ export declare class VectorDatabase {
    * @param items - Array of {id, vector, metadata?, text?} or {id, vectors, metadata?}
    * @returns Number of vectors inserted/updated
    */
-  set(items: Array<SetItem>): number
-  /**
-   * Search for k nearest neighbors.
-   *
-   * @param query - Query vector (number[] or Float32Array)
-   * @param k - Number of results to return
-   * @param options - Optional search options: {filter?, ef?, maxDistance?}
-   * @returns Array of {id, distance, score, metadata}
-   *
-   * @example
-   * ```javascript
-   * // Basic search
-   * db.search([1, 0, 0, 0], 10);
-   *
-   * // With options
-   * db.search([1, 0, 0, 0], 10, { filter: { category: "A" }, ef: 200 });
-   * db.search([1, 0, 0, 0], 10, { maxDistance: 0.5 });
-   * ```
-   */
-  search(query: Array<number> | Float32Array, k: number, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): Array<SearchResult>
-  /**
-   * Search multi-vector store with query tokens.
-   *
-   * Internal method used by unified search() for multi-vector stores.
-   *
-   * @param query - Query tokens (number[][] or Float32Array[])
-   * @param k - Number of results to return
-   * @param rerank - Enable MaxSim reranking for better quality (default: true)
-   * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
-   * @returns Array of {id, distance, metadata}
-   */
-  searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
-  /**
-   * Batch search with parallel execution (async).
-   *
-   * Runs searches in parallel using rayon on a blocking thread pool,
-   * keeping the Node.js event loop free.
-   */
-  searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
+  set(items: Array<SetItem>): Promise<number>
   /** Get a vector by ID. */
   get(id: string): GetResult | null
   /**
@@ -145,6 +118,8 @@ export declare class VectorDatabase {
   get dimensions(): number
   /** Check if this is a multi-vector store. */
   get isMultiVector(): boolean
+  /** Check if an embedding function is configured. */
+  get hasEmbeddingFn(): boolean
   /** Check if database is empty. */
   isEmpty(): boolean
   /** Get database statistics. */
@@ -153,63 +128,6 @@ export declare class VectorDatabase {
   get efSearch(): number
   /** Set ef_search value. */
   set efSearch(efSearch: number)
-  /**
-   * Get or create a named collection.
-   *
-   * Collection handles share state - changes made through one handle
-   * are immediately visible through another (no flush required).
-   */
-  collection(name: string): VectorDatabase
-  /** List all collections. */
-  collections(): Array<string>
-  /** Delete a collection. */
-  deleteCollection(name: string): void
-  /**
-   * Check if text search is enabled.
-   *
-   * Text search is automatically enabled when using set() with text field.
-   */
-  get hasTextSearch(): boolean
-  /**
-   * Search using text only (BM25 scoring).
-   *
-   * @param query - Text query
-   * @param k - Number of results
-   * @returns Array of {id, score, metadata}
-   */
-  searchText(query: string, k: number): Array<TextSearchResult>
-  /**
-   * Hybrid search combining vector similarity and text relevance.
-   *
-   * Uses Reciprocal Rank Fusion (RRF) to combine HNSW and BM25 results.
-   *
-   * @param queryVector - Query embedding
-   * @param queryText - Text query for BM25
-   * @param k - Number of results
-   * @param options - Optional: {filter?, alpha?, rrfK?, subscores?}
-   * @returns Array of {id, score, metadata, keywordScore?, semanticScore?}
-   *
-   * @example
-   * ```javascript
-   * // Basic hybrid search
-   * db.searchHybrid([1, 0, 0, 0], "machine learning", 10);
-   *
-   * // With options
-   * db.searchHybrid([1, 0, 0, 0], "query", 10, {
-   *   filter: { type: "ml" },
-   *   alpha: 0.7,
-   *   rrfK: 60,
-   *   subscores: true
-   * });
-   * ```
-   */
-  searchHybrid(queryVector: Array<number> | Float32Array, queryText: string, k: number, options?: { filter?: Record<string, unknown>; alpha?: number; rrfK?: number; subscores?: boolean } | undefined): Array<HybridSearchResult>
-  /**
-   * Flush pending changes to disk.
-   *
-   * For hybrid search, this commits text index changes.
-   */
-  flush(): void
   /**
    * Compact the database by removing deleted records and reclaiming space.
    *
@@ -272,39 +190,98 @@ export declare class VectorDatabase {
    */
   exists(id: string): boolean
   /**
-   * Alias for exists() - check if an ID exists in the database.
+   * Get multiple vectors by ID.
    *
-   * @param id - Vector ID to check
-   * @returns true if ID exists and is not deleted
+   * Batch version of get(). More efficient than calling get() in a loop.
+   *
+   * @param ids - Array of vector IDs to retrieve
+   * @returns Array of results in same order as input, null for missing IDs
+   */
+  getBatch(ids: Array<string>): Array<GetResult | undefined | null>
+  /**
+   * Check if text search is enabled.
+   *
+   * Text search is automatically enabled when using set() with text field.
+   */
+  get hasTextSearch(): boolean
+  /**
+   * Search using text only (BM25 scoring).
+   *
+   * @param query - Text query
+   * @param k - Number of results
+   * @returns Array of {id, score, metadata}
+   */
+  searchText(query: string, k: number): Array<TextSearchResult>
+  /**
+   * Hybrid search combining vector similarity and text relevance.
+   *
+   * Uses Reciprocal Rank Fusion (RRF) to combine HNSW and BM25 results.
+   *
+   * @param queryVector - Query embedding
+   * @param queryText - Text query for BM25
+   * @param k - Number of results
+   * @param options - Optional: {filter?, alpha?, rrfK?, subscores?}
+   * @returns Array of {id, score, metadata, keywordScore?, semanticScore?}
+   *
+   * @example
+   * ```javascript
+   * // Basic hybrid search
+   * db.searchHybrid([1, 0, 0, 0], "machine learning", 10);
+   *
+   * // With options
+   * db.searchHybrid([1, 0, 0, 0], "query", 10, {
+   *   filter: { type: "ml" },
+   *   alpha: 0.7,
+   *   rrfK: 60,
+   *   subscores: true
+   * });
+   * ```
    */
-  has(id: string): boolean
+  searchHybrid(queryVector: Array<number> | Float32Array | string, queryText: string | undefined | null, k: number, options?: { filter?: Record<string, unknown>; alpha?: number; rrfK?: number; subscores?: boolean } | undefined): Promise<Array<HybridSearchResult>>
   /**
-   * Search for the single nearest neighbor.
+   * Flush pending changes to disk.
    *
-   * Convenience method that returns the top result or null if no matches.
+   * For hybrid search, this commits text index changes.
+   */
+  flush(): void
+  /**
+   * Search for k nearest neighbors.
    *
    * @param query - Query vector (number[] or Float32Array)
+   * @param k - Number of results to return
    * @param options - Optional search options: {filter?, ef?, maxDistance?}
-   * @returns Single result or null
+   * @returns Array of {id, distance, score, metadata}
    *
    * @example
    * ```javascript
-   * const nearest = db.searchOne([1, 0, 0, 0]);
-   * if (nearest) {
-   *   console.log(`Found: ${nearest.id} at distance ${nearest.distance}`);
-   * }
+   * // Basic search
+   * db.search([1, 0, 0, 0], 10);
+   *
+   * // With options
+   * db.search([1, 0, 0, 0], 10, { filter: { category: "A" }, ef: 200 });
+   * db.search([1, 0, 0, 0], 10, { maxDistance: 0.5 });
    * ```
    */
-  searchOne(query: Array<number> | Float32Array, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): SearchResult | null
+  search(query: Array<number> | Float32Array | string, k: number, options?: { filter?: Record<string, unknown>; ef?: number; maxDistance?: number } | undefined): Promise<Array<SearchResult>>
   /**
-   * Get multiple vectors by ID.
+   * Search multi-vector store with query tokens.
    *
-   * Batch version of get(). More efficient than calling get() in a loop.
+   * Internal method used by unified search() for multi-vector stores.
    *
-   * @param ids - Array of vector IDs to retrieve
-   * @returns Array of results in same order as input, null for missing IDs
+   * @param query - Query tokens (number[][] or Float32Array[])
+   * @param k - Number of results to return
+   * @param rerank - Enable MaxSim reranking for better quality (default: true)
+   * @param rerankFactor - Fetch k*rerankFactor candidates before reranking (default: 32)
+   * @returns Array of {id, distance, metadata}
    */
-  getBatch(ids: Array<string>): Array<GetResult | undefined | null>
+  searchMulti(query: Array<Array<number>> | Array<Float32Array>, k: number, rerank?: boolean | undefined | null, rerankFactor?: number | undefined | null): Array<SearchResult>
+  /**
+   * Batch search with parallel execution (async).
+   *
+   * Runs searches in parallel using rayon on a blocking thread pool,
+   * keeping the Node.js event loop free.
+   */
+  searchBatch(queries: Array<Array<number> | Float32Array>, k: number, ef?: number | undefined | null): Promise<Array<Array<SearchResult>>>
 }
 export interface GetResult {
@@ -323,14 +300,6 @@ export interface HybridSearchResult {
   semanticScore?: number
 }
-export interface MultiVectorItem {
-  id: string
-  /** Multi-vector data as array of Float32Arrays */
-  vectors: Float32Array[]
-  /** Optional metadata */
-  metadata?: Record<string, unknown> | undefined
-}
 /**
  * Open or create a vector database.
  *
@@ -357,16 +326,9 @@ export interface MultiVectorItem {
  *   quantization: true  // or "sq8"
  * });
  *
- * // Quantization with custom rescore settings
- * const db = omendb.open("./mydb", {
- *   dimensions: 128,
- *   quantization: true,
- *   rescore: false,    // Disable rescore for max speed
- *   oversample: 5.0    // Or increase oversample for better recall
- * });
  * ```
  */
-export declare function open(path: string, options?: OpenOptions | undefined | null): VectorDatabase
+export declare function open(path: string, options?: OpenOptions | undefined | null, embeddingFn?: ((texts: string[]) => Float32Array[]) | undefined): VectorDatabase
 /**
  * Configuration options for opening a vector database.
@@ -376,9 +338,7 @@ export declare function open(path: string, options?: OpenOptions | undefined | n
  * - m: 16 (HNSW neighbors per node, higher = better recall, more memory)
  * - efConstruction: 100 (build quality, higher = better graph, slower build)
  * - efSearch: 100 (search quality, higher = better recall, slower search)
- * - quantization: null (true/"sq8" for 4x compression, ~99% recall)
- * - rescore: true when quantization enabled (rerank candidates with exact distance)
- * - oversample: 3.0 (fetch k*oversample candidates when rescoring)
+ * - quantization: null (true/"sq8" for 4x compression)
  * - metric: "l2" (distance metric: "l2", "euclidean", "cosine", "dot", "ip")
  */
 export interface OpenOptions {
@@ -396,16 +356,6 @@ export interface OpenOptions {
    * - false/null: Full precision (no quantization)
    */
   quantization?: boolean | string | number | null | undefined
-  /**
-   * Rescore candidates with exact distance (default: true when quantization enabled)
-   * Set to false for maximum speed at the cost of ~20% recall
-   */
-  rescore?: boolean
-  /**
-   * Oversampling factor for rescoring (default: 3.0)
-   * Fetches k*oversample candidates then reranks to return top k
-   */
-  oversample?: number
   /** Distance metric: "l2"/"euclidean" (default), "cosine", "dot"/"ip" */
   metric?: string
   /**
@@ -437,6 +387,8 @@ export interface SetItem {
   metadata?: Record<string, unknown> | undefined
   /** Optional text for hybrid search (auto-enables text search, stored in metadata.text) */
   text?: string
+  /** Optional document for auto-embedding via embeddingFn */
+  document?: string
 }
 export interface StatsResult {
@@ -450,20 +402,3 @@ export interface TextSearchResult {
   score: number
   metadata: Record<string, unknown>
 }
-export interface VectorItem {
-  id: string
-  /** Vector data as Float32Array */
-  vector: Float32Array
-  /** Optional metadata */
-  metadata?: Record<string, unknown> | undefined
-  /** Optional document text (stored in metadata.document) */
-  document?: string
-}
-export interface VectorItemWithText {
-  id: string
-  vector: Float32Array
-  text: string
-  metadata?: Record<string, unknown> | undefined
-}

package/index.js CHANGED Viewed

@@ -111,6 +111,10 @@ function toFloat32Array(arr) {
 // Convert VectorItem to use Float32Array
 function convertVectorItem(item) {
+	// Items with document field are handled by native embedding
+	if (item.document !== undefined && item.document !== null) {
+		return item;
+	}
 	if (item.vector === undefined || item.vector === null) {
 		if (Array.isArray(item.vectors)) {
 			throw new Error(
@@ -141,11 +145,6 @@ function convertMultiVectorItem(item) {
 	};
 }
-// Check if items contain multi-vector data (vectors field must be an array)
-function isMultiVectorItem(item) {
-	return Array.isArray(item.vectors);
-}
 // Wrap VectorDatabase to handle array conversion
 const NativeVectorDatabase = nativeBinding.VectorDatabase;
@@ -201,17 +200,6 @@ class VectorDatabase {
 		}
 	}
-	/**
-	 * Search for the single nearest neighbor.
-	 *
-	 * @param {number[]|Float32Array} query - Query vector
-	 * @param {object} [options] - Search options: {filter?, ef?, maxDistance?}
-	 * @returns {{id: string, distance: number, score: number, metadata: object}|null}
-	 */
-	searchOne(query, options) {
-		return this._native.searchOne(query, options);
-	}
 	searchBatch(queries, k, ef) {
 		return this._native.searchBatch(queries, k, ef);
 	}
@@ -254,6 +242,10 @@ class VectorDatabase {
 		return this._native.isMultiVector;
 	}
+	get hasEmbeddingFn() {
+		return this._native.hasEmbeddingFn;
+	}
 	isEmpty() {
 		return this._native.isEmpty();
 	}
@@ -270,8 +262,8 @@ class VectorDatabase {
 		this._native.efSearch = value;
 	}
-	collection(name) {
-		return new VectorDatabase(this._native.collection(name));
+	collection(name, embeddingFn) {
+		return new VectorDatabase(this._native.collection(name, embeddingFn));
 	}
 	collections() {
@@ -338,16 +330,6 @@ class VectorDatabase {
 		return this._native.exists(id);
 	}
-	/**
-	 * Alias for exists() - check if an ID exists in the database.
-	 *
-	 * @param {string} id - Vector ID to check
-	 * @returns {boolean}
-	 */
-	has(id) {
-		return this._native.has(id);
-	}
 	getBatch(ids) {
 		return this._native.getBatch(ids);
 	}
@@ -357,8 +339,8 @@ class VectorDatabase {
 	}
 }
-function open(path, options) {
-	return new VectorDatabase(nativeBinding.open(path, options));
+function open(path, options, embeddingFn) {
+	return new VectorDatabase(nativeBinding.open(path, options, embeddingFn));
 }
 module.exports.VectorDatabase = VectorDatabase;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@omendb/omendb",
-  "version": "0.0.26",
+  "version": "0.0.27",
   "description": "Fast embedded vector database with HNSW + ACORN-1 filtered search",
   "main": "index.js",
   "types": "index.d.ts",
@@ -50,10 +50,10 @@
     "omendb.node"
   ],
   "optionalDependencies": {
-    "@omendb/omendb-darwin-x64": "0.0.26",
-    "@omendb/omendb-darwin-arm64": "0.0.26",
-    "@omendb/omendb-linux-x64-gnu": "0.0.26",
-    "@omendb/omendb-linux-arm64-gnu": "0.0.26",
-    "@omendb/omendb-win32-x64-msvc": "0.0.26"
+    "@omendb/omendb-darwin-x64": "0.0.27",
+    "@omendb/omendb-darwin-arm64": "0.0.27",
+    "@omendb/omendb-linux-x64-gnu": "0.0.27",
+    "@omendb/omendb-linux-arm64-gnu": "0.0.27",
+    "@omendb/omendb-win32-x64-msvc": "0.0.27"
   }
 }