npm - ruvector - Versions diffs - 0.2.30 → 0.2.32 - Mend

ruvector 0.2.30 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/README.md +32 -0
package/bin/cli.js +671 -32
package/bin/mcp-policy.js +95 -0
package/bin/mcp-server.js +4054 -3854
package/dist/core/embedding-provenance.d.ts +145 -0
package/dist/core/embedding-provenance.d.ts.map +1 -0
package/dist/core/embedding-provenance.js +258 -0
package/dist/core/index.d.ts +1 -0
package/dist/core/index.d.ts.map +1 -1
package/dist/core/index.js +1 -0
package/dist/core/intelligence-engine.d.ts +65 -4
package/dist/core/intelligence-engine.d.ts.map +1 -1
package/dist/core/intelligence-engine.js +149 -12
package/dist/core/onnx/bundled-parallel.mjs +24 -19
package/dist/core/onnx/loader.js +31 -4
package/dist/core/onnx-embedder.d.ts +42 -1
package/dist/core/onnx-embedder.d.ts.map +1 -1
package/dist/core/onnx-embedder.js +116 -11
package/dist/core/onnx-optimized.d.ts +8 -1
package/dist/core/onnx-optimized.d.ts.map +1 -1
package/dist/core/onnx-optimized.js +41 -6
package/package.json +6 -5

package/dist/core/onnx-embedder.js CHANGED

@@ -48,26 +48,32 @@ var __importStar = (this && this.__importStar) || (function () {
     };
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.OnnxEmbedder = void 0;
+exports.OnnxEmbedder = exports.BULK_EMBED_THRESHOLD = void 0;
 exports.isOnnxAvailable = isOnnxAvailable;
 exports.initOnnxEmbedder = initOnnxEmbedder;
 exports.embed = embed;
+exports.embedQuery = embedQuery;
+exports.embedPassage = embedPassage;
 exports.embedBatch = embedBatch;
 exports.similarity = similarity;
 exports.cosineSimilarity = cosineSimilarity;
 exports.getDimension = getDimension;
 exports.isReady = isReady;
 exports.isOnnxInitialized = isOnnxInitialized;
+exports.getActiveModelId = getActiveModelId;
+exports.getEmbedderProvenance = getEmbedderProvenance;
 exports.getStats = getStats;
 exports.shutdown = shutdown;
 exports.initParallelEmbedder = initParallelEmbedder;
 exports.embedBatchParallel = embedBatchParallel;
 exports.getParallelWorkerCount = getParallelWorkerCount;
+exports.embedBulk = embedBulk;
 exports.shutdownParallelEmbedder = shutdownParallelEmbedder;
 const path = __importStar(require("path"));
 const fs = __importStar(require("fs"));
 const url_1 = require("url");
 const module_1 = require("module");
+const embedding_provenance_1 = require("./embedding-provenance");
 // Set up ESM-compatible require for WASM module (fixes Windows/ESM compatibility)
 // The WASM bindings use module.require for Node.js crypto, this provides a fallback
 if (typeof globalThis !== 'undefined' && !globalThis.__ruvector_require) {
@@ -106,6 +112,10 @@ let loadedModelBytes = null;
 let loadedTokenizerJson = null;
 let loadedMaxLength = 256;
 let bundledPool = null;
+// ADR-210: identity of the loaded model, for prefix policies (D4) and the
+// embedding-provenance record (D0).
+let loadedModelId = null;
+let loadedNormalize = true;
 // Default model
 const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
 /**
@@ -257,6 +267,8 @@ async function initOnnxEmbedder(config = {}) {
             loadedModelBytes = modelBytes;
             loadedTokenizerJson = tokenizerJson;
             loadedMaxLength = config.maxLength || modelConfig.maxLength || 256;
+            loadedModelId = modelId;
+            loadedNormalize = config.normalize !== false;
             // Create embedder with config
             const embedderConfig = new wasmModule.WasmEmbedderConfig()
                 .setMaxLength(config.maxLength || modelConfig.maxLength || 256)
@@ -301,18 +313,18 @@ async function initOnnxEmbedder(config = {}) {
     await loadPromise;
     return isInitialized;
 }
-/**
- * Generate embedding for text
- */
-async function embed(text) {
+async function embedKind(kind, text) {
     if (!isInitialized) {
         await initOnnxEmbedder();
     }
     if (!embedder) {
         throw new Error('ONNX embedder not initialized');
     }
+    // ADR-210 D4: apply the model's registered query/passage prefix. MiniLM has
+    // empty prefixes, so the default model's output is byte-identical to before.
+    const prepared = (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, kind, text);
     const start = performance.now();
-    const embedding = embedder.embedOne(text);
+    const embedding = embedder.embedOne(prepared);
     const timeMs = performance.now() - start;
     return {
         embedding: Array.from(embedding),
@@ -320,6 +332,21 @@ async function embed(text) {
         timeMs,
     };
 }
+/**
+ * Generate embedding for text. Equivalent to `embedPassage()` (ADR-210 D4):
+ * stored/passage text is the default; use `embedQuery()` for search queries.
+ */
+async function embed(text) {
+    return embedKind('passage', text);
+}
+/** Embed a search query, applying the model's registered query prefix (D4). */
+async function embedQuery(text) {
+    return embedKind('query', text);
+}
+/** Embed a passage/document, applying the model's registered passage prefix (D4). */
+async function embedPassage(text) {
+    return embedKind('passage', text);
+}
 /**
  * Generate embeddings for multiple texts
  * Uses parallel workers automatically for batches >= parallelThreshold
@@ -331,10 +358,12 @@ async function embedBatch(texts) {
     if (!embedder) {
         throw new Error('ONNX embedder not initialized');
     }
+    // ADR-210 D4: batch embedding is the passage path (embed() === embedPassage()).
+    const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, 'passage', t));
     const start = performance.now();
     // Use parallel workers for large batches
-    if (parallelEnabled && parallelEmbedder && texts.length >= parallelThreshold) {
-        const batchResults = await parallelEmbedder.embedBatch(texts);
+    if (parallelEnabled && parallelEmbedder && prepared.length >= parallelThreshold) {
+        const batchResults = await parallelEmbedder.embedBatch(prepared);
         const totalTime = performance.now() - start;
         const dimension = parallelEmbedder.dimension || 384;
         return batchResults.map((emb) => ({
@@ -344,11 +373,11 @@ async function embedBatch(texts) {
         }));
     }
     // Sequential fallback
-    const batchEmbeddings = embedder.embedBatch(texts);
+    const batchEmbeddings = embedder.embedBatch(prepared);
     const totalTime = performance.now() - start;
     const dimension = embedder.dimension();
     const results = [];
-    for (let i = 0; i < texts.length; i++) {
+    for (let i = 0; i < prepared.length; i++) {
         const embedding = batchEmbeddings.slice(i * dimension, (i + 1) * dimension);
         results.push({
             embedding: Array.from(embedding),
@@ -418,6 +447,26 @@ function isReady() {
 function isOnnxInitialized() {
     return isInitialized;
 }
+/** Model id of the loaded model, or null before init (ADR-210). */
+function getActiveModelId() {
+    return loadedModelId;
+}
+/**
+ * Embedding-provenance record (ADR-210 D0) describing vectors produced by the
+ * loaded ONNX embedder, or null before the model is initialized.
+ */
+function getEmbedderProvenance() {
+    if (!isInitialized)
+        return null;
+    const modelId = loadedModelId ?? DEFAULT_MODEL;
+    return {
+        embedderKind: (0, embedding_provenance_1.embedderKindForModel)(modelId),
+        modelId,
+        dimension: getDimension(),
+        normalize: loadedNormalize,
+        prefixPolicy: (0, embedding_provenance_1.getModelPrefixSpec)(modelId).prefixPolicy,
+    };
+}
 /**
  * Get embedder stats including SIMD and parallel capabilities
  */
@@ -479,12 +528,57 @@ async function initParallelEmbedder(numWorkers) {
 async function embedBatchParallel(texts) {
     if (!bundledPool)
         await initParallelEmbedder();
-    return bundledPool.embedBatch(texts);
+    // ADR-210 D4: bulk ingest is the passage path; MiniLM prefixes are empty.
+    const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, 'passage', t));
+    return bundledPool.embedBatch(prepared);
 }
 /** Number of active pool workers (0 if the pool isn't started). */
 function getParallelWorkerCount() {
     return bundledPool ? bundledPool.numWorkers : 0;
 }
+/** Batches at or above this size route through the worker pool (ADR-210 D3). */
+exports.BULK_EMBED_THRESHOLD = 32;
+let bulkPoolFallbackWarned = false;
+/**
+ * Default bulk-embedding path (ADR-210 D3): batches of `threshold`
+ * (default 32) or more texts route through the bundled parallel worker pool
+ * — fp32 model bytes shared across workers via SharedArrayBuffer, vectors
+ * identical to the single-thread path. Smaller batches, and any batch when
+ * pool startup fails (no worker_threads, no SharedArrayBuffer), use the
+ * single-threaded batch path with one stderr note.
+ *
+ * INT8 STATUS (honest gap, ADR-210 D3): the registered int8 variants
+ * (QUANTIZED_MODELS in onnx-optimized.ts) cannot run on the bundled WASM
+ * runtime today — its graph analyzer rejects quantized MiniLM exports
+ * ("Failed analyse for node /Unsqueeze", verified against both
+ * Xenova/all-MiniLM-L6-v2 model_quantized.onnx and the official
+ * sentence-transformers model_quint8_avx2.onnx exports). Bulk ingest
+ * therefore defaults to parallel-fp32; int8 ingest needs a Rust-side
+ * runtime upgrade in the ruvector-onnx-embeddings-wasm crate (tracked as
+ * an ADR-210 follow-up). Single-query latency keeps fp32 either way.
+ */
+async function embedBulk(texts, opts = {}) {
+    if (!texts || texts.length === 0)
+        return [];
+    const threshold = opts.threshold ?? exports.BULK_EMBED_THRESHOLD;
+    if (!isInitialized) {
+        await initOnnxEmbedder();
+    }
+    if (texts.length >= threshold) {
+        try {
+            return await embedBatchParallel(texts);
+        }
+        catch (e) {
+            if (!bulkPoolFallbackWarned) {
+                bulkPoolFallbackWarned = true;
+                console.error(`ruvector: parallel bulk-embed pool unavailable (${e?.message ?? e}); ` +
+                    `using single-threaded batch embedding.`);
+            }
+        }
+    }
+    const results = await embedBatch(texts);
+    return results.map(r => r.embedding);
+}
 /** Shut down the bundled worker pool and release its threads. */
 async function shutdownParallelEmbedder() {
     if (bundledPool) {
@@ -500,10 +594,21 @@ class OnnxEmbedder {
     async init() {
         return initOnnxEmbedder(this.config);
     }
+    /** Equivalent to embedPassage() — ADR-210 D4. */
     async embed(text) {
         const result = await embed(text);
         return result.embedding;
     }
+    /** Embed a search query with the model's registered query prefix (D4). */
+    async embedQuery(text) {
+        const result = await embedQuery(text);
+        return result.embedding;
+    }
+    /** Embed a passage/document with the model's registered passage prefix (D4). */
+    async embedPassage(text) {
+        const result = await embedPassage(text);
+        return result.embedding;
+    }
     async embedBatch(texts) {
         const results = await embedBatch(texts);
         return results.map(r => r.embedding);

package/dist/core/onnx-optimized.d.ts CHANGED

@@ -52,9 +52,16 @@ export declare class OptimizedOnnxEmbedder {
     init(): Promise<void>;
     private doInit;
     /**
-     * Embed a single text with caching
+     * Embed a single text with caching.
+     * Equivalent to `embedPassage()` — ADR-210 D4 (plain embed = passage path).
      */
     embed(text: string): Promise<Float32Array>;
+    /** Embed a search query with the model's registered query prefix (D4). */
+    embedQuery(text: string): Promise<Float32Array>;
+    /** Embed a passage/document with the model's registered passage prefix (D4). */
+    embedPassage(text: string): Promise<Float32Array>;
+    private embedKind;
+    private embedRaw;
     /**
      * Embed multiple texts with batching and caching
      */

package/dist/core/onnx-optimized.d.ts.map CHANGED

	@@ -1 +1 @@
1	- {"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;~~AAcH~~,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;~~AA0HD~~,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;~~IA+DpB;;OAEG~~;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;~~IAiChD~~;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;~~IAmD1D~~;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}
1	+ {"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAeH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0ID,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;IAkEpB;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAIhD,0EAA0E;IACpE,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAIrD,gFAAgF;IAC1E,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;YAIzC,SAAS;YAMT,QAAQ;IAiCtB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAsD1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,
wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}

package/dist/core/onnx-optimized.js CHANGED

@@ -55,6 +55,7 @@ exports.initOptimizedOnnx = initOptimizedOnnx;
 const path = __importStar(require("path"));
 const fs = __importStar(require("fs"));
 const url_1 = require("url");
+const embedding_provenance_1 = require("./embedding-provenance");
 // Force native dynamic import
 // eslint-disable-next-line @typescript-eslint/no-implied-eval
 const dynamicImport = new Function('specifier', 'return import(specifier)');
@@ -70,6 +71,9 @@ const QUANTIZED_MODELS = {
         tokenizer: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
         dimension: 384,
         maxLength: 256,
+        prefixPolicy: 'none',
+        queryPrefix: '',
+        passagePrefix: '',
     },
     'bge-small-en-v1.5': {
         onnx: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/onnx/model.onnx',
@@ -78,6 +82,11 @@ const QUANTIZED_MODELS = {
         tokenizer: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.json',
         dimension: 384,
         maxLength: 512,
+        // Query instruction recommended for short-query → long-passage retrieval;
+        // passages need no instruction (model card).
+        prefixPolicy: 'query-recommended',
+        queryPrefix: 'Represent this sentence for searching relevant passages: ',
+        passagePrefix: '',
     },
     'e5-small-v2': {
         onnx: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/onnx/model.onnx',
@@ -85,6 +94,10 @@ const QUANTIZED_MODELS = {
         tokenizer: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/tokenizer.json',
         dimension: 384,
         maxLength: 512,
+        // The model card states quality degrades without these prefixes.
+        prefixPolicy: 'required',
+        queryPrefix: 'query: ',
+        passagePrefix: 'passage: ',
     },
 };
 // ============================================================================
@@ -219,7 +232,10 @@ class OptimizedOnnxEmbedder {
             // log a quantization (FP16/INT8) that is not actually applied. When the
             // loader gains variant support, thread the selected variant through to
             // loadModel() here instead of computing an unused URL.
-            const modelInfo = QUANTIZED_MODELS[this.config.modelId];
+            // Own-property lookup only ('__proto__'-style ids must miss, ADR-210).
+            const modelInfo = Object.prototype.hasOwnProperty.call(QUANTIZED_MODELS, this.config.modelId)
+                ? QUANTIZED_MODELS[this.config.modelId]
+                : undefined;
             if (modelInfo) {
                 this.dimension = modelInfo.dimension;
             }
@@ -246,9 +262,26 @@ class OptimizedOnnxEmbedder {
         }
     }
     /**
-     * Embed a single text with caching
+     * Embed a single text with caching.
+     * Equivalent to `embedPassage()` — ADR-210 D4 (plain embed = passage path).
      */
     async embed(text) {
+        return this.embedKind('passage', text);
+    }
+    /** Embed a search query with the model's registered query prefix (D4). */
+    async embedQuery(text) {
+        return this.embedKind('query', text);
+    }
+    /** Embed a passage/document with the model's registered passage prefix (D4). */
+    async embedPassage(text) {
+        return this.embedKind('passage', text);
+    }
+    async embedKind(kind, text) {
+        // ADR-210 D4: prefix before tokenization (and before the cache key, so
+        // query and passage embeds of the same text never collide for E5/BGE).
+        return this.embedRaw((0, embedding_provenance_1.prefixText)(this.config.modelId, kind, text));
+    }
+    async embedRaw(text) {
         if (this.config.lazyInit && !this.initialized) {
             await this.init();
         }
@@ -284,17 +317,19 @@ class OptimizedOnnxEmbedder {
         if (!this.embedder) {
             throw new Error('Embedder not initialized');
         }
-        const results = new Array(texts.length);
+        // ADR-210 D4: batch embedding is the passage path (embed() === embedPassage()).
+        const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(this.config.modelId, 'passage', t));
+        const results = new Array(prepared.length);
         const uncached = [];
         // Check cache first
-        for (let i = 0; i < texts.length; i++) {
-            const cacheKey = hashString(texts[i]);
+        for (let i = 0; i < prepared.length; i++) {
+            const cacheKey = hashString(prepared[i]);
             const cached = this.embeddingCache.get(cacheKey);
             if (cached) {
                 results[i] = cached;
             }
             else {
-                uncached.push({ index: i, text: texts[i] });
+                uncached.push({ index: i, text: prepared[i] });
             }
         }
         // If all cached, return immediately

package/package.json CHANGED

@@ -1,18 +1,18 @@
 {
   "name": "ruvector",
-  "version": "0.2.30",
-  "description": "Self-learning vector database for Node.js — hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
+  "version": "0.2.32",
+  "description": "Self-learning vector database for Node.js \u2014 hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "bin": {
     "ruvector": "./bin/cli.js"
   },
   "scripts": {
-    "build": "tsc && mkdir -p dist/core/onnx && cp -r src/core/onnx/. dist/core/onnx/",
+    "build": "tsc && node -e \"require('fs').cpSync('src/core/onnx','dist/core/onnx',{recursive:true})\"",
     "verify-dist": "node scripts/verify-dist.js",
     "prepack": "npm run build && npm run verify-dist",
     "prepublishOnly": "npm run build && npm run verify-dist",
-    "test": "node test/integration.js && node test/cli-commands.js && node test/sigterm-cleanup.js"
+    "test": "node test/integration.js && node test/cli-commands.js && node test/db-workflow.js && node test/sigterm-cleanup.js && node test/mcp-policy.js && node test/startup-budget.js"
   },
   "keywords": [
     "vector",
@@ -82,7 +82,8 @@
     "ora": "^5.4.1"
   },
   "optionalDependencies": {
-    "@ruvector/rvf": "^0.1.0"
+    "@ruvector/rvf": "^0.1.0",
+    "@ruvector/tiny-dancer": "^0.1.22"
   },
   "devDependencies": {
     "@types/node": "^20.10.5",