ruvector 0.2.30 → 0.2.32

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -48,26 +48,32 @@ var __importStar = (this && this.__importStar) || (function () {
48
48
  };
49
49
  })();
50
50
  Object.defineProperty(exports, "__esModule", { value: true });
51
- exports.OnnxEmbedder = void 0;
51
+ exports.OnnxEmbedder = exports.BULK_EMBED_THRESHOLD = void 0;
52
52
  exports.isOnnxAvailable = isOnnxAvailable;
53
53
  exports.initOnnxEmbedder = initOnnxEmbedder;
54
54
  exports.embed = embed;
55
+ exports.embedQuery = embedQuery;
56
+ exports.embedPassage = embedPassage;
55
57
  exports.embedBatch = embedBatch;
56
58
  exports.similarity = similarity;
57
59
  exports.cosineSimilarity = cosineSimilarity;
58
60
  exports.getDimension = getDimension;
59
61
  exports.isReady = isReady;
60
62
  exports.isOnnxInitialized = isOnnxInitialized;
63
+ exports.getActiveModelId = getActiveModelId;
64
+ exports.getEmbedderProvenance = getEmbedderProvenance;
61
65
  exports.getStats = getStats;
62
66
  exports.shutdown = shutdown;
63
67
  exports.initParallelEmbedder = initParallelEmbedder;
64
68
  exports.embedBatchParallel = embedBatchParallel;
65
69
  exports.getParallelWorkerCount = getParallelWorkerCount;
70
+ exports.embedBulk = embedBulk;
66
71
  exports.shutdownParallelEmbedder = shutdownParallelEmbedder;
67
72
  const path = __importStar(require("path"));
68
73
  const fs = __importStar(require("fs"));
69
74
  const url_1 = require("url");
70
75
  const module_1 = require("module");
76
+ const embedding_provenance_1 = require("./embedding-provenance");
71
77
  // Set up ESM-compatible require for WASM module (fixes Windows/ESM compatibility)
72
78
  // The WASM bindings use module.require for Node.js crypto, this provides a fallback
73
79
  if (typeof globalThis !== 'undefined' && !globalThis.__ruvector_require) {
@@ -106,6 +112,10 @@ let loadedModelBytes = null;
106
112
  let loadedTokenizerJson = null;
107
113
  let loadedMaxLength = 256;
108
114
  let bundledPool = null;
115
+ // ADR-210: identity of the loaded model, for prefix policies (D4) and the
116
+ // embedding-provenance record (D0).
117
+ let loadedModelId = null;
118
+ let loadedNormalize = true;
109
119
  // Default model
110
120
  const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
111
121
  /**
@@ -257,6 +267,8 @@ async function initOnnxEmbedder(config = {}) {
257
267
  loadedModelBytes = modelBytes;
258
268
  loadedTokenizerJson = tokenizerJson;
259
269
  loadedMaxLength = config.maxLength || modelConfig.maxLength || 256;
270
+ loadedModelId = modelId;
271
+ loadedNormalize = config.normalize !== false;
260
272
  // Create embedder with config
261
273
  const embedderConfig = new wasmModule.WasmEmbedderConfig()
262
274
  .setMaxLength(config.maxLength || modelConfig.maxLength || 256)
@@ -301,18 +313,18 @@ async function initOnnxEmbedder(config = {}) {
301
313
  await loadPromise;
302
314
  return isInitialized;
303
315
  }
304
- /**
305
- * Generate embedding for text
306
- */
307
- async function embed(text) {
316
+ async function embedKind(kind, text) {
308
317
  if (!isInitialized) {
309
318
  await initOnnxEmbedder();
310
319
  }
311
320
  if (!embedder) {
312
321
  throw new Error('ONNX embedder not initialized');
313
322
  }
323
+ // ADR-210 D4: apply the model's registered query/passage prefix. MiniLM has
324
+ // empty prefixes, so the default model's output is byte-identical to before.
325
+ const prepared = (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, kind, text);
314
326
  const start = performance.now();
315
- const embedding = embedder.embedOne(text);
327
+ const embedding = embedder.embedOne(prepared);
316
328
  const timeMs = performance.now() - start;
317
329
  return {
318
330
  embedding: Array.from(embedding),
@@ -320,6 +332,21 @@ async function embed(text) {
320
332
  timeMs,
321
333
  };
322
334
  }
335
+ /**
336
+ * Generate embedding for text. Equivalent to `embedPassage()` (ADR-210 D4):
337
+ * stored/passage text is the default; use `embedQuery()` for search queries.
338
+ */
339
+ async function embed(text) {
340
+ return embedKind('passage', text);
341
+ }
342
+ /** Embed a search query, applying the model's registered query prefix (D4). */
343
+ async function embedQuery(text) {
344
+ return embedKind('query', text);
345
+ }
346
+ /** Embed a passage/document, applying the model's registered passage prefix (D4). */
347
+ async function embedPassage(text) {
348
+ return embedKind('passage', text);
349
+ }
323
350
  /**
324
351
  * Generate embeddings for multiple texts
325
352
  * Uses parallel workers automatically for batches >= parallelThreshold
@@ -331,10 +358,12 @@ async function embedBatch(texts) {
331
358
  if (!embedder) {
332
359
  throw new Error('ONNX embedder not initialized');
333
360
  }
361
+ // ADR-210 D4: batch embedding is the passage path (embed() === embedPassage()).
362
+ const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, 'passage', t));
334
363
  const start = performance.now();
335
364
  // Use parallel workers for large batches
336
- if (parallelEnabled && parallelEmbedder && texts.length >= parallelThreshold) {
337
- const batchResults = await parallelEmbedder.embedBatch(texts);
365
+ if (parallelEnabled && parallelEmbedder && prepared.length >= parallelThreshold) {
366
+ const batchResults = await parallelEmbedder.embedBatch(prepared);
338
367
  const totalTime = performance.now() - start;
339
368
  const dimension = parallelEmbedder.dimension || 384;
340
369
  return batchResults.map((emb) => ({
@@ -344,11 +373,11 @@ async function embedBatch(texts) {
344
373
  }));
345
374
  }
346
375
  // Sequential fallback
347
- const batchEmbeddings = embedder.embedBatch(texts);
376
+ const batchEmbeddings = embedder.embedBatch(prepared);
348
377
  const totalTime = performance.now() - start;
349
378
  const dimension = embedder.dimension();
350
379
  const results = [];
351
- for (let i = 0; i < texts.length; i++) {
380
+ for (let i = 0; i < prepared.length; i++) {
352
381
  const embedding = batchEmbeddings.slice(i * dimension, (i + 1) * dimension);
353
382
  results.push({
354
383
  embedding: Array.from(embedding),
@@ -418,6 +447,26 @@ function isReady() {
418
447
  function isOnnxInitialized() {
419
448
  return isInitialized;
420
449
  }
450
+ /** Model id of the loaded model, or null before init (ADR-210). */
451
+ function getActiveModelId() {
452
+ return loadedModelId;
453
+ }
454
+ /**
455
+ * Embedding-provenance record (ADR-210 D0) describing vectors produced by the
456
+ * loaded ONNX embedder, or null before the model is initialized.
457
+ */
458
+ function getEmbedderProvenance() {
459
+ if (!isInitialized)
460
+ return null;
461
+ const modelId = loadedModelId ?? DEFAULT_MODEL;
462
+ return {
463
+ embedderKind: (0, embedding_provenance_1.embedderKindForModel)(modelId),
464
+ modelId,
465
+ dimension: getDimension(),
466
+ normalize: loadedNormalize,
467
+ prefixPolicy: (0, embedding_provenance_1.getModelPrefixSpec)(modelId).prefixPolicy,
468
+ };
469
+ }
421
470
  /**
422
471
  * Get embedder stats including SIMD and parallel capabilities
423
472
  */
@@ -479,12 +528,57 @@ async function initParallelEmbedder(numWorkers) {
479
528
  async function embedBatchParallel(texts) {
480
529
  if (!bundledPool)
481
530
  await initParallelEmbedder();
482
- return bundledPool.embedBatch(texts);
531
+ // ADR-210 D4: bulk ingest is the passage path; MiniLM prefixes are empty.
532
+ const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, 'passage', t));
533
+ return bundledPool.embedBatch(prepared);
483
534
  }
484
535
  /** Number of active pool workers (0 if the pool isn't started). */
485
536
  function getParallelWorkerCount() {
486
537
  return bundledPool ? bundledPool.numWorkers : 0;
487
538
  }
539
+ /** Batches at or above this size route through the worker pool (ADR-210 D3). */
540
+ exports.BULK_EMBED_THRESHOLD = 32;
541
+ let bulkPoolFallbackWarned = false;
542
+ /**
543
+ * Default bulk-embedding path (ADR-210 D3): batches of `threshold`
544
+ * (default 32) or more texts route through the bundled parallel worker pool
545
+ * — fp32 model bytes shared across workers via SharedArrayBuffer, vectors
546
+ * identical to the single-thread path. Smaller batches, and any batch when
547
+ * pool startup fails (no worker_threads, no SharedArrayBuffer), use the
548
+ * single-threaded batch path with one stderr note.
549
+ *
550
+ * INT8 STATUS (honest gap, ADR-210 D3): the registered int8 variants
551
+ * (QUANTIZED_MODELS in onnx-optimized.ts) cannot run on the bundled WASM
552
+ * runtime today — its graph analyzer rejects quantized MiniLM exports
553
+ * ("Failed analyse for node /Unsqueeze", verified against both
554
+ * Xenova/all-MiniLM-L6-v2 model_quantized.onnx and the official
555
+ * sentence-transformers model_quint8_avx2.onnx exports). Bulk ingest
556
+ * therefore defaults to parallel-fp32; int8 ingest needs a Rust-side
557
+ * runtime upgrade in the ruvector-onnx-embeddings-wasm crate (tracked as
558
+ * an ADR-210 follow-up). Single-query latency keeps fp32 either way.
559
+ */
560
+ async function embedBulk(texts, opts = {}) {
561
+ if (!texts || texts.length === 0)
562
+ return [];
563
+ const threshold = opts.threshold ?? exports.BULK_EMBED_THRESHOLD;
564
+ if (!isInitialized) {
565
+ await initOnnxEmbedder();
566
+ }
567
+ if (texts.length >= threshold) {
568
+ try {
569
+ return await embedBatchParallel(texts);
570
+ }
571
+ catch (e) {
572
+ if (!bulkPoolFallbackWarned) {
573
+ bulkPoolFallbackWarned = true;
574
+ console.error(`ruvector: parallel bulk-embed pool unavailable (${e?.message ?? e}); ` +
575
+ `using single-threaded batch embedding.`);
576
+ }
577
+ }
578
+ }
579
+ const results = await embedBatch(texts);
580
+ return results.map(r => r.embedding);
581
+ }
488
582
  /** Shut down the bundled worker pool and release its threads. */
489
583
  async function shutdownParallelEmbedder() {
490
584
  if (bundledPool) {
@@ -500,10 +594,21 @@ class OnnxEmbedder {
500
594
  async init() {
501
595
  return initOnnxEmbedder(this.config);
502
596
  }
597
+ /** Equivalent to embedPassage() — ADR-210 D4. */
503
598
  async embed(text) {
504
599
  const result = await embed(text);
505
600
  return result.embedding;
506
601
  }
602
+ /** Embed a search query with the model's registered query prefix (D4). */
603
+ async embedQuery(text) {
604
+ const result = await embedQuery(text);
605
+ return result.embedding;
606
+ }
607
+ /** Embed a passage/document with the model's registered passage prefix (D4). */
608
+ async embedPassage(text) {
609
+ const result = await embedPassage(text);
610
+ return result.embedding;
611
+ }
507
612
  async embedBatch(texts) {
508
613
  const results = await embedBatch(texts);
509
614
  return results.map(r => r.embedding);
@@ -52,9 +52,16 @@ export declare class OptimizedOnnxEmbedder {
52
52
  init(): Promise<void>;
53
53
  private doInit;
54
54
  /**
55
- * Embed a single text with caching
55
+ * Embed a single text with caching.
56
+ * Equivalent to `embedPassage()` — ADR-210 D4 (plain embed = passage path).
56
57
  */
57
58
  embed(text: string): Promise<Float32Array>;
59
+ /** Embed a search query with the model's registered query prefix (D4). */
60
+ embedQuery(text: string): Promise<Float32Array>;
61
+ /** Embed a passage/document with the model's registered passage prefix (D4). */
62
+ embedPassage(text: string): Promise<Float32Array>;
63
+ private embedKind;
64
+ private embedRaw;
58
65
  /**
59
66
  * Embed multiple texts with batching and caching
60
67
  */
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAcH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0HD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;IA+DpB;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAiChD;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAmD1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}
1
+ {"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAeH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0ID,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;IAkEpB;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAIhD,0EAA0E;IACpE,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAIrD,gFAAgF;IAC1E,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;YAIzC,SAAS;YAMT,QAAQ;IAiCtB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAsD1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}
@@ -55,6 +55,7 @@ exports.initOptimizedOnnx = initOptimizedOnnx;
55
55
  const path = __importStar(require("path"));
56
56
  const fs = __importStar(require("fs"));
57
57
  const url_1 = require("url");
58
+ const embedding_provenance_1 = require("./embedding-provenance");
58
59
  // Force native dynamic import
59
60
  // eslint-disable-next-line @typescript-eslint/no-implied-eval
60
61
  const dynamicImport = new Function('specifier', 'return import(specifier)');
@@ -70,6 +71,9 @@ const QUANTIZED_MODELS = {
70
71
  tokenizer: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
71
72
  dimension: 384,
72
73
  maxLength: 256,
74
+ prefixPolicy: 'none',
75
+ queryPrefix: '',
76
+ passagePrefix: '',
73
77
  },
74
78
  'bge-small-en-v1.5': {
75
79
  onnx: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/onnx/model.onnx',
@@ -78,6 +82,11 @@ const QUANTIZED_MODELS = {
78
82
  tokenizer: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.json',
79
83
  dimension: 384,
80
84
  maxLength: 512,
85
+ // Query instruction recommended for short-query → long-passage retrieval;
86
+ // passages need no instruction (model card).
87
+ prefixPolicy: 'query-recommended',
88
+ queryPrefix: 'Represent this sentence for searching relevant passages: ',
89
+ passagePrefix: '',
81
90
  },
82
91
  'e5-small-v2': {
83
92
  onnx: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/onnx/model.onnx',
@@ -85,6 +94,10 @@ const QUANTIZED_MODELS = {
85
94
  tokenizer: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/tokenizer.json',
86
95
  dimension: 384,
87
96
  maxLength: 512,
97
+ // The model card states quality degrades without these prefixes.
98
+ prefixPolicy: 'required',
99
+ queryPrefix: 'query: ',
100
+ passagePrefix: 'passage: ',
88
101
  },
89
102
  };
90
103
  // ============================================================================
@@ -219,7 +232,10 @@ class OptimizedOnnxEmbedder {
219
232
  // log a quantization (FP16/INT8) that is not actually applied. When the
220
233
  // loader gains variant support, thread the selected variant through to
221
234
  // loadModel() here instead of computing an unused URL.
222
- const modelInfo = QUANTIZED_MODELS[this.config.modelId];
235
+ // Own-property lookup only ('__proto__'-style ids must miss, ADR-210).
236
+ const modelInfo = Object.prototype.hasOwnProperty.call(QUANTIZED_MODELS, this.config.modelId)
237
+ ? QUANTIZED_MODELS[this.config.modelId]
238
+ : undefined;
223
239
  if (modelInfo) {
224
240
  this.dimension = modelInfo.dimension;
225
241
  }
@@ -246,9 +262,26 @@ class OptimizedOnnxEmbedder {
246
262
  }
247
263
  }
248
264
  /**
249
- * Embed a single text with caching
265
+ * Embed a single text with caching.
266
+ * Equivalent to `embedPassage()` — ADR-210 D4 (plain embed = passage path).
250
267
  */
251
268
  async embed(text) {
269
+ return this.embedKind('passage', text);
270
+ }
271
+ /** Embed a search query with the model's registered query prefix (D4). */
272
+ async embedQuery(text) {
273
+ return this.embedKind('query', text);
274
+ }
275
+ /** Embed a passage/document with the model's registered passage prefix (D4). */
276
+ async embedPassage(text) {
277
+ return this.embedKind('passage', text);
278
+ }
279
+ async embedKind(kind, text) {
280
+ // ADR-210 D4: prefix before tokenization (and before the cache key, so
281
+ // query and passage embeds of the same text never collide for E5/BGE).
282
+ return this.embedRaw((0, embedding_provenance_1.prefixText)(this.config.modelId, kind, text));
283
+ }
284
+ async embedRaw(text) {
252
285
  if (this.config.lazyInit && !this.initialized) {
253
286
  await this.init();
254
287
  }
@@ -284,17 +317,19 @@ class OptimizedOnnxEmbedder {
284
317
  if (!this.embedder) {
285
318
  throw new Error('Embedder not initialized');
286
319
  }
287
- const results = new Array(texts.length);
320
+ // ADR-210 D4: batch embedding is the passage path (embed() === embedPassage()).
321
+ const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(this.config.modelId, 'passage', t));
322
+ const results = new Array(prepared.length);
288
323
  const uncached = [];
289
324
  // Check cache first
290
- for (let i = 0; i < texts.length; i++) {
291
- const cacheKey = hashString(texts[i]);
325
+ for (let i = 0; i < prepared.length; i++) {
326
+ const cacheKey = hashString(prepared[i]);
292
327
  const cached = this.embeddingCache.get(cacheKey);
293
328
  if (cached) {
294
329
  results[i] = cached;
295
330
  }
296
331
  else {
297
- uncached.push({ index: i, text: texts[i] });
332
+ uncached.push({ index: i, text: prepared[i] });
298
333
  }
299
334
  }
300
335
  // If all cached, return immediately
package/package.json CHANGED
@@ -1,18 +1,18 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.2.30",
4
- "description": "Self-learning vector database for Node.js hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
3
+ "version": "0.2.32",
4
+ "description": "Self-learning vector database for Node.js \u2014 hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
7
7
  "bin": {
8
8
  "ruvector": "./bin/cli.js"
9
9
  },
10
10
  "scripts": {
11
- "build": "tsc && mkdir -p dist/core/onnx && cp -r src/core/onnx/. dist/core/onnx/",
11
+ "build": "tsc && node -e \"require('fs').cpSync('src/core/onnx','dist/core/onnx',{recursive:true})\"",
12
12
  "verify-dist": "node scripts/verify-dist.js",
13
13
  "prepack": "npm run build && npm run verify-dist",
14
14
  "prepublishOnly": "npm run build && npm run verify-dist",
15
- "test": "node test/integration.js && node test/cli-commands.js && node test/sigterm-cleanup.js"
15
+ "test": "node test/integration.js && node test/cli-commands.js && node test/db-workflow.js && node test/sigterm-cleanup.js && node test/mcp-policy.js && node test/startup-budget.js"
16
16
  },
17
17
  "keywords": [
18
18
  "vector",
@@ -82,7 +82,8 @@
82
82
  "ora": "^5.4.1"
83
83
  },
84
84
  "optionalDependencies": {
85
- "@ruvector/rvf": "^0.1.0"
85
+ "@ruvector/rvf": "^0.1.0",
86
+ "@ruvector/tiny-dancer": "^0.1.22"
86
87
  },
87
88
  "devDependencies": {
88
89
  "@types/node": "^20.10.5",