ruvector 0.2.30 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +540 -32
- package/bin/mcp-server.js +4035 -3854
- package/dist/core/embedding-provenance.d.ts +145 -0
- package/dist/core/embedding-provenance.d.ts.map +1 -0
- package/dist/core/embedding-provenance.js +258 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +1 -0
- package/dist/core/intelligence-engine.d.ts +65 -4
- package/dist/core/intelligence-engine.d.ts.map +1 -1
- package/dist/core/intelligence-engine.js +149 -12
- package/dist/core/onnx/bundled-parallel.mjs +24 -19
- package/dist/core/onnx/loader.js +31 -4
- package/dist/core/onnx-embedder.d.ts +42 -1
- package/dist/core/onnx-embedder.d.ts.map +1 -1
- package/dist/core/onnx-embedder.js +116 -11
- package/dist/core/onnx-optimized.d.ts +8 -1
- package/dist/core/onnx-optimized.d.ts.map +1 -1
- package/dist/core/onnx-optimized.js +41 -6
- package/package.json +5 -4
|
@@ -48,26 +48,32 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
48
48
|
};
|
|
49
49
|
})();
|
|
50
50
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
51
|
-
exports.OnnxEmbedder = void 0;
|
|
51
|
+
exports.OnnxEmbedder = exports.BULK_EMBED_THRESHOLD = void 0;
|
|
52
52
|
exports.isOnnxAvailable = isOnnxAvailable;
|
|
53
53
|
exports.initOnnxEmbedder = initOnnxEmbedder;
|
|
54
54
|
exports.embed = embed;
|
|
55
|
+
exports.embedQuery = embedQuery;
|
|
56
|
+
exports.embedPassage = embedPassage;
|
|
55
57
|
exports.embedBatch = embedBatch;
|
|
56
58
|
exports.similarity = similarity;
|
|
57
59
|
exports.cosineSimilarity = cosineSimilarity;
|
|
58
60
|
exports.getDimension = getDimension;
|
|
59
61
|
exports.isReady = isReady;
|
|
60
62
|
exports.isOnnxInitialized = isOnnxInitialized;
|
|
63
|
+
exports.getActiveModelId = getActiveModelId;
|
|
64
|
+
exports.getEmbedderProvenance = getEmbedderProvenance;
|
|
61
65
|
exports.getStats = getStats;
|
|
62
66
|
exports.shutdown = shutdown;
|
|
63
67
|
exports.initParallelEmbedder = initParallelEmbedder;
|
|
64
68
|
exports.embedBatchParallel = embedBatchParallel;
|
|
65
69
|
exports.getParallelWorkerCount = getParallelWorkerCount;
|
|
70
|
+
exports.embedBulk = embedBulk;
|
|
66
71
|
exports.shutdownParallelEmbedder = shutdownParallelEmbedder;
|
|
67
72
|
const path = __importStar(require("path"));
|
|
68
73
|
const fs = __importStar(require("fs"));
|
|
69
74
|
const url_1 = require("url");
|
|
70
75
|
const module_1 = require("module");
|
|
76
|
+
const embedding_provenance_1 = require("./embedding-provenance");
|
|
71
77
|
// Set up ESM-compatible require for WASM module (fixes Windows/ESM compatibility)
|
|
72
78
|
// The WASM bindings use module.require for Node.js crypto, this provides a fallback
|
|
73
79
|
if (typeof globalThis !== 'undefined' && !globalThis.__ruvector_require) {
|
|
@@ -106,6 +112,10 @@ let loadedModelBytes = null;
|
|
|
106
112
|
let loadedTokenizerJson = null;
|
|
107
113
|
let loadedMaxLength = 256;
|
|
108
114
|
let bundledPool = null;
|
|
115
|
+
// ADR-210: identity of the loaded model, for prefix policies (D4) and the
|
|
116
|
+
// embedding-provenance record (D0).
|
|
117
|
+
let loadedModelId = null;
|
|
118
|
+
let loadedNormalize = true;
|
|
109
119
|
// Default model
|
|
110
120
|
const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
|
|
111
121
|
/**
|
|
@@ -257,6 +267,8 @@ async function initOnnxEmbedder(config = {}) {
|
|
|
257
267
|
loadedModelBytes = modelBytes;
|
|
258
268
|
loadedTokenizerJson = tokenizerJson;
|
|
259
269
|
loadedMaxLength = config.maxLength || modelConfig.maxLength || 256;
|
|
270
|
+
loadedModelId = modelId;
|
|
271
|
+
loadedNormalize = config.normalize !== false;
|
|
260
272
|
// Create embedder with config
|
|
261
273
|
const embedderConfig = new wasmModule.WasmEmbedderConfig()
|
|
262
274
|
.setMaxLength(config.maxLength || modelConfig.maxLength || 256)
|
|
@@ -301,18 +313,18 @@ async function initOnnxEmbedder(config = {}) {
|
|
|
301
313
|
await loadPromise;
|
|
302
314
|
return isInitialized;
|
|
303
315
|
}
|
|
304
|
-
|
|
305
|
-
* Generate embedding for text
|
|
306
|
-
*/
|
|
307
|
-
async function embed(text) {
|
|
316
|
+
async function embedKind(kind, text) {
|
|
308
317
|
if (!isInitialized) {
|
|
309
318
|
await initOnnxEmbedder();
|
|
310
319
|
}
|
|
311
320
|
if (!embedder) {
|
|
312
321
|
throw new Error('ONNX embedder not initialized');
|
|
313
322
|
}
|
|
323
|
+
// ADR-210 D4: apply the model's registered query/passage prefix. MiniLM has
|
|
324
|
+
// empty prefixes, so the default model's output is byte-identical to before.
|
|
325
|
+
const prepared = (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, kind, text);
|
|
314
326
|
const start = performance.now();
|
|
315
|
-
const embedding = embedder.embedOne(
|
|
327
|
+
const embedding = embedder.embedOne(prepared);
|
|
316
328
|
const timeMs = performance.now() - start;
|
|
317
329
|
return {
|
|
318
330
|
embedding: Array.from(embedding),
|
|
@@ -320,6 +332,21 @@ async function embed(text) {
|
|
|
320
332
|
timeMs,
|
|
321
333
|
};
|
|
322
334
|
}
|
|
335
|
+
/**
|
|
336
|
+
* Generate embedding for text. Equivalent to `embedPassage()` (ADR-210 D4):
|
|
337
|
+
* stored/passage text is the default; use `embedQuery()` for search queries.
|
|
338
|
+
*/
|
|
339
|
+
async function embed(text) {
|
|
340
|
+
return embedKind('passage', text);
|
|
341
|
+
}
|
|
342
|
+
/** Embed a search query, applying the model's registered query prefix (D4). */
|
|
343
|
+
async function embedQuery(text) {
|
|
344
|
+
return embedKind('query', text);
|
|
345
|
+
}
|
|
346
|
+
/** Embed a passage/document, applying the model's registered passage prefix (D4). */
|
|
347
|
+
async function embedPassage(text) {
|
|
348
|
+
return embedKind('passage', text);
|
|
349
|
+
}
|
|
323
350
|
/**
|
|
324
351
|
* Generate embeddings for multiple texts
|
|
325
352
|
* Uses parallel workers automatically for batches >= parallelThreshold
|
|
@@ -331,10 +358,12 @@ async function embedBatch(texts) {
|
|
|
331
358
|
if (!embedder) {
|
|
332
359
|
throw new Error('ONNX embedder not initialized');
|
|
333
360
|
}
|
|
361
|
+
// ADR-210 D4: batch embedding is the passage path (embed() === embedPassage()).
|
|
362
|
+
const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, 'passage', t));
|
|
334
363
|
const start = performance.now();
|
|
335
364
|
// Use parallel workers for large batches
|
|
336
|
-
if (parallelEnabled && parallelEmbedder &&
|
|
337
|
-
const batchResults = await parallelEmbedder.embedBatch(
|
|
365
|
+
if (parallelEnabled && parallelEmbedder && prepared.length >= parallelThreshold) {
|
|
366
|
+
const batchResults = await parallelEmbedder.embedBatch(prepared);
|
|
338
367
|
const totalTime = performance.now() - start;
|
|
339
368
|
const dimension = parallelEmbedder.dimension || 384;
|
|
340
369
|
return batchResults.map((emb) => ({
|
|
@@ -344,11 +373,11 @@ async function embedBatch(texts) {
|
|
|
344
373
|
}));
|
|
345
374
|
}
|
|
346
375
|
// Sequential fallback
|
|
347
|
-
const batchEmbeddings = embedder.embedBatch(
|
|
376
|
+
const batchEmbeddings = embedder.embedBatch(prepared);
|
|
348
377
|
const totalTime = performance.now() - start;
|
|
349
378
|
const dimension = embedder.dimension();
|
|
350
379
|
const results = [];
|
|
351
|
-
for (let i = 0; i <
|
|
380
|
+
for (let i = 0; i < prepared.length; i++) {
|
|
352
381
|
const embedding = batchEmbeddings.slice(i * dimension, (i + 1) * dimension);
|
|
353
382
|
results.push({
|
|
354
383
|
embedding: Array.from(embedding),
|
|
@@ -418,6 +447,26 @@ function isReady() {
|
|
|
418
447
|
function isOnnxInitialized() {
|
|
419
448
|
return isInitialized;
|
|
420
449
|
}
|
|
450
|
+
/** Model id of the loaded model, or null before init (ADR-210). */
|
|
451
|
+
function getActiveModelId() {
|
|
452
|
+
return loadedModelId;
|
|
453
|
+
}
|
|
454
|
+
/**
|
|
455
|
+
* Embedding-provenance record (ADR-210 D0) describing vectors produced by the
|
|
456
|
+
* loaded ONNX embedder, or null before the model is initialized.
|
|
457
|
+
*/
|
|
458
|
+
function getEmbedderProvenance() {
|
|
459
|
+
if (!isInitialized)
|
|
460
|
+
return null;
|
|
461
|
+
const modelId = loadedModelId ?? DEFAULT_MODEL;
|
|
462
|
+
return {
|
|
463
|
+
embedderKind: (0, embedding_provenance_1.embedderKindForModel)(modelId),
|
|
464
|
+
modelId,
|
|
465
|
+
dimension: getDimension(),
|
|
466
|
+
normalize: loadedNormalize,
|
|
467
|
+
prefixPolicy: (0, embedding_provenance_1.getModelPrefixSpec)(modelId).prefixPolicy,
|
|
468
|
+
};
|
|
469
|
+
}
|
|
421
470
|
/**
|
|
422
471
|
* Get embedder stats including SIMD and parallel capabilities
|
|
423
472
|
*/
|
|
@@ -479,12 +528,57 @@ async function initParallelEmbedder(numWorkers) {
|
|
|
479
528
|
async function embedBatchParallel(texts) {
|
|
480
529
|
if (!bundledPool)
|
|
481
530
|
await initParallelEmbedder();
|
|
482
|
-
|
|
531
|
+
// ADR-210 D4: bulk ingest is the passage path; MiniLM prefixes are empty.
|
|
532
|
+
const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(loadedModelId ?? DEFAULT_MODEL, 'passage', t));
|
|
533
|
+
return bundledPool.embedBatch(prepared);
|
|
483
534
|
}
|
|
484
535
|
/** Number of active pool workers (0 if the pool isn't started). */
|
|
485
536
|
function getParallelWorkerCount() {
|
|
486
537
|
return bundledPool ? bundledPool.numWorkers : 0;
|
|
487
538
|
}
|
|
539
|
+
/** Batches at or above this size route through the worker pool (ADR-210 D3). */
|
|
540
|
+
exports.BULK_EMBED_THRESHOLD = 32;
|
|
541
|
+
let bulkPoolFallbackWarned = false;
|
|
542
|
+
/**
|
|
543
|
+
* Default bulk-embedding path (ADR-210 D3): batches of `threshold`
|
|
544
|
+
* (default 32) or more texts route through the bundled parallel worker pool
|
|
545
|
+
* — fp32 model bytes shared across workers via SharedArrayBuffer, vectors
|
|
546
|
+
* identical to the single-thread path. Smaller batches, and any batch when
|
|
547
|
+
* pool startup fails (no worker_threads, no SharedArrayBuffer), use the
|
|
548
|
+
* single-threaded batch path with one stderr note.
|
|
549
|
+
*
|
|
550
|
+
* INT8 STATUS (honest gap, ADR-210 D3): the registered int8 variants
|
|
551
|
+
* (QUANTIZED_MODELS in onnx-optimized.ts) cannot run on the bundled WASM
|
|
552
|
+
* runtime today — its graph analyzer rejects quantized MiniLM exports
|
|
553
|
+
* ("Failed analyse for node /Unsqueeze", verified against both
|
|
554
|
+
* Xenova/all-MiniLM-L6-v2 model_quantized.onnx and the official
|
|
555
|
+
* sentence-transformers model_quint8_avx2.onnx exports). Bulk ingest
|
|
556
|
+
* therefore defaults to parallel-fp32; int8 ingest needs a Rust-side
|
|
557
|
+
* runtime upgrade in the ruvector-onnx-embeddings-wasm crate (tracked as
|
|
558
|
+
* an ADR-210 follow-up). Single-query latency keeps fp32 either way.
|
|
559
|
+
*/
|
|
560
|
+
async function embedBulk(texts, opts = {}) {
|
|
561
|
+
if (!texts || texts.length === 0)
|
|
562
|
+
return [];
|
|
563
|
+
const threshold = opts.threshold ?? exports.BULK_EMBED_THRESHOLD;
|
|
564
|
+
if (!isInitialized) {
|
|
565
|
+
await initOnnxEmbedder();
|
|
566
|
+
}
|
|
567
|
+
if (texts.length >= threshold) {
|
|
568
|
+
try {
|
|
569
|
+
return await embedBatchParallel(texts);
|
|
570
|
+
}
|
|
571
|
+
catch (e) {
|
|
572
|
+
if (!bulkPoolFallbackWarned) {
|
|
573
|
+
bulkPoolFallbackWarned = true;
|
|
574
|
+
console.error(`ruvector: parallel bulk-embed pool unavailable (${e?.message ?? e}); ` +
|
|
575
|
+
`using single-threaded batch embedding.`);
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
const results = await embedBatch(texts);
|
|
580
|
+
return results.map(r => r.embedding);
|
|
581
|
+
}
|
|
488
582
|
/** Shut down the bundled worker pool and release its threads. */
|
|
489
583
|
async function shutdownParallelEmbedder() {
|
|
490
584
|
if (bundledPool) {
|
|
@@ -500,10 +594,21 @@ class OnnxEmbedder {
|
|
|
500
594
|
async init() {
|
|
501
595
|
return initOnnxEmbedder(this.config);
|
|
502
596
|
}
|
|
597
|
+
/** Equivalent to embedPassage() — ADR-210 D4. */
|
|
503
598
|
async embed(text) {
|
|
504
599
|
const result = await embed(text);
|
|
505
600
|
return result.embedding;
|
|
506
601
|
}
|
|
602
|
+
/** Embed a search query with the model's registered query prefix (D4). */
|
|
603
|
+
async embedQuery(text) {
|
|
604
|
+
const result = await embedQuery(text);
|
|
605
|
+
return result.embedding;
|
|
606
|
+
}
|
|
607
|
+
/** Embed a passage/document with the model's registered passage prefix (D4). */
|
|
608
|
+
async embedPassage(text) {
|
|
609
|
+
const result = await embedPassage(text);
|
|
610
|
+
return result.embedding;
|
|
611
|
+
}
|
|
507
612
|
async embedBatch(texts) {
|
|
508
613
|
const results = await embedBatch(texts);
|
|
509
614
|
return results.map(r => r.embedding);
|
|
@@ -52,9 +52,16 @@ export declare class OptimizedOnnxEmbedder {
|
|
|
52
52
|
init(): Promise<void>;
|
|
53
53
|
private doInit;
|
|
54
54
|
/**
|
|
55
|
-
* Embed a single text with caching
|
|
55
|
+
* Embed a single text with caching.
|
|
56
|
+
* Equivalent to `embedPassage()` — ADR-210 D4 (plain embed = passage path).
|
|
56
57
|
*/
|
|
57
58
|
embed(text: string): Promise<Float32Array>;
|
|
59
|
+
/** Embed a search query with the model's registered query prefix (D4). */
|
|
60
|
+
embedQuery(text: string): Promise<Float32Array>;
|
|
61
|
+
/** Embed a passage/document with the model's registered passage prefix (D4). */
|
|
62
|
+
embedPassage(text: string): Promise<Float32Array>;
|
|
63
|
+
private embedKind;
|
|
64
|
+
private embedRaw;
|
|
58
65
|
/**
|
|
59
66
|
* Embed multiple texts with batching and caching
|
|
60
67
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;
|
|
1
|
+
{"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAeH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0ID,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;IAkEpB;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAIhD,0EAA0E;IACpE,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAIrD,gFAAgF;IAC1E,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;YAIzC,SAAS;YAMT,QAAQ;IAiCtB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAsD1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}
|
|
@@ -55,6 +55,7 @@ exports.initOptimizedOnnx = initOptimizedOnnx;
|
|
|
55
55
|
const path = __importStar(require("path"));
|
|
56
56
|
const fs = __importStar(require("fs"));
|
|
57
57
|
const url_1 = require("url");
|
|
58
|
+
const embedding_provenance_1 = require("./embedding-provenance");
|
|
58
59
|
// Force native dynamic import
|
|
59
60
|
// eslint-disable-next-line @typescript-eslint/no-implied-eval
|
|
60
61
|
const dynamicImport = new Function('specifier', 'return import(specifier)');
|
|
@@ -70,6 +71,9 @@ const QUANTIZED_MODELS = {
|
|
|
70
71
|
tokenizer: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
|
|
71
72
|
dimension: 384,
|
|
72
73
|
maxLength: 256,
|
|
74
|
+
prefixPolicy: 'none',
|
|
75
|
+
queryPrefix: '',
|
|
76
|
+
passagePrefix: '',
|
|
73
77
|
},
|
|
74
78
|
'bge-small-en-v1.5': {
|
|
75
79
|
onnx: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/onnx/model.onnx',
|
|
@@ -78,6 +82,11 @@ const QUANTIZED_MODELS = {
|
|
|
78
82
|
tokenizer: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.json',
|
|
79
83
|
dimension: 384,
|
|
80
84
|
maxLength: 512,
|
|
85
|
+
// Query instruction recommended for short-query → long-passage retrieval;
|
|
86
|
+
// passages need no instruction (model card).
|
|
87
|
+
prefixPolicy: 'query-recommended',
|
|
88
|
+
queryPrefix: 'Represent this sentence for searching relevant passages: ',
|
|
89
|
+
passagePrefix: '',
|
|
81
90
|
},
|
|
82
91
|
'e5-small-v2': {
|
|
83
92
|
onnx: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/onnx/model.onnx',
|
|
@@ -85,6 +94,10 @@ const QUANTIZED_MODELS = {
|
|
|
85
94
|
tokenizer: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/tokenizer.json',
|
|
86
95
|
dimension: 384,
|
|
87
96
|
maxLength: 512,
|
|
97
|
+
// The model card states quality degrades without these prefixes.
|
|
98
|
+
prefixPolicy: 'required',
|
|
99
|
+
queryPrefix: 'query: ',
|
|
100
|
+
passagePrefix: 'passage: ',
|
|
88
101
|
},
|
|
89
102
|
};
|
|
90
103
|
// ============================================================================
|
|
@@ -219,7 +232,10 @@ class OptimizedOnnxEmbedder {
|
|
|
219
232
|
// log a quantization (FP16/INT8) that is not actually applied. When the
|
|
220
233
|
// loader gains variant support, thread the selected variant through to
|
|
221
234
|
// loadModel() here instead of computing an unused URL.
|
|
222
|
-
|
|
235
|
+
// Own-property lookup only ('__proto__'-style ids must miss, ADR-210).
|
|
236
|
+
const modelInfo = Object.prototype.hasOwnProperty.call(QUANTIZED_MODELS, this.config.modelId)
|
|
237
|
+
? QUANTIZED_MODELS[this.config.modelId]
|
|
238
|
+
: undefined;
|
|
223
239
|
if (modelInfo) {
|
|
224
240
|
this.dimension = modelInfo.dimension;
|
|
225
241
|
}
|
|
@@ -246,9 +262,26 @@ class OptimizedOnnxEmbedder {
|
|
|
246
262
|
}
|
|
247
263
|
}
|
|
248
264
|
/**
|
|
249
|
-
* Embed a single text with caching
|
|
265
|
+
* Embed a single text with caching.
|
|
266
|
+
* Equivalent to `embedPassage()` — ADR-210 D4 (plain embed = passage path).
|
|
250
267
|
*/
|
|
251
268
|
async embed(text) {
|
|
269
|
+
return this.embedKind('passage', text);
|
|
270
|
+
}
|
|
271
|
+
/** Embed a search query with the model's registered query prefix (D4). */
|
|
272
|
+
async embedQuery(text) {
|
|
273
|
+
return this.embedKind('query', text);
|
|
274
|
+
}
|
|
275
|
+
/** Embed a passage/document with the model's registered passage prefix (D4). */
|
|
276
|
+
async embedPassage(text) {
|
|
277
|
+
return this.embedKind('passage', text);
|
|
278
|
+
}
|
|
279
|
+
async embedKind(kind, text) {
|
|
280
|
+
// ADR-210 D4: prefix before tokenization (and before the cache key, so
|
|
281
|
+
// query and passage embeds of the same text never collide for E5/BGE).
|
|
282
|
+
return this.embedRaw((0, embedding_provenance_1.prefixText)(this.config.modelId, kind, text));
|
|
283
|
+
}
|
|
284
|
+
async embedRaw(text) {
|
|
252
285
|
if (this.config.lazyInit && !this.initialized) {
|
|
253
286
|
await this.init();
|
|
254
287
|
}
|
|
@@ -284,17 +317,19 @@ class OptimizedOnnxEmbedder {
|
|
|
284
317
|
if (!this.embedder) {
|
|
285
318
|
throw new Error('Embedder not initialized');
|
|
286
319
|
}
|
|
287
|
-
|
|
320
|
+
// ADR-210 D4: batch embedding is the passage path (embed() === embedPassage()).
|
|
321
|
+
const prepared = texts.map(t => (0, embedding_provenance_1.prefixText)(this.config.modelId, 'passage', t));
|
|
322
|
+
const results = new Array(prepared.length);
|
|
288
323
|
const uncached = [];
|
|
289
324
|
// Check cache first
|
|
290
|
-
for (let i = 0; i <
|
|
291
|
-
const cacheKey = hashString(
|
|
325
|
+
for (let i = 0; i < prepared.length; i++) {
|
|
326
|
+
const cacheKey = hashString(prepared[i]);
|
|
292
327
|
const cached = this.embeddingCache.get(cacheKey);
|
|
293
328
|
if (cached) {
|
|
294
329
|
results[i] = cached;
|
|
295
330
|
}
|
|
296
331
|
else {
|
|
297
|
-
uncached.push({ index: i, text:
|
|
332
|
+
uncached.push({ index: i, text: prepared[i] });
|
|
298
333
|
}
|
|
299
334
|
}
|
|
300
335
|
// If all cached, return immediately
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ruvector",
|
|
3
|
-
"version": "0.2.
|
|
4
|
-
"description": "Self-learning vector database for Node.js
|
|
3
|
+
"version": "0.2.31",
|
|
4
|
+
"description": "Self-learning vector database for Node.js \u2014 hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
7
|
"bin": {
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
"verify-dist": "node scripts/verify-dist.js",
|
|
13
13
|
"prepack": "npm run build && npm run verify-dist",
|
|
14
14
|
"prepublishOnly": "npm run build && npm run verify-dist",
|
|
15
|
-
"test": "node test/integration.js && node test/cli-commands.js && node test/sigterm-cleanup.js"
|
|
15
|
+
"test": "node test/integration.js && node test/cli-commands.js && node test/db-workflow.js && node test/sigterm-cleanup.js"
|
|
16
16
|
},
|
|
17
17
|
"keywords": [
|
|
18
18
|
"vector",
|
|
@@ -82,7 +82,8 @@
|
|
|
82
82
|
"ora": "^5.4.1"
|
|
83
83
|
},
|
|
84
84
|
"optionalDependencies": {
|
|
85
|
-
"@ruvector/rvf": "^0.1.0"
|
|
85
|
+
"@ruvector/rvf": "^0.1.0",
|
|
86
|
+
"@ruvector/tiny-dancer": "^0.1.22"
|
|
86
87
|
},
|
|
87
88
|
"devDependencies": {
|
|
88
89
|
"@types/node": "^20.10.5",
|