ruvector 0.2.26 → 0.2.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/adaptive-embedder.d.ts +9 -0
- package/dist/core/adaptive-embedder.d.ts.map +1 -1
- package/dist/core/adaptive-embedder.js +11 -0
- package/dist/core/onnx/bundled-parallel.mjs +164 -0
- package/dist/core/onnx/embed-worker.mjs +67 -0
- package/dist/core/onnx/loader.js +87 -1
- package/dist/core/onnx/package.json +3 -0
- package/dist/core/onnx/pkg/ruvector.db +0 -0
- package/dist/core/onnx-embedder.d.ts +39 -2
- package/dist/core/onnx-embedder.d.ts.map +1 -1
- package/dist/core/onnx-embedder.js +123 -8
- package/dist/core/onnx-optimized.d.ts.map +1 -1
- package/dist/core/onnx-optimized.js +6 -24
- package/package.json +2 -2
|
@@ -81,6 +81,15 @@ export declare class AdaptiveEmbedder {
|
|
|
81
81
|
* Initialize ONNX backend
|
|
82
82
|
*/
|
|
83
83
|
init(): Promise<void>;
|
|
84
|
+
/**
|
|
85
|
+
* Whether the ONNX backend is initialized and producing real (non-hash)
|
|
86
|
+
* embeddings. Returns `false` until `init()` has successfully loaded the
|
|
87
|
+
* ONNX model. Note that `embed()` still works when this is `false` — it
|
|
88
|
+
* falls back to a deterministic hash embedding — so callers that require
|
|
89
|
+
* real semantic embeddings should check this after `init()`.
|
|
90
|
+
* See https://github.com/ruvnet/RuVector/issues/523.
|
|
91
|
+
*/
|
|
92
|
+
isReady(): boolean;
|
|
84
93
|
/**
|
|
85
94
|
* Generate adaptive embedding
|
|
86
95
|
* Pipeline: ONNX → LoRA → Prototype Adjustment → Episodic Augmentation
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"adaptive-embedder.d.ts","sourceRoot":"","sources":["../../src/core/adaptive-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kCAAkC;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gDAAgD;IAChD,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,uCAAuC;IACvC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,6CAA6C;IAC7C,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AA+pBD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,IAAI,CAAY;IACxB,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,SAAS,CAAe;IAGhC,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,gBAAgB,CAAa;IAGrC,OAAO,CAAC,YAAY,CAA+E;gBAEvF,MAAM,GAAE,cAAmB;IAiBvC;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAClC,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAmCrB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE;QAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAsBvB;;;OAGG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAkBpG;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA+B1B;;OAEG;IACG,gBAAgB,CACpB,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,MAAY,GA
CpB,OAAO,CAAC,IAAI,CAAC;IAiBhB;;;OAGG;IACG,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAmBlC;;OAEG;IACH,OAAO,CAAC,SAAS;IAoBjB,OAAO,CAAC,SAAS;IAKjB;;OAEG;IACH,QAAQ,IAAI,aAAa;IAczB;;OAEG;IACH,MAAM,IAAI;QACR,IAAI,EAAE,WAAW,CAAC;QAClB,UAAU,EAAE,eAAe,EAAE,CAAC;QAC9B,KAAK,EAAE,aAAa,CAAC;KACtB;IAQD;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,EAAE,WAAW,CAAC;QAAC,UAAU,CAAC,EAAE,eAAe,EAAE,CAAA;KAAE,GAAG,IAAI;IAS1E;;OAEG;IACH,KAAK,IAAI,IAAI;IAUb;;OAEG;IACH,aAAa,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE;CAGnD;AAQD,wBAAgB,mBAAmB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,gBAAgB,CAK7E;AAED,wBAAsB,oBAAoB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAI7F;AAED,eAAe,gBAAgB,CAAC"}
|
|
1
|
+
{"version":3,"file":"adaptive-embedder.d.ts","sourceRoot":"","sources":["../../src/core/adaptive-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kCAAkC;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gDAAgD;IAChD,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,uCAAuC;IACvC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,6CAA6C;IAC7C,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AA+pBD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,IAAI,CAAY;IACxB,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,SAAS,CAAe;IAGhC,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,gBAAgB,CAAa;IAGrC,OAAO,CAAC,YAAY,CAA+E;gBAEvF,MAAM,GAAE,cAAmB;IAiBvC;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;;;;;;OAOG;IACH,OAAO,IAAI,OAAO;IAIlB;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAClC,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAmCrB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE;QAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAsBvB;;;OAGG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAkBpG;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA+B1B;;OAEG;IACG,gBAAgB,CACpB,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd,O
AAO,EAAE,OAAO,EAChB,OAAO,GAAE,MAAY,GACpB,OAAO,CAAC,IAAI,CAAC;IAiBhB;;;OAGG;IACG,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAmBlC;;OAEG;IACH,OAAO,CAAC,SAAS;IAoBjB,OAAO,CAAC,SAAS;IAKjB;;OAEG;IACH,QAAQ,IAAI,aAAa;IAczB;;OAEG;IACH,MAAM,IAAI;QACR,IAAI,EAAE,WAAW,CAAC;QAClB,UAAU,EAAE,eAAe,EAAE,CAAC;QAC9B,KAAK,EAAE,aAAa,CAAC;KACtB;IAQD;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,EAAE,WAAW,CAAC;QAAC,UAAU,CAAC,EAAE,eAAe,EAAE,CAAA;KAAE,GAAG,IAAI;IAS1E;;OAEG;IACH,KAAK,IAAI,IAAI;IAUb;;OAEG;IACH,aAAa,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE;CAGnD;AAQD,wBAAgB,mBAAmB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,gBAAgB,CAK7E;AAED,wBAAsB,oBAAoB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAI7F;AAED,eAAe,gBAAgB,CAAC"}
|
|
@@ -611,6 +611,17 @@ class AdaptiveEmbedder {
|
|
|
611
611
|
this.onnxReady = true;
|
|
612
612
|
}
|
|
613
613
|
}
|
|
614
|
+
/**
|
|
615
|
+
* Whether the ONNX backend is initialized and producing real (non-hash)
|
|
616
|
+
* embeddings. Returns `false` until `init()` has successfully loaded the
|
|
617
|
+
* ONNX model. Note that `embed()` still works when this is `false` — it
|
|
618
|
+
* falls back to a deterministic hash embedding — so callers that require
|
|
619
|
+
* real semantic embeddings should check this after `init()`.
|
|
620
|
+
* See https://github.com/ruvnet/RuVector/issues/523.
|
|
621
|
+
*/
|
|
622
|
+
isReady() {
|
|
623
|
+
return this.onnxReady;
|
|
624
|
+
}
|
|
614
625
|
/**
|
|
615
626
|
* Generate adaptive embedding
|
|
616
627
|
* Pipeline: ONNX → LoRA → Prototype Adjustment → Episodic Augmentation
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bundled-WASM parallel embedder (issue #523 SOTA).
|
|
3
|
+
*
|
|
4
|
+
* A self-contained worker_threads pool — NO external dependency — that shards
|
|
5
|
+
* batches of text across CPU cores, each worker running the bundled ONNX WASM
|
|
6
|
+
* embedder over the SAME model bytes (shared via SharedArrayBuffer) and config.
|
|
7
|
+
* Output vectors are identical to the single-thread path (cosine-equivalent),
|
|
8
|
+
* so this is a pure throughput optimization with no quality change.
|
|
9
|
+
*
|
|
10
|
+
* Drop-in shape compatible with the optional `ruvector-onnx-embeddings-wasm/parallel`
|
|
11
|
+
* package: { numWorkers, dimension, init(), embedBatch(texts) -> number[][], shutdown() }.
|
|
12
|
+
*/
|
|
13
|
+
import { Worker } from 'node:worker_threads';
|
|
14
|
+
import * as os from 'node:os';
|
|
15
|
+
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import * as path from 'node:path';
|
|
17
|
+
|
|
18
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
19
|
+
|
|
20
|
+
export class ParallelEmbedder {
  /**
   * Create a pool description. No threads are started until `init()` runs.
   *
   * @param {object} opts
   * @param {Uint8Array} opts.modelBytes raw ONNX model bytes (loaded once by caller)
   * @param {string} opts.tokenizerJson
   * @param {number} [opts.maxLength=256]
   * @param {number} [opts.dimension=384]
   * @param {number} [opts.numWorkers] defaults to min(cpus-2, 16), >=2
   * @param {number} [opts.requestTimeoutMs=30000] per-shard timeout; <= 0 disables it
   */
  constructor(opts = {}) {
    this.numWorkers = opts.numWorkers || Math.max(2, Math.min((os.cpus().length || 4) - 2, 16));
    this.dimension = opts.dimension || 384;
    this._modelBytes = opts.modelBytes;
    this._tokenizerJson = opts.tokenizerJson;
    this._maxLength = opts.maxLength || 256;
    this._requestTimeoutMs = opts.requestTimeoutMs ?? 30000;
    this._workers = [];
    this._pending = new Map(); // id -> { resolve, reject, worker, timer }
    this._seq = 0;
    this._shuttingDown = false;
  }

  /**
   * Spawn `numWorkers` worker threads and wait until each has posted `ready`.
   *
   * If any worker fails to come up, every worker spawned so far is terminated
   * before the error is rethrown, so a failed init never leaks threads.
   *
   * @returns {Promise<void>}
   * @throws {Error} when modelBytes/tokenizerJson are missing or a worker fails to initialise
   */
  async init() {
    if (!this._modelBytes || !this._tokenizerJson) {
      throw new Error('ParallelEmbedder requires modelBytes and tokenizerJson');
    }
    // Share model bytes across all workers via a single SharedArrayBuffer.
    const sab = new SharedArrayBuffer(this._modelBytes.length);
    new Uint8Array(sab).set(this._modelBytes);

    const workerUrl = new URL('./embed-worker.mjs', import.meta.url);

    try {
      const readies = [];
      for (let i = 0; i < this.numWorkers; i++) {
        const w = new Worker(workerUrl, {
          workerData: { modelSab: sab, tokenizerJson: this._tokenizerJson, maxLength: this._maxLength },
        });
        w.on('message', (m) => this._onMessage(m));
        // If a worker dies (uncaught error or unexpected exit), fail every request
        // currently routed to it instead of letting those promises hang forever.
        w.on('error', (e) => this._failWorker(w, e instanceof Error ? e : new Error(String(e))));
        w.on('exit', (code) => {
          if (!this._shuttingDown && code !== 0) {
            this._failWorker(w, new Error(`embed worker exited unexpectedly (code ${code})`));
          }
        });
        this._workers.push(w);
        readies.push(this._awaitReady(w));
      }
      await Promise.all(readies);
    } catch (err) {
      // Tear down whatever was spawned so a failed init does not leave
      // orphaned threads running; the caller may retry init() afterwards.
      const spawned = this._workers;
      this._workers = [];
      await Promise.allSettled(spawned.map((w) => w.terminate()));
      throw err;
    }

    // Drop the main-thread reference; the SAB keeps the shared copy alive.
    this._modelBytes = null;
  }

  /**
   * Resolve once `worker` posts `{ type: 'ready' }`; reject on `init-error`,
   * on an `error` event, or if the worker exits before becoming ready.
   *
   * @param {Worker} worker
   * @returns {Promise<void>}
   */
  _awaitReady(worker) {
    return new Promise((resolve, reject) => {
      const cleanup = () => {
        worker.off('message', onReady);
        worker.off('error', onErr);
        worker.off('exit', onExit);
      };
      const onReady = (m) => {
        if (m.type === 'ready') { cleanup(); resolve(); }
        else if (m.type === 'init-error') { cleanup(); reject(new Error('worker init failed: ' + m.error)); }
      };
      const onErr = (e) => { cleanup(); reject(e); };
      // Without this, a worker that exits without an 'error' event would
      // leave init() waiting forever.
      const onExit = (code) => {
        cleanup();
        reject(new Error(`embed worker exited before becoming ready (code ${code})`));
      };
      worker.on('message', onReady);
      worker.once('error', onErr);
      worker.once('exit', onExit);
    });
  }

  /**
   * Remove the pending entry `id` (if still present), clear its timer, and
   * hand the entry to `fn`. Guarantees each request is settled at most once.
   *
   * @param {number} id
   * @param {(entry: {resolve: Function, reject: Function}) => void} fn
   */
  _settle(id, fn) {
    const p = this._pending.get(id);
    if (!p) return;
    this._pending.delete(id);
    if (p.timer) clearTimeout(p.timer);
    fn(p);
  }

  /**
   * Route a worker `result` / `error` message to its pending request.
   * Other message types (e.g. `ready`) are ignored here.
   *
   * @param {{type: string, id?: number, dim?: number, count?: number, buffer?: ArrayBuffer, error?: string}} m
   */
  _onMessage(m) {
    if (m.type !== 'result' && m.type !== 'error') return;
    this._settle(m.id, (p) => {
      if (m.type === 'error') p.reject(new Error(m.error));
      else p.resolve({ dim: m.dim, count: m.count, flat: new Float32Array(m.buffer) });
    });
  }

  /** Reject every in-flight request routed to a dead worker. */
  _failWorker(worker, err) {
    for (const [id, p] of this._pending) {
      if (p.worker === worker) this._settle(id, () => p.reject(err));
    }
  }

  /**
   * Post one shard to `worker` and return a promise for its result.
   *
   * @param {Worker} worker
   * @param {string[]} texts
   * @returns {Promise<{dim: number, count: number, flat: Float32Array}>}
   */
  _send(worker, texts) {
    const id = ++this._seq;
    return new Promise((resolve, reject) => {
      const entry = { resolve, reject, worker, timer: null };
      if (this._requestTimeoutMs > 0) {
        entry.timer = setTimeout(() => {
          this._settle(id, () =>
            reject(new Error(`embed request timed out after ${this._requestTimeoutMs}ms`)));
        }, this._requestTimeoutMs);
        // Don't keep the event loop alive solely for this timer.
        if (typeof entry.timer.unref === 'function') entry.timer.unref();
      }
      this._pending.set(id, entry);
      try {
        worker.postMessage({ type: 'embed', id, texts });
      } catch (err) {
        // postMessage can throw synchronously (e.g. uncloneable payload);
        // settle through _settle so the entry and its timer are released.
        this._settle(id, () => reject(err));
      }
    });
  }

  /**
   * Embed many texts, sharded across workers. Returns number[][] in input order.
   *
   * @param {string[]} texts
   * @returns {Promise<number[][]>}
   * @throws {Error} when the pool has no running workers (init() not called, or after shutdown())
   */
  async embedBatch(texts) {
    if (!texts || texts.length === 0) return [];
    const n = this._workers.length;
    if (n === 0) {
      // Previously this fell through and returned an array of empty slots.
      throw new Error('ParallelEmbedder has no active workers: call init() before embedBatch()');
    }
    const shard = Math.ceil(texts.length / n);
    const tasks = [];
    const starts = [];
    for (let i = 0; i < n; i++) {
      const start = i * shard;
      if (start >= texts.length) break;
      const end = Math.min(texts.length, start + shard);
      starts.push(start);
      tasks.push(this._send(this._workers[i], texts.slice(start, end)));
    }
    const results = await Promise.all(tasks);
    const out = new Array(texts.length);
    for (let r = 0; r < results.length; r++) {
      const { dim, count, flat } = results[r];
      const start = starts[r];
      for (let j = 0; j < count; j++) {
        out[start + j] = Array.from(flat.subarray(j * dim, (j + 1) * dim));
      }
    }
    return out;
  }

  /**
   * Reject all in-flight requests and terminate every worker.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    this._shuttingDown = true;
    // Reject anything still in flight so callers don't hang on shutdown.
    for (const [id, p] of this._pending) {
      this._settle(id, () => p.reject(new Error('ParallelEmbedder shut down')));
    }
    const ws = this._workers;
    this._workers = [];
    await Promise.all(ws.map((w) => w.terminate()));
  }
}
|
|
163
|
+
|
|
164
|
+
export default ParallelEmbedder;
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Worker-thread entry for the bundled-WASM parallel embedder (issue #523 SOTA).
|
|
3
|
+
*
|
|
4
|
+
* Each worker loads its own instance of the bundled ONNX WASM embedder from the
|
|
5
|
+
* SAME model bytes (shared via SharedArrayBuffer — no per-worker download) and
|
|
6
|
+
* the SAME config, so the vectors it produces are identical to the single-thread
|
|
7
|
+
* path (cosine-equivalent by construction).
|
|
8
|
+
*
|
|
9
|
+
* Protocol:
|
|
10
|
+
* workerData: { modelSab: SharedArrayBuffer, tokenizerJson: string, maxLength: number }
|
|
11
|
+
* → posts { type: 'ready' } once the WASM embedder is constructed
|
|
12
|
+
* message { type: 'embed', id, texts: string[] }
|
|
13
|
+
* → posts { type: 'result', id, dim, count, buffer } (Float32Array buffer, transferred)
|
|
14
|
+
* errors → { type: 'error', id, error }
|
|
15
|
+
*/
|
|
16
|
+
import { parentPort, workerData } from 'node:worker_threads';
|
|
17
|
+
import { pathToFileURL, fileURLToPath } from 'node:url';
|
|
18
|
+
import * as path from 'node:path';
|
|
19
|
+
import * as fs from 'node:fs';
|
|
20
|
+
|
|
21
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
22
|
+
|
|
23
|
+
let embedder = null;
|
|
24
|
+
|
|
25
|
+
/**
 * Load the bundled wasm-bindgen glue and WASM binary from `./pkg`, wire them
 * together, and build this worker's embedder from the shared model bytes.
 * On success the module-level `embedder` is populated.
 */
async function init() {
  const pkgDir = path.join(__dirname, 'pkg');
  const glueFile = path.join(pkgDir, 'ruvector_onnx_embeddings_wasm_bg.js');
  const binaryFile = path.join(pkgDir, 'ruvector_onnx_embeddings_wasm_bg.wasm');

  // The JS glue doubles as the import object the WASM binary expects.
  const glue = await import(pathToFileURL(glueFile).href);
  const binary = fs.readFileSync(binaryFile);
  const imports = { './ruvector_onnx_embeddings_wasm_bg.js': glue };
  const { instance } = await WebAssembly.instantiate(binary, imports);
  const exportsTable = instance.exports;

  if (typeof glue.__wbg_set_wasm === 'function') {
    glue.__wbg_set_wasm(exportsTable);
  }
  if (typeof exportsTable.__wbindgen_start === 'function') {
    exportsTable.__wbindgen_start();
  }

  // View over the SharedArrayBuffer handed in by the parent thread.
  const sharedModel = new Uint8Array(workerData.modelSab);

  let embedderConfig = new glue.WasmEmbedderConfig();
  embedderConfig = embedderConfig.setMaxLength(workerData.maxLength || 256);
  embedderConfig = embedderConfig.setNormalize(true);
  embedderConfig = embedderConfig.setPooling(0); // Mean pooling — matches the single-thread path.

  embedder = glue.WasmEmbedder.withConfig(sharedModel, workerData.tokenizerJson, embedderConfig);
}
|
|
49
|
+
|
|
50
|
+
// Serve 'embed' requests from the parent thread; every other message type is ignored.
parentPort.on('message', (request) => {
  if (request.type !== 'embed') {
    return;
  }
  const { id, texts } = request;
  try {
    const dim = embedder.dimension();
    // Flat output: texts.length * dim values.
    const vectors = Float32Array.from(embedder.embedBatch(texts));
    const reply = { type: 'result', id, dim, count: texts.length, buffer: vectors.buffer };
    // Transfer the underlying buffer rather than copying it.
    parentPort.postMessage(reply, [vectors.buffer]);
  } catch (failure) {
    const error = failure?.message || String(failure);
    parentPort.postMessage({ type: 'error', id, error });
  }
});

// Bootstrap: announce readiness, or report why initialisation failed.
const announceReady = () => parentPort.postMessage({ type: 'ready' });
const announceInitFailure = (failure) =>
  parentPort.postMessage({ type: 'init-error', error: failure?.message || String(failure) });

init().then(announceReady).catch(announceInitFailure);
|
package/dist/core/onnx/loader.js
CHANGED
|
@@ -76,6 +76,14 @@ export const MODELS = {
|
|
|
76
76
|
*/
|
|
77
77
|
export const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
|
|
78
78
|
|
|
79
|
+
/**
|
|
80
|
+
* In-memory memo of loaded models, keyed by model name. Deduplicates the
|
|
81
|
+
* (re-)download + decode when multiple embedder instances load the same model
|
|
82
|
+
* in one process. In Node there is no Cache API, so without this every
|
|
83
|
+
* ModelLoader.loadModel() re-fetches the model from HuggingFace (issue #523).
|
|
84
|
+
*/
|
|
85
|
+
const _inMemoryModelCache = new Map();
|
|
86
|
+
|
|
79
87
|
/**
|
|
80
88
|
* Model loader with caching support
|
|
81
89
|
*/
|
|
@@ -97,6 +105,24 @@ export class ModelLoader {
|
|
|
97
105
|
throw new Error(`Unknown model: ${modelName}. Available: ${Object.keys(MODELS).join(', ')}`);
|
|
98
106
|
}
|
|
99
107
|
|
|
108
|
+
// In-memory memo: a second load of the same model in this process reuses
|
|
109
|
+
// the already-downloaded bytes instead of re-fetching (issue #523).
|
|
110
|
+
if (this.cache && _inMemoryModelCache.has(modelName)) {
|
|
111
|
+
return _inMemoryModelCache.get(modelName);
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// On-disk cache (Node only): models persist across processes so they are
|
|
115
|
+
// downloaded once, not every run. The browser has the Cache API instead
|
|
116
|
+
// (handled in fetchWithCache). See issue #523.
|
|
117
|
+
if (this.cache) {
|
|
118
|
+
const disk = await this._loadFromDisk(modelName);
|
|
119
|
+
if (disk) {
|
|
120
|
+
const cached = { ...disk, config: modelConfig };
|
|
121
|
+
_inMemoryModelCache.set(modelName, cached);
|
|
122
|
+
return cached;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
|
|
100
126
|
console.log(`Loading model: ${modelConfig.name} (${modelConfig.size})`);
|
|
101
127
|
|
|
102
128
|
const [modelBytes, tokenizerJson] = await Promise.all([
|
|
@@ -104,11 +130,71 @@ export class ModelLoader {
|
|
|
104
130
|
this.fetchWithCache(modelConfig.tokenizer, `${modelName}-tokenizer.json`, 'text'),
|
|
105
131
|
]);
|
|
106
132
|
|
|
107
|
-
|
|
133
|
+
const result = {
|
|
108
134
|
modelBytes: new Uint8Array(modelBytes),
|
|
109
135
|
tokenizerJson,
|
|
110
136
|
config: modelConfig,
|
|
111
137
|
};
|
|
138
|
+
|
|
139
|
+
if (this.cache) {
|
|
140
|
+
_inMemoryModelCache.set(modelName, result);
|
|
141
|
+
await this._saveToDisk(modelName, result.modelBytes, tokenizerJson);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
return result;
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Resolve the Node on-disk cache dir for a model (null in non-Node envs).
|
|
149
|
+
* Uses dynamic import so this module stays loadable in browsers/bundlers.
|
|
150
|
+
*/
|
|
151
|
+
async _diskCacheDir(modelName) {
|
|
152
|
+
if (typeof process === 'undefined' || !process.versions?.node) return null;
|
|
153
|
+
const home = process.env.RUVECTOR_CACHE_DIR
|
|
154
|
+
|| process.env.HOME || process.env.USERPROFILE || '/tmp';
|
|
155
|
+
const path = await import('node:path');
|
|
156
|
+
return path.join(home, '.ruvector', 'models', modelName);
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
/** Load model bytes + tokenizer from the Node disk cache, or null if absent. */
|
|
160
|
+
async _loadFromDisk(modelName) {
|
|
161
|
+
const dir = await this._diskCacheDir(modelName);
|
|
162
|
+
if (!dir) return null;
|
|
163
|
+
try {
|
|
164
|
+
const fs = await import('node:fs');
|
|
165
|
+
const path = await import('node:path');
|
|
166
|
+
const modelPath = path.join(dir, 'model.onnx');
|
|
167
|
+
const tokPath = path.join(dir, 'tokenizer.json');
|
|
168
|
+
if (!fs.existsSync(modelPath) || !fs.existsSync(tokPath)) return null;
|
|
169
|
+
const modelBytes = new Uint8Array(fs.readFileSync(modelPath));
|
|
170
|
+
const tokenizerJson = fs.readFileSync(tokPath, 'utf8');
|
|
171
|
+
if (modelBytes.length === 0 || tokenizerJson.length === 0) return null;
|
|
172
|
+
console.log(` Disk cache hit: ${modelName}`);
|
|
173
|
+
return { modelBytes, tokenizerJson };
|
|
174
|
+
} catch {
|
|
175
|
+
return null;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/** Persist model bytes + tokenizer to the Node disk cache (best-effort). */
|
|
180
|
+
async _saveToDisk(modelName, modelBytes, tokenizerJson) {
|
|
181
|
+
const dir = await this._diskCacheDir(modelName);
|
|
182
|
+
if (!dir) return;
|
|
183
|
+
try {
|
|
184
|
+
const fs = await import('node:fs');
|
|
185
|
+
const path = await import('node:path');
|
|
186
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
187
|
+
// Write to temp files then rename, so a crash mid-write can't leave a
|
|
188
|
+
// truncated cache entry that later reads would trust.
|
|
189
|
+
const mTmp = path.join(dir, 'model.onnx.tmp');
|
|
190
|
+
const tTmp = path.join(dir, 'tokenizer.json.tmp');
|
|
191
|
+
fs.writeFileSync(mTmp, Buffer.from(modelBytes));
|
|
192
|
+
fs.writeFileSync(tTmp, tokenizerJson);
|
|
193
|
+
fs.renameSync(mTmp, path.join(dir, 'model.onnx'));
|
|
194
|
+
fs.renameSync(tTmp, path.join(dir, 'tokenizer.json'));
|
|
195
|
+
} catch {
|
|
196
|
+
// Cache write is best-effort; embedding still works without it.
|
|
197
|
+
}
|
|
112
198
|
}
|
|
113
199
|
|
|
114
200
|
/**
|
|
Binary file
|
|
@@ -43,7 +43,13 @@ export interface SimilarityResult {
|
|
|
43
43
|
timeMs: number;
|
|
44
44
|
}
|
|
45
45
|
/**
|
|
46
|
-
* Check if ONNX embedder is available
|
|
46
|
+
* Check if the ONNX embedder is *available* — i.e. the bundled WASM files are
|
|
47
|
+
* present and the embedder can be initialized.
|
|
48
|
+
*
|
|
49
|
+
* NOTE: This is a capability check, NOT a readiness check. It returns `true`
|
|
50
|
+
* before `initOnnxEmbedder()` has run (so callers can decide whether to init).
|
|
51
|
+
* To check whether the model has actually been loaded, use `isOnnxInitialized()`
|
|
52
|
+
* or `isReady()`. See https://github.com/ruvnet/RuVector/issues/523.
|
|
47
53
|
*/
|
|
48
54
|
export declare function isOnnxAvailable(): boolean;
|
|
49
55
|
/**
|
|
@@ -72,9 +78,22 @@ export declare function cosineSimilarity(a: number[], b: number[]): number;
|
|
|
72
78
|
*/
|
|
73
79
|
export declare function getDimension(): number;
|
|
74
80
|
/**
|
|
75
|
-
* Check if embedder is ready
|
|
81
|
+
* Check if the embedder has been initialized (model loaded) and is ready to
|
|
82
|
+
* embed. Returns `false` until `initOnnxEmbedder()` (or the first `embed()`,
|
|
83
|
+
* which auto-initializes) has completed successfully.
|
|
76
84
|
*/
|
|
77
85
|
export declare function isReady(): boolean;
|
|
86
|
+
/**
|
|
87
|
+
* Whether the ONNX embedder has been initialized (model loaded).
|
|
88
|
+
*
|
|
89
|
+
* Post-init counterpart to `isOnnxAvailable()` (which only checks that the
|
|
90
|
+
* bundled files exist). Named distinctly from the WASM-core `isInitialized()`
|
|
91
|
+
* export to avoid a barrel name collision. Equivalent to `isReady()`; provided
|
|
92
|
+
* as a self-documenting gate so callers can distinguish "bundled" (available)
|
|
93
|
+
* from "loaded" (initialized). See
|
|
94
|
+
* https://github.com/ruvnet/RuVector/issues/523.
|
|
95
|
+
*/
|
|
96
|
+
export declare function isOnnxInitialized(): boolean;
|
|
78
97
|
/**
|
|
79
98
|
* Get embedder stats including SIMD and parallel capabilities
|
|
80
99
|
*/
|
|
@@ -91,6 +110,24 @@ export declare function getStats(): {
|
|
|
91
110
|
* Shutdown parallel workers (call on exit)
|
|
92
111
|
*/
|
|
93
112
|
export declare function shutdown(): Promise<void>;
|
|
113
|
+
/**
|
|
114
|
+
* Initialize the bundled-WASM worker pool for high-throughput batch embedding
|
|
115
|
+
* (issue #523 SOTA). Self-contained — uses Node worker_threads + the bundled
|
|
116
|
+
* WASM over SharedArrayBuffer model bytes, no external dependency. Vectors are
|
|
117
|
+
* identical to the single-thread path (cosine-equivalent).
|
|
118
|
+
*
|
|
119
|
+
* @param numWorkers number of worker threads (default: min(cpus-2, 16))
|
|
120
|
+
*/
|
|
121
|
+
export declare function initParallelEmbedder(numWorkers?: number): Promise<boolean>;
|
|
122
|
+
/**
|
|
123
|
+
* Batch-embed via the bundled worker pool, sharded across CPU cores. Lazily
|
|
124
|
+
* starts the pool on first use. Returns embeddings in input order.
|
|
125
|
+
*/
|
|
126
|
+
export declare function embedBatchParallel(texts: string[]): Promise<number[][]>;
|
|
127
|
+
/** Number of active pool workers (0 if the pool isn't started). */
|
|
128
|
+
export declare function getParallelWorkerCount(): number;
|
|
129
|
+
/** Shut down the bundled worker pool and release its threads. */
|
|
130
|
+
export declare function shutdownParallelEmbedder(): Promise<void>;
|
|
94
131
|
export declare class OnnxEmbedder {
|
|
95
132
|
private config;
|
|
96
133
|
constructor(config?: OnnxEmbedderConfig);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;
|
|
1
|
+
{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAsBD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA0FD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAuGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;;;GAIG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAO9C;AAED;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAkBhF;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAG7E;AAED,mEAAmE;AACnE,wBAAgB,sBAAsB,IAAI,MAAM,CAE/C;AAED,iEAAiE;AACjE,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC,CAK9D;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE
,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
|
|
@@ -57,8 +57,13 @@ exports.similarity = similarity;
|
|
|
57
57
|
exports.cosineSimilarity = cosineSimilarity;
|
|
58
58
|
exports.getDimension = getDimension;
|
|
59
59
|
exports.isReady = isReady;
|
|
60
|
+
exports.isOnnxInitialized = isOnnxInitialized;
|
|
60
61
|
exports.getStats = getStats;
|
|
61
62
|
exports.shutdown = shutdown;
|
|
63
|
+
exports.initParallelEmbedder = initParallelEmbedder;
|
|
64
|
+
exports.embedBatchParallel = embedBatchParallel;
|
|
65
|
+
exports.getParallelWorkerCount = getParallelWorkerCount;
|
|
66
|
+
exports.shutdownParallelEmbedder = shutdownParallelEmbedder;
|
|
62
67
|
const path = __importStar(require("path"));
|
|
63
68
|
const fs = __importStar(require("fs"));
|
|
64
69
|
const url_1 = require("url");
|
|
@@ -95,10 +100,22 @@ let loadPromise = null;
|
|
|
95
100
|
let isInitialized = false;
|
|
96
101
|
let parallelEnabled = false;
|
|
97
102
|
let parallelThreshold = 4;
|
|
103
|
+
// Captured at init so the bundled worker pool can reuse the loaded model bytes
|
|
104
|
+
// (shared to workers via SharedArrayBuffer) instead of re-downloading per worker.
|
|
105
|
+
let loadedModelBytes = null;
|
|
106
|
+
let loadedTokenizerJson = null;
|
|
107
|
+
let loadedMaxLength = 256;
|
|
108
|
+
let bundledPool = null;
|
|
98
109
|
// Default model
|
|
99
110
|
const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
|
|
100
111
|
/**
|
|
101
|
-
* Check if ONNX embedder is available
|
|
112
|
+
* Check if the ONNX embedder is *available* — i.e. the bundled WASM files are
|
|
113
|
+
* present and the embedder can be initialized.
|
|
114
|
+
*
|
|
115
|
+
* NOTE: This is a capability check, NOT a readiness check. It returns `true`
|
|
116
|
+
* before `initOnnxEmbedder()` has run (so callers can decide whether to init).
|
|
117
|
+
* To check whether the model has actually been loaded, use `isOnnxInitialized()`
|
|
118
|
+
* or `isReady()`. See https://github.com/ruvnet/RuVector/issues/523.
|
|
102
119
|
*/
|
|
103
120
|
function isOnnxAvailable() {
|
|
104
121
|
try {
|
|
@@ -143,7 +160,7 @@ async function tryInitParallel(config) {
|
|
|
143
160
|
// Skip if explicitly disabled
|
|
144
161
|
if (config.enableParallel === false)
|
|
145
162
|
return false;
|
|
146
|
-
//
|
|
163
|
+
// 1) Optional external package (back-compat). Absent by default.
|
|
147
164
|
try {
|
|
148
165
|
const parallelModule = await dynamicImport('ruvector-onnx-embeddings-wasm/parallel');
|
|
149
166
|
const { ParallelEmbedder } = parallelModule;
|
|
@@ -154,15 +171,44 @@ async function tryInitParallel(config) {
|
|
|
154
171
|
parallelThreshold = config.parallelThreshold || 4;
|
|
155
172
|
parallelEnabled = true;
|
|
156
173
|
parallelAvailable = true;
|
|
157
|
-
console.error(`Parallel embedder ready: ${parallelEmbedder.numWorkers} workers, SIMD: ${simdAvailable}`);
|
|
174
|
+
console.error(`Parallel embedder ready (external): ${parallelEmbedder.numWorkers} workers, SIMD: ${simdAvailable}`);
|
|
158
175
|
return true;
|
|
159
176
|
}
|
|
160
|
-
catch
|
|
177
|
+
catch {
|
|
178
|
+
// External package not installed — fall through to the bundled pool.
|
|
179
|
+
}
|
|
180
|
+
// 2) Bundled, zero-dependency worker pool over the already-loaded model bytes.
|
|
181
|
+
// Opt-in only (enableParallel === true) so the default/'auto' path does not
|
|
182
|
+
// silently spawn worker threads for existing callers. Vectors are identical
|
|
183
|
+
// to the single-thread path (issue #523).
|
|
184
|
+
if (config.enableParallel !== true) {
|
|
161
185
|
parallelAvailable = false;
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
186
|
+
return false;
|
|
187
|
+
}
|
|
188
|
+
try {
|
|
189
|
+
if (!loadedModelBytes || !loadedTokenizerJson) {
|
|
190
|
+
throw new Error('model bytes unavailable for bundled pool');
|
|
165
191
|
}
|
|
192
|
+
const poolUrl = (0, url_1.pathToFileURL)(path.join(__dirname, 'onnx', 'bundled-parallel.mjs')).href;
|
|
193
|
+
const { ParallelEmbedder } = await dynamicImport(poolUrl);
|
|
194
|
+
const pool = new ParallelEmbedder({
|
|
195
|
+
modelBytes: loadedModelBytes,
|
|
196
|
+
tokenizerJson: loadedTokenizerJson,
|
|
197
|
+
maxLength: loadedMaxLength,
|
|
198
|
+
dimension: embedder ? embedder.dimension() : 384,
|
|
199
|
+
numWorkers: config.numWorkers,
|
|
200
|
+
});
|
|
201
|
+
await pool.init();
|
|
202
|
+
parallelEmbedder = pool;
|
|
203
|
+
parallelThreshold = config.parallelThreshold || 4;
|
|
204
|
+
parallelEnabled = true;
|
|
205
|
+
parallelAvailable = true;
|
|
206
|
+
console.error(`Parallel embedder ready (bundled): ${pool.numWorkers} workers, SIMD: ${simdAvailable}`);
|
|
207
|
+
return true;
|
|
208
|
+
}
|
|
209
|
+
catch (e) {
|
|
210
|
+
parallelAvailable = false;
|
|
211
|
+
console.error(`Parallel embedder not available: ${e.message}`);
|
|
166
212
|
return false;
|
|
167
213
|
}
|
|
168
214
|
}
|
|
@@ -213,6 +259,10 @@ async function initOnnxEmbedder(config = {}) {
|
|
|
213
259
|
const modelId = config.modelId || DEFAULT_MODEL;
|
|
214
260
|
console.error(`Loading ONNX model: ${modelId}...`);
|
|
215
261
|
const { modelBytes, tokenizerJson, config: modelConfig } = await modelLoader.loadModel(modelId);
|
|
262
|
+
// Retain for the bundled parallel worker pool (see tryInitParallel and initParallelEmbedder).
|
|
263
|
+
loadedModelBytes = modelBytes;
|
|
264
|
+
loadedTokenizerJson = tokenizerJson;
|
|
265
|
+
loadedMaxLength = config.maxLength || modelConfig.maxLength || 256;
|
|
216
266
|
// Create embedder with config
|
|
217
267
|
const embedderConfig = new wasmModule.WasmEmbedderConfig()
|
|
218
268
|
.setMaxLength(config.maxLength || modelConfig.maxLength || 256)
|
|
@@ -354,11 +404,26 @@ function getDimension() {
|
|
|
354
404
|
return embedder ? embedder.dimension() : 384;
|
|
355
405
|
}
|
|
356
406
|
/**
 * Report whether the ONNX embedder has been initialized.
 *
 * This simply exposes the module-level `isInitialized` flag, which starts out
 * `false`. NOTE(review): the assignment that flips it to `true` lives outside
 * this excerpt; upstream documents it as happening once `initOnnxEmbedder()`
 * (or the first auto-initializing `embed()`) completes successfully.
 *
 * @returns {boolean} Current value of the `isInitialized` flag.
 */
function isReady() {
    const ready = isInitialized;
    return ready;
}
|
|
414
|
+
/**
 * Post-initialization gate for the ONNX embedder.
 *
 * `isOnnxAvailable()` answers "can the embedder be initialized?"; this
 * function answers "has it been initialized?". It reports exactly what
 * `isReady()` reports and exists under a separate, self-describing name so
 * it does not collide with the WASM-core `isInitialized()` export when both
 * are re-exported from a barrel module.
 * See https://github.com/ruvnet/RuVector/issues/523.
 *
 * @returns {boolean} Same value as `isReady()`.
 */
function isOnnxInitialized() {
    // Single source of truth: both gates read the same module-level flag.
    return isReady();
}
|
|
362
427
|
/**
|
|
363
428
|
* Get embedder stats including SIMD and parallel capabilities
|
|
364
429
|
*/
|
|
@@ -382,6 +447,56 @@ async function shutdown() {
|
|
|
382
447
|
parallelEmbedder = null;
|
|
383
448
|
parallelEnabled = false;
|
|
384
449
|
}
|
|
450
|
+
await shutdownParallelEmbedder();
|
|
451
|
+
}
|
|
452
|
+
// In-flight start-up of the bundled pool. Concurrent callers share this promise
// so that only one set of worker threads is ever spawned; it is reset to `null`
// once start-up settles (successfully or not) so a failed start can be retried.
let bundledPoolInit = null;
/**
 * Initialize the bundled-WASM worker pool for high-throughput batch embedding
 * (see https://github.com/ruvnet/RuVector/issues/523).
 *
 * The pool implementation is loaded from `onnx/bundled-parallel.mjs` next to
 * this file and is handed the model bytes, tokenizer JSON and max length that
 * were captured when the single-thread embedder was initialized, so no
 * external package is required.
 *
 * Idempotent and safe to call concurrently: if a pool already exists the call
 * returns immediately, and if start-up is already in progress the caller
 * joins it instead of spawning a second pool. In that case the `numWorkers`
 * of the first caller wins.
 *
 * @param {number} [numWorkers] Worker thread count, forwarded to the pool
 *   unchanged. When omitted the pool picks its own default (documented
 *   upstream as min(cpus - 2, 16)).
 * @returns {Promise<boolean>} Resolves to `true` once the pool is ready.
 * @throws {Error} If the model bytes or tokenizer are unavailable after
 *   initialization, or if loading/initializing the pool fails.
 */
async function initParallelEmbedder(numWorkers) {
    if (bundledPool)
        return true;
    if (!bundledPoolInit) {
        bundledPoolInit = startBundledParallelPool(numWorkers).finally(() => {
            bundledPoolInit = null;
        });
    }
    return bundledPoolInit;
}
/**
 * Build and initialize the bundled worker pool, then publish it as
 * `bundledPool`. Private helper of `initParallelEmbedder`; callers must go
 * through that function so start-up is never run twice in parallel.
 *
 * @param {number} [numWorkers] Worker thread count forwarded to the pool.
 * @returns {Promise<boolean>} Resolves to `true` once the pool is published.
 */
async function startBundledParallelPool(numWorkers) {
    // The pool reuses the bytes captured by initOnnxEmbedder(), so the
    // single-thread embedder has to be initialized first.
    if (!isInitialized)
        await initOnnxEmbedder();
    if (!loadedModelBytes || !loadedTokenizerJson) {
        throw new Error('Model bytes unavailable; cannot start parallel embedder.');
    }
    // The pool is an ES module; import it through a file:// URL.
    const poolUrl = (0, url_1.pathToFileURL)(path.join(__dirname, 'onnx', 'bundled-parallel.mjs')).href;
    const { ParallelEmbedder } = await dynamicImport(poolUrl);
    const pool = new ParallelEmbedder({
        modelBytes: loadedModelBytes,
        tokenizerJson: loadedTokenizerJson,
        maxLength: loadedMaxLength,
        dimension: getDimension(),
        numWorkers,
    });
    await pool.init();
    // Publish only after init() succeeded so callers never see a half-started pool.
    bundledPool = pool;
    return true;
}
|
|
481
|
+
/**
 * Embed a batch of texts through the bundled worker pool.
 *
 * The pool is started on demand via `initParallelEmbedder()` (with its
 * default worker count) when no pool exists yet.
 *
 * @param {string[]} texts Texts to embed; passed to the pool unchanged.
 * @returns {Promise<*>} Whatever the pool's `embedBatch(texts)` resolves
 *   with (documented upstream as embeddings in input order).
 */
async function embedBatchParallel(texts) {
    if (!bundledPool) {
        await initParallelEmbedder();
    }
    const pool = bundledPool;
    return pool.embedBatch(texts);
}
|
|
490
|
+
/**
 * Number of workers in the bundled pool.
 *
 * @returns {number} The pool's `numWorkers`, or 0 when no pool has been started.
 */
function getParallelWorkerCount() {
    if (!bundledPool) {
        return 0;
    }
    return bundledPool.numWorkers;
}
|
|
494
|
+
/**
 * Shut down the bundled worker pool and release its threads.
 *
 * A no-op when no pool is running. The module-level reference is cleared
 * before the pool's `shutdown()` is awaited, so that:
 *  - callers arriving during the shutdown do not dispatch work to a pool that
 *    is being torn down, and
 *  - a rejected `shutdown()` does not leave the module pointing at a pool in
 *    an unknown state (a later `embedBatchParallel()` starts a fresh one).
 *
 * @returns {Promise<void>} Resolves once the pool's `shutdown()` has resolved.
 * @throws Whatever the pool's `shutdown()` rejects with.
 */
async function shutdownParallelEmbedder() {
    const pool = bundledPool;
    if (!pool)
        return;
    bundledPool = null;
    await pool.shutdown();
}
|
|
386
501
|
// Export class wrapper for compatibility
|
|
387
502
|
class OnnxEmbedder {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAcH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0HD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;
|
|
1
|
+
{"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAcH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0HD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;IA+DpB;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAiChD;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAmD1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}
|
|
@@ -213,34 +213,16 @@ class OptimizedOnnxEmbedder {
|
|
|
213
213
|
}
|
|
214
214
|
const loaderModule = await dynamicImport(loaderUrl);
|
|
215
215
|
const { ModelLoader } = loaderModule;
|
|
216
|
-
//
|
|
216
|
+
// NOTE (issue #523): ModelLoader.loadModel() resolves the model by modelId
|
|
217
|
+
// from its own registry (currently FP32 all-MiniLM-L6-v2). The per-variant
|
|
218
|
+
// URLs in QUANTIZED_MODELS are not wired into the loader yet, so we must not
|
|
219
|
+
// log a quantization (FP16/INT8) that is not actually applied. When the
|
|
220
|
+
// loader gains variant support, thread the selected variant through to
|
|
221
|
+
// loadModel() here instead of computing an unused URL.
|
|
217
222
|
const modelInfo = QUANTIZED_MODELS[this.config.modelId];
|
|
218
|
-
let modelUrl;
|
|
219
223
|
if (modelInfo) {
|
|
220
|
-
if (this.config.useQuantized && this.config.quantization !== 'none') {
|
|
221
|
-
// Try quantized version first
|
|
222
|
-
if (this.config.quantization === 'int8' && modelInfo.int8) {
|
|
223
|
-
modelUrl = modelInfo.int8;
|
|
224
|
-
console.error(`Using INT8 quantized model: ${this.config.modelId}`);
|
|
225
|
-
}
|
|
226
|
-
else if (modelInfo.fp16) {
|
|
227
|
-
modelUrl = modelInfo.fp16;
|
|
228
|
-
console.error(`Using FP16 quantized model: ${this.config.modelId}`);
|
|
229
|
-
}
|
|
230
|
-
else {
|
|
231
|
-
modelUrl = modelInfo.onnx;
|
|
232
|
-
console.error(`Using FP32 model (no quantized version): ${this.config.modelId}`);
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
else {
|
|
236
|
-
modelUrl = modelInfo.onnx;
|
|
237
|
-
}
|
|
238
224
|
this.dimension = modelInfo.dimension;
|
|
239
225
|
}
|
|
240
|
-
else {
|
|
241
|
-
// Fallback to default loader
|
|
242
|
-
modelUrl = '';
|
|
243
|
-
}
|
|
244
226
|
const modelLoader = new ModelLoader({
|
|
245
227
|
cache: true,
|
|
246
228
|
cacheDir: path.join(process.env.HOME || '/tmp', '.ruvector', 'models'),
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ruvector",
|
|
3
|
-
"version": "0.2.26",
|
|
3
|
+
"version": "0.2.27",
|
|
4
4
|
"description": "Self-learning vector database for Node.js — hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
"ruvector": "./bin/cli.js"
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
|
-
"build": "tsc && mkdir -p dist/core/onnx
|
|
11
|
+
"build": "tsc && mkdir -p dist/core/onnx && cp -r src/core/onnx/. dist/core/onnx/",
|
|
12
12
|
"verify-dist": "node scripts/verify-dist.js",
|
|
13
13
|
"prepack": "npm run build && npm run verify-dist",
|
|
14
14
|
"prepublishOnly": "npm run build && npm run verify-dist",
|