ruvector 0.2.26 → 0.2.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,6 +81,15 @@ export declare class AdaptiveEmbedder {
81
81
  * Initialize ONNX backend
82
82
  */
83
83
  init(): Promise<void>;
84
+ /**
85
+ * Whether the ONNX backend is initialized and producing real (non-hash)
86
+ * embeddings. Returns `false` until `init()` has successfully loaded the
87
+ * ONNX model. Note that `embed()` still works when this is `false` — it
88
+ * falls back to a deterministic hash embedding — so callers that require
89
+ * real semantic embeddings should check this after `init()`.
90
+ * See https://github.com/ruvnet/RuVector/issues/523.
91
+ */
92
+ isReady(): boolean;
84
93
  /**
85
94
  * Generate adaptive embedding
86
95
  * Pipeline: ONNX → LoRA → Prototype Adjustment → Episodic Augmentation
@@ -1 +1 @@
1
- {"version":3,"file":"adaptive-embedder.d.ts","sourceRoot":"","sources":["../../src/core/adaptive-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kCAAkC;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gDAAgD;IAChD,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,uCAAuC;IACvC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,6CAA6C;IAC7C,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AA+pBD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,IAAI,CAAY;IACxB,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,SAAS,CAAe;IAGhC,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,gBAAgB,CAAa;IAGrC,OAAO,CAAC,YAAY,CAA+E;gBAEvF,MAAM,GAAE,cAAmB;IAiBvC;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAClC,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAmCrB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE;QAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAsBvB;;;OAGG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAkBpG;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA+B1B;;OAEG;IACG,gBAAgB,CACpB,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,MAAY,
GACpB,OAAO,CAAC,IAAI,CAAC;IAiBhB;;;OAGG;IACG,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAmBlC;;OAEG;IACH,OAAO,CAAC,SAAS;IAoBjB,OAAO,CAAC,SAAS;IAKjB;;OAEG;IACH,QAAQ,IAAI,aAAa;IAczB;;OAEG;IACH,MAAM,IAAI;QACR,IAAI,EAAE,WAAW,CAAC;QAClB,UAAU,EAAE,eAAe,EAAE,CAAC;QAC9B,KAAK,EAAE,aAAa,CAAC;KACtB;IAQD;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,EAAE,WAAW,CAAC;QAAC,UAAU,CAAC,EAAE,eAAe,EAAE,CAAA;KAAE,GAAG,IAAI;IAS1E;;OAEG;IACH,KAAK,IAAI,IAAI;IAUb;;OAEG;IACH,aAAa,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE;CAGnD;AAQD,wBAAgB,mBAAmB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,gBAAgB,CAK7E;AAED,wBAAsB,oBAAoB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAI7F;AAED,eAAe,gBAAgB,CAAC"}
1
+ {"version":3,"file":"adaptive-embedder.d.ts","sourceRoot":"","sources":["../../src/core/adaptive-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4BG;AAQH,MAAM,WAAW,cAAc;IAC7B,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uCAAuC;IACvC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,kCAAkC;IAClC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,gDAAgD;IAChD,mBAAmB,CAAC,EAAE,OAAO,CAAC;IAC9B,uCAAuC;IACvC,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,6CAA6C;IAC7C,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;CACjB;AAED,MAAM,WAAW,eAAe;IAC9B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,aAAa;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,kBAAkB,EAAE,MAAM,CAAC;CAC5B;AA+pBD,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,MAAM,CAA2B;IACzC,OAAO,CAAC,IAAI,CAAY;IACxB,OAAO,CAAC,UAAU,CAAkB;IACpC,OAAO,CAAC,QAAQ,CAAiB;IACjC,OAAO,CAAC,SAAS,CAAkB;IACnC,OAAO,CAAC,SAAS,CAAe;IAGhC,OAAO,CAAC,eAAe,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,gBAAgB,CAAa;IAGrC,OAAO,CAAC,YAAY,CAA+E;gBAEvF,MAAM,GAAE,cAAmB;IAiBvC;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAO3B;;;;;;;OAOG;IACH,OAAO,IAAI,OAAO;IAIlB;;;OAGG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE;QAClC,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,WAAW,CAAC,EAAE,OAAO,CAAC;QACtB,aAAa,CAAC,EAAE,OAAO,CAAC;KACzB,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAmCrB;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE;QAC1C,MAAM,CAAC,EAAE,MAAM,CAAC;KACjB,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAsBvB;;;OAGG;IACG,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAkBpG;;OAEG;IACH,OAAO,CAAC,kBAAkB;IA+B1B;;OAEG;IACG,gBAAgB,CACpB,OAAO,EAAE,MAAM,EACf,MAAM,EAAE,MAAM,EACd
,OAAO,EAAE,OAAO,EAChB,OAAO,GAAE,MAAY,GACpB,OAAO,CAAC,IAAI,CAAC;IAiBhB;;;OAGG;IACG,WAAW,IAAI,OAAO,CAAC,IAAI,CAAC;IAmBlC;;OAEG;IACH,OAAO,CAAC,SAAS;IAoBjB,OAAO,CAAC,SAAS;IAKjB;;OAEG;IACH,QAAQ,IAAI,aAAa;IAczB;;OAEG;IACH,MAAM,IAAI;QACR,IAAI,EAAE,WAAW,CAAC;QAClB,UAAU,EAAE,eAAe,EAAE,CAAC;QAC9B,KAAK,EAAE,aAAa,CAAC;KACtB;IAQD;;OAEG;IACH,MAAM,CAAC,IAAI,EAAE;QAAE,IAAI,CAAC,EAAE,WAAW,CAAC;QAAC,UAAU,CAAC,EAAE,eAAe,EAAE,CAAA;KAAE,GAAG,IAAI;IAS1E;;OAEG;IACH,KAAK,IAAI,IAAI;IAUb;;OAEG;IACH,aAAa,IAAI;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE;CAGnD;AAQD,wBAAgB,mBAAmB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,gBAAgB,CAK7E;AAED,wBAAsB,oBAAoB,CAAC,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAI7F;AAED,eAAe,gBAAgB,CAAC"}
@@ -611,6 +611,17 @@ class AdaptiveEmbedder {
611
611
  this.onnxReady = true;
612
612
  }
613
613
  }
614
+ /**
615
+ * Whether the ONNX backend is initialized and producing real (non-hash)
616
+ * embeddings. Returns `false` until `init()` has successfully loaded the
617
+ * ONNX model. Note that `embed()` still works when this is `false` — it
618
+ * falls back to a deterministic hash embedding — so callers that require
619
+ * real semantic embeddings should check this after `init()`.
620
+ * See https://github.com/ruvnet/RuVector/issues/523.
621
+ */
622
+ isReady() {
623
+ return this.onnxReady;
624
+ }
614
625
  /**
615
626
  * Generate adaptive embedding
616
627
  * Pipeline: ONNX → LoRA → Prototype Adjustment → Episodic Augmentation
@@ -0,0 +1,164 @@
1
+ /**
2
+ * Bundled-WASM parallel embedder (issue #523 SOTA).
3
+ *
4
+ * A self-contained worker_threads pool — NO external dependency — that shards
5
+ * batches of text across CPU cores, each worker running the bundled ONNX WASM
6
+ * embedder over the SAME model bytes (shared via SharedArrayBuffer) and config.
7
+ * Output vectors are identical to the single-thread path (cosine-equivalent),
8
+ * so this is a pure throughput optimization with no quality change.
9
+ *
10
+ * Drop-in shape compatible with the optional `ruvector-onnx-embeddings-wasm/parallel`
11
+ * package: { numWorkers, dimension, init(), embedBatch(texts) -> number[][], shutdown() }.
12
+ */
13
+ import { Worker } from 'node:worker_threads';
14
+ import * as os from 'node:os';
15
+ import { fileURLToPath } from 'node:url';
16
+ import * as path from 'node:path';
17
+
18
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
19
+
20
/**
 * Pool of worker threads that shards text batches across workers running
 * `./embed-worker.mjs`. Each request is tracked in `_pending` by a numeric id
 * until the worker answers, the worker dies, the request times out, or the
 * pool is shut down.
 */
export class ParallelEmbedder {
  /**
   * @param {object} opts
   * @param {Uint8Array} opts.modelBytes raw ONNX model bytes (loaded once by caller)
   * @param {string} opts.tokenizerJson
   * @param {number} [opts.maxLength=256]
   * @param {number} [opts.dimension=384]
   * @param {number} [opts.numWorkers] defaults to min(cpus-2, 16), >=2
   * @param {number} [opts.requestTimeoutMs=30000] per-request timeout in ms;
   *   a value <= 0 disables the timeout
   */
  constructor(opts = {}) {
    this.numWorkers = opts.numWorkers || Math.max(2, Math.min((os.cpus().length || 4) - 2, 16));
    this.dimension = opts.dimension || 384;
    this._modelBytes = opts.modelBytes;
    this._tokenizerJson = opts.tokenizerJson;
    this._maxLength = opts.maxLength || 256;
    this._requestTimeoutMs = opts.requestTimeoutMs ?? 30000;
    this._workers = [];
    this._pending = new Map(); // id -> { resolve, reject, worker, timer }
    this._seq = 0;
    this._shuttingDown = false;
  }

  /**
   * Spawn the workers and wait until every one has reported `ready`.
   * Calling it again on a pool that already has workers is a no-op.
   *
   * @returns {Promise<void>}
   * @throws {Error} if model bytes / tokenizer are missing, or if any worker
   *   fails to initialize (already-spawned workers are terminated first).
   */
  async init() {
    // Already running: a second init would spawn a duplicate set of workers.
    if (this._workers.length > 0) return;
    if (!this._modelBytes || !this._tokenizerJson) {
      throw new Error('ParallelEmbedder requires modelBytes and tokenizerJson');
    }
    // Share model bytes across all workers via a single SharedArrayBuffer.
    const sab = new SharedArrayBuffer(this._modelBytes.length);
    new Uint8Array(sab).set(this._modelBytes);

    const workerUrl = new URL('./embed-worker.mjs', import.meta.url);
    const readies = [];

    try {
      for (let i = 0; i < this.numWorkers; i++) {
        const w = new Worker(workerUrl, {
          workerData: { modelSab: sab, tokenizerJson: this._tokenizerJson, maxLength: this._maxLength },
        });
        w.on('message', (m) => this._onMessage(m));
        // If a worker dies (uncaught error or unexpected exit), fail every request
        // currently routed to it instead of letting those promises hang forever.
        w.on('error', (e) => this._failWorker(w, e instanceof Error ? e : new Error(String(e))));
        w.on('exit', (code) => {
          if (!this._shuttingDown && code !== 0) {
            this._failWorker(w, new Error(`embed worker exited unexpectedly (code ${code})`));
          }
        });
        this._workers.push(w);
        readies.push(new Promise((resolve, reject) => {
          const cleanup = () => {
            w.off('message', onReady);
            w.off('error', onErr);
            w.off('exit', onExit);
          };
          const onReady = (m) => {
            if (m.type === 'ready') { cleanup(); resolve(); }
            else if (m.type === 'init-error') { cleanup(); reject(new Error('worker init failed: ' + m.error)); }
          };
          const onErr = (e) => { cleanup(); reject(e); };
          // A worker that exits before reporting readiness would otherwise
          // leave this promise (and therefore init()) pending forever.
          const onExit = (code) => {
            cleanup();
            reject(new Error(`embed worker exited before ready (code ${code})`));
          };
          w.on('message', onReady);
          w.once('error', onErr);
          w.once('exit', onExit);
        }));
      }

      await Promise.all(readies);
    } catch (err) {
      // Do not leak the threads that did start when one of them fails.
      const spawned = this._workers;
      this._workers = [];
      await Promise.allSettled(spawned.map((w) => w.terminate()));
      throw err;
    }
    // Drop the main-thread reference; the SAB keeps the shared copy alive.
    this._modelBytes = null;
  }

  /**
   * Remove a pending request (clearing its timer) and hand it to `fn`.
   * Does nothing if the id is no longer pending, so each request settles once.
   */
  _settle(id, fn) {
    const p = this._pending.get(id);
    if (!p) return;
    this._pending.delete(id);
    if (p.timer) clearTimeout(p.timer);
    fn(p);
  }

  /** Route a worker's `result` / `error` message to the matching request. */
  _onMessage(m) {
    if (m.type !== 'result' && m.type !== 'error') return;
    this._settle(m.id, (p) => {
      if (m.type === 'error') p.reject(new Error(m.error));
      else p.resolve({ dim: m.dim, count: m.count, flat: new Float32Array(m.buffer) });
    });
  }

  /**
   * Drop a dead worker from the pool and reject every in-flight request that
   * was routed to it. Removing it keeps later batches from being sent to a
   * thread that can no longer answer.
   */
  _failWorker(worker, err) {
    this._workers = this._workers.filter((w) => w !== worker);
    for (const [id, p] of this._pending) {
      if (p.worker === worker) this._settle(id, () => p.reject(err));
    }
  }

  /**
   * Post one `embed` request to a worker.
   *
   * @returns {Promise<{dim: number, count: number, flat: Float32Array}>}
   */
  _send(worker, texts) {
    const id = ++this._seq;
    return new Promise((resolve, reject) => {
      const entry = { resolve, reject, worker, timer: null };
      if (this._requestTimeoutMs > 0) {
        entry.timer = setTimeout(() => {
          this._settle(id, () =>
            reject(new Error(`embed request timed out after ${this._requestTimeoutMs}ms`)));
        }, this._requestTimeoutMs);
        // Don't keep the event loop alive solely for this timer.
        if (typeof entry.timer.unref === 'function') entry.timer.unref();
      }
      this._pending.set(id, entry);
      try {
        worker.postMessage({ type: 'embed', id, texts });
      } catch (err) {
        // postMessage can throw synchronously (e.g. uncloneable payload);
        // settle through the normal path so the entry and timer are released.
        this._settle(id, () => reject(err));
      }
    });
  }

  /**
   * Embed many texts, sharded across workers. Returns number[][] in input order.
   *
   * @param {string[]} texts
   * @returns {Promise<number[][]>}
   * @throws {Error} if the pool has no live workers, or a worker returns a
   *   result whose size does not match the shard it was given.
   */
  async embedBatch(texts) {
    if (!texts || texts.length === 0) return [];
    const workers = this._workers;
    if (workers.length === 0) {
      // Without this guard the shard size becomes Infinity and the method
      // would resolve with an array of empty slots.
      throw new Error('ParallelEmbedder has no active workers; call init() first');
    }
    const shardSize = Math.ceil(texts.length / workers.length);
    const shards = [];
    for (let start = 0, i = 0; start < texts.length; start += shardSize, i++) {
      const slice = texts.slice(start, start + shardSize);
      shards.push({ start, size: slice.length, task: this._send(workers[i], slice) });
    }
    const results = await Promise.all(shards.map((s) => s.task));
    const out = new Array(texts.length);
    for (let r = 0; r < results.length; r++) {
      const { dim, count, flat } = results[r];
      const { start, size } = shards[r];
      if (count !== size || flat.length < count * dim) {
        throw new Error(
          `embed worker returned ${count} vectors (${flat.length} floats, dim ${dim}) for ${size} texts`);
      }
      for (let j = 0; j < count; j++) {
        out[start + j] = Array.from(flat.subarray(j * dim, (j + 1) * dim));
      }
    }
    return out;
  }

  /**
   * Reject all in-flight requests and terminate every worker.
   *
   * @returns {Promise<void>}
   */
  async shutdown() {
    this._shuttingDown = true;
    // Reject anything still in flight so callers don't hang on shutdown.
    for (const [id, p] of this._pending) {
      this._settle(id, () => p.reject(new Error('ParallelEmbedder shut down')));
    }
    const ws = this._workers;
    this._workers = [];
    await Promise.all(ws.map((w) => w.terminate()));
  }
}
163
+
164
+ export default ParallelEmbedder;
@@ -0,0 +1,67 @@
1
+ /**
2
+ * Worker-thread entry for the bundled-WASM parallel embedder (issue #523 SOTA).
3
+ *
4
+ * Each worker loads its own instance of the bundled ONNX WASM embedder from the
5
+ * SAME model bytes (shared via SharedArrayBuffer — no per-worker download) and
6
+ * the SAME config, so the vectors it produces are identical to the single-thread
7
+ * path (cosine-equivalent by construction).
8
+ *
9
+ * Protocol:
10
+ * workerData: { modelSab: SharedArrayBuffer, tokenizerJson: string, maxLength: number }
11
+ * → posts { type: 'ready' } once the WASM embedder is constructed
12
+ * message { type: 'embed', id, texts: string[] }
13
+ * → posts { type: 'result', id, dim, count, buffer } (Float32Array buffer, transferred)
14
+ * errors → { type: 'error', id, error }
15
+ */
16
+ import { parentPort, workerData } from 'node:worker_threads';
17
+ import { pathToFileURL, fileURLToPath } from 'node:url';
18
+ import * as path from 'node:path';
19
+ import * as fs from 'node:fs';
20
+
21
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
22
+
23
+ let embedder = null;
24
+
25
/**
 * Build this worker's embedder: load the wasm-bindgen glue module and the
 * `.wasm` binary from the sibling `pkg/` directory, wire them together, and
 * construct a `WasmEmbedder` from the model bytes and tokenizer passed in
 * `workerData`. The result is stored in the module-level `embedder`.
 */
async function init() {
  const pkgDir = path.join(__dirname, 'pkg');
  const glueFile = path.join(pkgDir, 'ruvector_onnx_embeddings_wasm_bg.js');
  const binaryFile = path.join(pkgDir, 'ruvector_onnx_embeddings_wasm_bg.wasm');

  // Load the JS glue first; it doubles as the import object for the binary.
  const glue = await import(pathToFileURL(glueFile).href);
  const binary = fs.readFileSync(binaryFile);
  const { instance } = await WebAssembly.instantiate(binary, {
    './ruvector_onnx_embeddings_wasm_bg.js': glue,
  });
  const wasm = instance.exports;

  // Both hooks are optional, so only call the ones this build provides.
  if (typeof glue.__wbg_set_wasm === 'function') glue.__wbg_set_wasm(wasm);
  if (typeof wasm.__wbindgen_start === 'function') wasm.__wbindgen_start();

  // View over the SharedArrayBuffer handed in by the pool (no copy made here).
  const sharedModel = new Uint8Array(workerData.modelSab);

  const config = new glue.WasmEmbedderConfig()
    .setMaxLength(workerData.maxLength || 256)
    .setNormalize(true)
    .setPooling(0); // Mean pooling — matches the single-thread path.

  embedder = glue.WasmEmbedder.withConfig(sharedModel, workerData.tokenizerJson, config);
}
49
+
50
/**
 * Answer one `embed` request from the pool. Messages of any other type are
 * ignored. On success the vectors are posted back as a single Float32Array
 * whose buffer is transferred; any failure is reported as an `error` message
 * carrying the same request id.
 */
function handleEmbedRequest(msg) {
  if (msg.type !== 'embed') return;
  try {
    const dim = embedder.dimension();
    // Flat layout: texts.length * dim values.
    const vectors = Float32Array.from(embedder.embedBatch(msg.texts));
    const reply = {
      type: 'result',
      id: msg.id,
      dim,
      count: msg.texts.length,
      buffer: vectors.buffer,
    };
    parentPort.postMessage(reply, [vectors.buffer]);
  } catch (e) {
    parentPort.postMessage({ type: 'error', id: msg.id, error: e?.message || String(e) });
  }
}

parentPort.on('message', handleEmbedRequest);

// Tell the pool whether start-up succeeded.
const reportReady = () => parentPort.postMessage({ type: 'ready' });
const reportInitFailure = (e) =>
  parentPort.postMessage({ type: 'init-error', error: e?.message || String(e) });

init().then(reportReady).catch(reportInitFailure);
@@ -76,6 +76,14 @@ export const MODELS = {
76
76
  */
77
77
  export const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
78
78
 
79
+ /**
80
+ * In-memory memo of loaded models, keyed by model name. Deduplicates the
81
+ * (re-)download + decode when multiple embedder instances load the same model
82
+ * in one process. In Node there is no Cache API, so without this every
83
+ * ModelLoader.loadModel() re-fetches the model from HuggingFace (issue #523).
84
+ */
85
+ const _inMemoryModelCache = new Map();
86
+
79
87
  /**
80
88
  * Model loader with caching support
81
89
  */
@@ -97,6 +105,24 @@ export class ModelLoader {
97
105
  throw new Error(`Unknown model: ${modelName}. Available: ${Object.keys(MODELS).join(', ')}`);
98
106
  }
99
107
 
108
+ // In-memory memo: a second load of the same model in this process reuses
109
+ // the already-downloaded bytes instead of re-fetching (issue #523).
110
+ if (this.cache && _inMemoryModelCache.has(modelName)) {
111
+ return _inMemoryModelCache.get(modelName);
112
+ }
113
+
114
+ // On-disk cache (Node only): models persist across processes so they are
115
+ // downloaded once, not every run. The browser has the Cache API instead
116
+ // (handled in fetchWithCache). See issue #523.
117
+ if (this.cache) {
118
+ const disk = await this._loadFromDisk(modelName);
119
+ if (disk) {
120
+ const cached = { ...disk, config: modelConfig };
121
+ _inMemoryModelCache.set(modelName, cached);
122
+ return cached;
123
+ }
124
+ }
125
+
100
126
  console.log(`Loading model: ${modelConfig.name} (${modelConfig.size})`);
101
127
 
102
128
  const [modelBytes, tokenizerJson] = await Promise.all([
@@ -104,11 +130,71 @@ export class ModelLoader {
104
130
  this.fetchWithCache(modelConfig.tokenizer, `${modelName}-tokenizer.json`, 'text'),
105
131
  ]);
106
132
 
107
- return {
133
+ const result = {
108
134
  modelBytes: new Uint8Array(modelBytes),
109
135
  tokenizerJson,
110
136
  config: modelConfig,
111
137
  };
138
+
139
+ if (this.cache) {
140
+ _inMemoryModelCache.set(modelName, result);
141
+ await this._saveToDisk(modelName, result.modelBytes, tokenizerJson);
142
+ }
143
+
144
+ return result;
145
+ }
146
+
147
+ /**
148
+ * Resolve the Node on-disk cache dir for a model (null in non-Node envs).
149
+ * Uses dynamic import so this module stays loadable in browsers/bundlers.
150
+ */
151
+ async _diskCacheDir(modelName) {
152
+ if (typeof process === 'undefined' || !process.versions?.node) return null;
153
+ const home = process.env.RUVECTOR_CACHE_DIR
154
+ || process.env.HOME || process.env.USERPROFILE || '/tmp';
155
+ const path = await import('node:path');
156
+ return path.join(home, '.ruvector', 'models', modelName);
157
+ }
158
+
159
+ /** Load model bytes + tokenizer from the Node disk cache, or null if absent. */
160
+ async _loadFromDisk(modelName) {
161
+ const dir = await this._diskCacheDir(modelName);
162
+ if (!dir) return null;
163
+ try {
164
+ const fs = await import('node:fs');
165
+ const path = await import('node:path');
166
+ const modelPath = path.join(dir, 'model.onnx');
167
+ const tokPath = path.join(dir, 'tokenizer.json');
168
+ if (!fs.existsSync(modelPath) || !fs.existsSync(tokPath)) return null;
169
+ const modelBytes = new Uint8Array(fs.readFileSync(modelPath));
170
+ const tokenizerJson = fs.readFileSync(tokPath, 'utf8');
171
+ if (modelBytes.length === 0 || tokenizerJson.length === 0) return null;
172
+ console.log(` Disk cache hit: ${modelName}`);
173
+ return { modelBytes, tokenizerJson };
174
+ } catch {
175
+ return null;
176
+ }
177
+ }
178
+
179
+ /** Persist model bytes + tokenizer to the Node disk cache (best-effort). */
180
+ async _saveToDisk(modelName, modelBytes, tokenizerJson) {
181
+ const dir = await this._diskCacheDir(modelName);
182
+ if (!dir) return;
183
+ try {
184
+ const fs = await import('node:fs');
185
+ const path = await import('node:path');
186
+ fs.mkdirSync(dir, { recursive: true });
187
+ // Write to temp files then rename, so a crash mid-write can't leave a
188
+ // truncated cache entry that later reads would trust.
189
+ const mTmp = path.join(dir, 'model.onnx.tmp');
190
+ const tTmp = path.join(dir, 'tokenizer.json.tmp');
191
+ fs.writeFileSync(mTmp, Buffer.from(modelBytes));
192
+ fs.writeFileSync(tTmp, tokenizerJson);
193
+ fs.renameSync(mTmp, path.join(dir, 'model.onnx'));
194
+ fs.renameSync(tTmp, path.join(dir, 'tokenizer.json'));
195
+ } catch {
196
+ // Cache write is best-effort; embedding still works without it.
197
+ }
112
198
  }
113
199
 
114
200
  /**
@@ -0,0 +1,3 @@
1
+ {
2
+ "type": "module"
3
+ }
Binary file
@@ -43,7 +43,13 @@ export interface SimilarityResult {
43
43
  timeMs: number;
44
44
  }
45
45
  /**
46
- * Check if ONNX embedder is available (bundled files exist)
46
+ * Check if the ONNX embedder is *available* — i.e. the bundled WASM files are
47
+ * present and the embedder can be initialized.
48
+ *
49
+ * NOTE: This is a capability check, NOT a readiness check. It returns `true`
50
+ * before `initOnnxEmbedder()` has run (so callers can decide whether to init).
51
+ * To check whether the model has actually been loaded, use `isOnnxInitialized()`
52
+ * or `isReady()`. See https://github.com/ruvnet/RuVector/issues/523.
47
53
  */
48
54
  export declare function isOnnxAvailable(): boolean;
49
55
  /**
@@ -72,9 +78,22 @@ export declare function cosineSimilarity(a: number[], b: number[]): number;
72
78
  */
73
79
  export declare function getDimension(): number;
74
80
  /**
75
- * Check if embedder is ready
81
+ * Check if the embedder has been initialized (model loaded) and is ready to
82
+ * embed. Returns `false` until `initOnnxEmbedder()` (or the first `embed()`,
83
+ * which auto-initializes) has completed successfully.
76
84
  */
77
85
  export declare function isReady(): boolean;
86
+ /**
87
+ * Whether the ONNX embedder has been initialized (model loaded).
88
+ *
89
+ * Post-init counterpart to `isOnnxAvailable()` (which only checks that the
90
+ * bundled files exist). Named distinctly from the WASM-core `isInitialized()`
91
+ * export to avoid a barrel name collision. Equivalent to `isReady()`; provided
92
+ * as a self-documenting gate so callers can distinguish "bundled" (available)
93
+ * from "loaded" (initialized). See
94
+ * https://github.com/ruvnet/RuVector/issues/523.
95
+ */
96
+ export declare function isOnnxInitialized(): boolean;
78
97
  /**
79
98
  * Get embedder stats including SIMD and parallel capabilities
80
99
  */
@@ -91,6 +110,24 @@ export declare function getStats(): {
91
110
  * Shutdown parallel workers (call on exit)
92
111
  */
93
112
  export declare function shutdown(): Promise<void>;
113
+ /**
114
+ * Initialize the bundled-WASM worker pool for high-throughput batch embedding
115
+ * (issue #523 SOTA). Self-contained — uses Node worker_threads + the bundled
116
+ * WASM over SharedArrayBuffer model bytes, no external dependency. Vectors are
117
+ * identical to the single-thread path (cosine-equivalent).
118
+ *
119
+ * @param numWorkers number of worker threads (default: min(cpus-2, 16))
120
+ */
121
+ export declare function initParallelEmbedder(numWorkers?: number): Promise<boolean>;
122
+ /**
123
+ * Batch-embed via the bundled worker pool, sharded across CPU cores. Lazily
124
+ * starts the pool on first use. Returns embeddings in input order.
125
+ */
126
+ export declare function embedBatchParallel(texts: string[]): Promise<number[][]>;
127
+ /** Number of active pool workers (0 if the pool isn't started). */
128
+ export declare function getParallelWorkerCount(): number;
129
+ /** Shut down the bundled worker pool and release its threads. */
130
+ export declare function shutdownParallelEmbedder(): Promise<void>;
94
131
  export declare class OnnxEmbedder {
95
132
  private config;
96
133
  constructor(config?: OnnxEmbedderConfig);
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAeD;;GAEG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA6DD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAkGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;GAEG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAM9C;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
1
+ {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAsBD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AA0FD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAuGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;;;GAIG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAO9C;AAED;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAkBhF;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAG7E;AAED,mEAAmE;AACnE,wBAAgB,sBAAsB,IAAI,MAAM,CAE/C;AAED,iEAAiE;AACjE,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC,CAK9D;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GA
AE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
@@ -57,8 +57,13 @@ exports.similarity = similarity;
57
57
  exports.cosineSimilarity = cosineSimilarity;
58
58
  exports.getDimension = getDimension;
59
59
  exports.isReady = isReady;
60
+ exports.isOnnxInitialized = isOnnxInitialized;
60
61
  exports.getStats = getStats;
61
62
  exports.shutdown = shutdown;
63
+ exports.initParallelEmbedder = initParallelEmbedder;
64
+ exports.embedBatchParallel = embedBatchParallel;
65
+ exports.getParallelWorkerCount = getParallelWorkerCount;
66
+ exports.shutdownParallelEmbedder = shutdownParallelEmbedder;
62
67
  const path = __importStar(require("path"));
63
68
  const fs = __importStar(require("fs"));
64
69
  const url_1 = require("url");
@@ -95,10 +100,22 @@ let loadPromise = null;
95
100
  let isInitialized = false;
96
101
  let parallelEnabled = false;
97
102
  let parallelThreshold = 4;
103
+ // Captured at init so the bundled worker pool can reuse the loaded model bytes
104
+ // (shared to workers via SharedArrayBuffer) instead of re-downloading per worker.
105
+ let loadedModelBytes = null;
106
+ let loadedTokenizerJson = null;
107
+ let loadedMaxLength = 256;
108
+ let bundledPool = null;
98
109
  // Default model
99
110
  const DEFAULT_MODEL = 'all-MiniLM-L6-v2';
100
111
  /**
101
- * Check if ONNX embedder is available (bundled files exist)
112
+ * Check if the ONNX embedder is *available* — i.e. the bundled WASM files are
113
+ * present and the embedder can be initialized.
114
+ *
115
+ * NOTE: This is a capability check, NOT a readiness check. It returns `true`
116
+ * before `initOnnxEmbedder()` has run (so callers can decide whether to init).
117
+ * To check whether the model has actually been loaded, use `isOnnxInitialized()`
118
+ * or `isReady()`. See https://github.com/ruvnet/RuVector/issues/523.
102
119
  */
103
120
  function isOnnxAvailable() {
104
121
  try {
@@ -143,7 +160,7 @@ async function tryInitParallel(config) {
143
160
  // Skip if explicitly disabled
144
161
  if (config.enableParallel === false)
145
162
  return false;
146
- // For 'auto' or true, try to initialize
163
+ // 1) Optional external package (back-compat). Absent by default.
147
164
  try {
148
165
  const parallelModule = await dynamicImport('ruvector-onnx-embeddings-wasm/parallel');
149
166
  const { ParallelEmbedder } = parallelModule;
@@ -154,15 +171,44 @@ async function tryInitParallel(config) {
154
171
  parallelThreshold = config.parallelThreshold || 4;
155
172
  parallelEnabled = true;
156
173
  parallelAvailable = true;
157
- console.error(`Parallel embedder ready: ${parallelEmbedder.numWorkers} workers, SIMD: ${simdAvailable}`);
174
+ console.error(`Parallel embedder ready (external): ${parallelEmbedder.numWorkers} workers, SIMD: ${simdAvailable}`);
158
175
  return true;
159
176
  }
160
- catch (e) {
177
+ catch {
178
+ // External package not installed — fall through to the bundled pool.
179
+ }
180
+ // 2) Bundled, zero-dependency worker pool over the already-loaded model bytes.
181
+ // Opt-in only (enableParallel === true) so the default/'auto' path does not
182
+ // silently spawn worker threads for existing callers. Vectors are identical
183
+ // to the single-thread path (issue #523).
184
+ if (config.enableParallel !== true) {
161
185
  parallelAvailable = false;
162
- if (config.enableParallel === true) {
163
- // Only warn if explicitly requested
164
- console.error(`Parallel embedder not available: ${e.message}`);
186
+ return false;
187
+ }
188
+ try {
189
+ if (!loadedModelBytes || !loadedTokenizerJson) {
190
+ throw new Error('model bytes unavailable for bundled pool');
165
191
  }
192
+ const poolUrl = (0, url_1.pathToFileURL)(path.join(__dirname, 'onnx', 'bundled-parallel.mjs')).href;
193
+ const { ParallelEmbedder } = await dynamicImport(poolUrl);
194
+ const pool = new ParallelEmbedder({
195
+ modelBytes: loadedModelBytes,
196
+ tokenizerJson: loadedTokenizerJson,
197
+ maxLength: loadedMaxLength,
198
+ dimension: embedder ? embedder.dimension() : 384,
199
+ numWorkers: config.numWorkers,
200
+ });
201
+ await pool.init();
202
+ parallelEmbedder = pool;
203
+ parallelThreshold = config.parallelThreshold || 4;
204
+ parallelEnabled = true;
205
+ parallelAvailable = true;
206
+ console.error(`Parallel embedder ready (bundled): ${pool.numWorkers} workers, SIMD: ${simdAvailable}`);
207
+ return true;
208
+ }
209
+ catch (e) {
210
+ parallelAvailable = false;
211
+ console.error(`Parallel embedder not available: ${e.message}`);
166
212
  return false;
167
213
  }
168
214
  }
@@ -213,6 +259,10 @@ async function initOnnxEmbedder(config = {}) {
213
259
  const modelId = config.modelId || DEFAULT_MODEL;
214
260
  console.error(`Loading ONNX model: ${modelId}...`);
215
261
  const { modelBytes, tokenizerJson, config: modelConfig } = await modelLoader.loadModel(modelId);
262
+ // Retain for the bundled parallel worker pool (see initParallelEmbedder).
263
+ loadedModelBytes = modelBytes;
264
+ loadedTokenizerJson = tokenizerJson;
265
+ loadedMaxLength = config.maxLength || modelConfig.maxLength || 256;
216
266
  // Create embedder with config
217
267
  const embedderConfig = new wasmModule.WasmEmbedderConfig()
218
268
  .setMaxLength(config.maxLength || modelConfig.maxLength || 256)
@@ -354,11 +404,26 @@ function getDimension() {
354
404
  return embedder ? embedder.dimension() : 384;
355
405
  }
/**
 * Report whether the embedder has finished initializing.
 *
 * This is a pure read of the module-level `isInitialized` flag (which starts
 * out `false`); calling it never triggers initialization. Per the module's
 * documented contract the flag flips once `initOnnxEmbedder()` — or the first
 * auto-initializing `embed()` call — has completed successfully.
 *
 * @returns {boolean} The current value of the `isInitialized` flag.
 */
function isReady() {
    return isInitialized;
}
/**
 * Tell callers whether the ONNX model has actually been loaded.
 *
 * `isOnnxAvailable()` answers "can the embedder be initialized?" (bundled
 * files present); this function answers "has it been initialized?". It
 * returns the same module-level flag as `isReady()` and exists under a
 * distinct name so that re-exporting it from a barrel does not clash with the
 * WASM core's own `isInitialized()` export.
 * See https://github.com/ruvnet/RuVector/issues/523.
 *
 * @returns {boolean} The current value of the `isInitialized` flag.
 */
function isOnnxInitialized() {
    return isInitialized;
}
362
427
  /**
363
428
  * Get embedder stats including SIMD and parallel capabilities
364
429
  */
@@ -382,6 +447,56 @@ async function shutdown() {
382
447
  parallelEmbedder = null;
383
448
  parallelEnabled = false;
384
449
  }
450
+ await shutdownParallelEmbedder();
451
+ }
// In-flight pool startup shared by overlapping initParallelEmbedder() calls.
// Reset to null once startup settles so a failed attempt can be retried.
let bundledPoolInitPromise = null;
/**
 * Start the bundled worker pool used for high-throughput batch embedding
 * (issue #523). The pool class is loaded from `onnx/bundled-parallel.mjs`
 * next to this module and is handed the model bytes, tokenizer JSON and max
 * length captured by `initOnnxEmbedder()`, so the model is not loaded again.
 *
 * Safe to call repeatedly and concurrently: if a pool already exists this is
 * a no-op, and overlapping calls share a single startup instead of each
 * creating (and leaking) their own pool. `numWorkers` is only honoured by the
 * call that actually starts the pool.
 *
 * @param {number} [numWorkers] Number of worker threads; when omitted the
 *   pool implementation picks its own default.
 * @returns {Promise<boolean>} Resolves to `true` once a pool is running.
 * @throws {Error} If the model bytes or tokenizer are unavailable after
 *   initialization, or if loading/starting the pool fails.
 */
async function initParallelEmbedder(numWorkers) {
    if (bundledPool)
        return true;
    if (!bundledPoolInitPromise) {
        bundledPoolInitPromise = (async () => {
            // The pool reuses the already-loaded model, so make sure it is loaded.
            if (!isInitialized)
                await initOnnxEmbedder();
            if (!loadedModelBytes || !loadedTokenizerJson) {
                throw new Error('Model bytes unavailable; cannot start parallel embedder.');
            }
            const poolUrl = (0, url_1.pathToFileURL)(path.join(__dirname, 'onnx', 'bundled-parallel.mjs')).href;
            const { ParallelEmbedder } = await dynamicImport(poolUrl);
            const pool = new ParallelEmbedder({
                modelBytes: loadedModelBytes,
                tokenizerJson: loadedTokenizerJson,
                maxLength: loadedMaxLength,
                dimension: getDimension(),
                numWorkers,
            });
            await pool.init();
            // Publish only after init() succeeded so callers never see a half-started pool.
            bundledPool = pool;
            return true;
        })().finally(() => {
            bundledPoolInitPromise = null;
        });
    }
    return bundledPoolInitPromise;
}
/**
 * Embed a batch of texts through the bundled worker pool, starting the pool
 * lazily on first use via `initParallelEmbedder()`.
 *
 * An empty array resolves to `[]` immediately without starting any workers.
 *
 * @param {string[]} texts Texts to embed.
 * @returns {Promise<Array>} Whatever the pool's `embedBatch(texts)` resolves
 *   to (documented upstream as one embedding per text, in input order).
 * @throws {Error} If the pool cannot be started, or if it was shut down
 *   while this call was waiting for it to start.
 */
async function embedBatchParallel(texts) {
    // Nothing to embed: do not spin up worker threads for an empty batch.
    if (Array.isArray(texts) && texts.length === 0) {
        return [];
    }
    if (!bundledPool) {
        await initParallelEmbedder();
    }
    // Re-read after the await: a concurrent shutdown may have cleared the pool.
    const pool = bundledPool;
    if (!pool) {
        throw new Error('Parallel embedder is not available (pool was shut down during startup).');
    }
    return pool.embedBatch(texts);
}
/**
 * Report the size of the bundled worker pool.
 *
 * @returns {number} `bundledPool.numWorkers` when a pool is set, otherwise 0.
 */
function getParallelWorkerCount() {
    if (!bundledPool) {
        return 0;
    }
    return bundledPool.numWorkers;
}
/**
 * Shut down the bundled worker pool, if one is running.
 *
 * The module-level reference is cleared *before* awaiting `shutdown()`, so
 * overlapping calls shut the pool down only once and a pool whose shutdown
 * rejects is not left behind for later callers to reuse. A shutdown error
 * still propagates to the caller.
 *
 * @returns {Promise<void>} Resolves once the pool (if any) has shut down.
 */
async function shutdownParallelEmbedder() {
    const pool = bundledPool;
    if (!pool) {
        return;
    }
    bundledPool = null;
    await pool.shutdown();
}
386
501
  // Export class wrapper for compatibility
387
502
  class OnnxEmbedder {
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAcH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0HD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;IA8EpB;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAiChD;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAmD1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}
1
+ {"version":3,"file":"onnx-optimized.d.ts","sourceRoot":"","sources":["../../src/core/onnx-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;GAeG;AAcH,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,qDAAqD;IACrD,YAAY,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS,GAAG,MAAM,CAAC;IACpD,sCAAsC;IACtC,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0CAA0C;IAC1C,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B,iDAAiD;IACjD,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,oDAAoD;IACpD,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,qDAAqD;IACrD,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AA0HD,qBAAa,qBAAqB;IAChC,OAAO,CAAC,MAAM,CAAgC;IAC9C,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,cAAc,CAAiC;IACvD,OAAO,CAAC,cAAc,CAAwB;IAG9C,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,WAAW,CAAK;IACxB,OAAO,CAAC,SAAS,CAAO;gBAEZ,MAAM,GAAE,mBAAwB;IAiB5C;;OAEG;IACG,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;YAWb,MAAM;IA+DpB;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAiChD;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC;IAmD1D;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D;;OAEG;IACH,gBAAgB,CAAC,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,YAAY,GAAG,MAAM;IAmB1D;;OAEG;IACH,aAAa,IAAI;QACf,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,MAAM,EAAE,MAAM,CAAC;YAAC,OAAO,EAAE,MAAM,CAAC;YAAC,IAAI,EAAE,MAAM,CAAA;SAAE,CAAC;QAC3E,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,MAAM,CAAC;KACrB;IASD;;OAEG;IACH,UAAU,IAAI,IAAI;IAKlB;;OAEG;IACH,YAAY,IAAI,MAAM;IAItB;;OAEG;IACH,OAAO,IAAI,OAAO;IAIlB;;OAEG;IACH,SAAS,IAAI,QAAQ,CAAC,mBAAmB,CAAC;CAG3C;AAQD,wBAAgB,wBAAwB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,qBAAqB,CAK5F;AAED,wBAAsB,iBAAiB,CAAC,MAAM,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAIpG;AAED,eAAe,qBAAqB,CAAC"}
@@ -213,34 +213,16 @@ class OptimizedOnnxEmbedder {
213
213
  }
214
214
  const loaderModule = await dynamicImport(loaderUrl);
215
215
  const { ModelLoader } = loaderModule;
216
- // Select model URL based on quantization preference
216
+ // NOTE (issue #523): ModelLoader.loadModel() resolves the model by modelId
217
+ // from its own registry (currently FP32 all-MiniLM-L6-v2). The per-variant
218
+ // URLs in QUANTIZED_MODELS are not wired into the loader yet, so we must not
219
+ // log a quantization (FP16/INT8) that is not actually applied. When the
220
+ // loader gains variant support, thread the selected variant through to
221
+ // loadModel() here instead of computing an unused URL.
217
222
  const modelInfo = QUANTIZED_MODELS[this.config.modelId];
218
- let modelUrl;
219
223
  if (modelInfo) {
220
- if (this.config.useQuantized && this.config.quantization !== 'none') {
221
- // Try quantized version first
222
- if (this.config.quantization === 'int8' && modelInfo.int8) {
223
- modelUrl = modelInfo.int8;
224
- console.error(`Using INT8 quantized model: ${this.config.modelId}`);
225
- }
226
- else if (modelInfo.fp16) {
227
- modelUrl = modelInfo.fp16;
228
- console.error(`Using FP16 quantized model: ${this.config.modelId}`);
229
- }
230
- else {
231
- modelUrl = modelInfo.onnx;
232
- console.error(`Using FP32 model (no quantized version): ${this.config.modelId}`);
233
- }
234
- }
235
- else {
236
- modelUrl = modelInfo.onnx;
237
- }
238
224
  this.dimension = modelInfo.dimension;
239
225
  }
240
- else {
241
- // Fallback to default loader
242
- modelUrl = '';
243
- }
244
226
  const modelLoader = new ModelLoader({
245
227
  cache: true,
246
228
  cacheDir: path.join(process.env.HOME || '/tmp', '.ruvector', 'models'),
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ruvector",
3
- "version": "0.2.26",
3
+ "version": "0.2.27",
4
4
  "description": "Self-learning vector database for Node.js — hybrid search, Graph RAG, FlashAttention-3, HNSW, 50+ attention mechanisms",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -8,7 +8,7 @@
8
8
  "ruvector": "./bin/cli.js"
9
9
  },
10
10
  "scripts": {
11
- "build": "tsc && mkdir -p dist/core/onnx/pkg && cp -r src/core/onnx/pkg/. dist/core/onnx/pkg/",
11
+ "build": "tsc && mkdir -p dist/core/onnx && cp -r src/core/onnx/. dist/core/onnx/",
12
12
  "verify-dist": "node scripts/verify-dist.js",
13
13
  "prepack": "npm run build && npm run verify-dist",
14
14
  "prepublishOnly": "npm run build && npm run verify-dist",