ruvector 0.2.29 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +624 -88
- package/bin/mcp-server.js +198 -17
- package/dist/core/embedding-provenance.d.ts +145 -0
- package/dist/core/embedding-provenance.d.ts.map +1 -0
- package/dist/core/embedding-provenance.js +258 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +1 -0
- package/dist/core/intelligence-engine.d.ts +65 -4
- package/dist/core/intelligence-engine.d.ts.map +1 -1
- package/dist/core/intelligence-engine.js +149 -12
- package/dist/core/onnx/bundled-parallel.mjs +24 -19
- package/dist/core/onnx/loader.js +31 -4
- package/dist/core/onnx-embedder.d.ts +42 -1
- package/dist/core/onnx-embedder.d.ts.map +1 -1
- package/dist/core/onnx-embedder.js +116 -11
- package/dist/core/onnx-optimized.d.ts +8 -1
- package/dist/core/onnx-optimized.d.ts.map +1 -1
- package/dist/core/onnx-optimized.js +41 -6
- package/package.json +5 -4
|
@@ -19,6 +19,7 @@ exports.createLightweightEngine = createLightweightEngine;
|
|
|
19
19
|
const agentdb_fast_1 = require("./agentdb-fast");
|
|
20
20
|
const sona_wrapper_1 = require("./sona-wrapper");
|
|
21
21
|
const onnx_embedder_1 = require("./onnx-embedder");
|
|
22
|
+
const embedding_provenance_1 = require("./embedding-provenance");
|
|
22
23
|
const parallel_intelligence_1 = require("./parallel-intelligence");
|
|
23
24
|
// ============================================================================
|
|
24
25
|
// Lazy Loading
|
|
@@ -76,6 +77,10 @@ class IntelligenceEngine {
|
|
|
76
77
|
this.attention = null;
|
|
77
78
|
this.onnxEmbedder = null;
|
|
78
79
|
this.onnxReady = false;
|
|
80
|
+
this.onnxInitPromise = null;
|
|
81
|
+
this.onnxInitError = null;
|
|
82
|
+
/** RUVECTOR_EMBEDDER=minilm: fail rather than fall back (ADR-210 D5). */
|
|
83
|
+
this.onnxHardRequire = false;
|
|
79
84
|
this.parallel = null;
|
|
80
85
|
// In-memory data structures
|
|
81
86
|
this.memories = new Map();
|
|
@@ -92,8 +97,28 @@ class IntelligenceEngine {
|
|
|
92
97
|
this.sessionStart = Date.now();
|
|
93
98
|
this.learningEnabled = true;
|
|
94
99
|
this.episodeBatchQueue = [];
|
|
100
|
+
// ADR-210 D1/D5: ONNX semantic embeddings are the default. Environment
|
|
101
|
+
// rollout flags override config: RUVECTOR_EMBEDDER=auto|minilm|hash wins
|
|
102
|
+
// over RUVECTOR_ONNX=0|1, which wins over config.enableOnnx.
|
|
103
|
+
const selection = (0, embedding_provenance_1.resolveEmbedderSelection)();
|
|
104
|
+
let useOnnx;
|
|
105
|
+
if (selection === 'hash') {
|
|
106
|
+
useOnnx = false;
|
|
107
|
+
}
|
|
108
|
+
else if (selection === 'minilm') {
|
|
109
|
+
// Hard-require: init failure is an error, never a silent fallback.
|
|
110
|
+
if (!(0, onnx_embedder_1.isOnnxAvailable)()) {
|
|
111
|
+
throw new Error('RUVECTOR_EMBEDDER=minilm (or RUVECTOR_ONNX=1) hard-requires the ONNX embedder, ' +
|
|
112
|
+
'but the bundled WASM files are missing. Reinstall ruvector or unset the flag.');
|
|
113
|
+
}
|
|
114
|
+
useOnnx = true;
|
|
115
|
+
this.onnxHardRequire = true;
|
|
116
|
+
}
|
|
117
|
+
else {
|
|
118
|
+
// auto: default-on — MiniLM when loadable, loud hash fallback otherwise.
|
|
119
|
+
useOnnx = (config.enableOnnx ?? true) && (0, onnx_embedder_1.isOnnxAvailable)();
|
|
120
|
+
}
|
|
95
121
|
// If ONNX is enabled, use 384 dimensions (MiniLM default)
|
|
96
|
-
const useOnnx = !!(config.enableOnnx && (0, onnx_embedder_1.isOnnxAvailable)());
|
|
97
122
|
const embeddingDim = useOnnx ? 384 : (config.embeddingDim ?? 256);
|
|
98
123
|
this.config = {
|
|
99
124
|
embeddingDim,
|
|
@@ -102,6 +127,7 @@ class IntelligenceEngine {
|
|
|
102
127
|
enableSona: config.enableSona ?? true,
|
|
103
128
|
enableAttention: config.enableAttention ?? true,
|
|
104
129
|
enableOnnx: useOnnx,
|
|
130
|
+
onnxConfig: config.onnxConfig ?? {},
|
|
105
131
|
sonaConfig: config.sonaConfig ?? {},
|
|
106
132
|
storagePath: config.storagePath ?? '',
|
|
107
133
|
learningRate: config.learningRate ?? 0.1,
|
|
@@ -114,9 +140,9 @@ class IntelligenceEngine {
|
|
|
114
140
|
this.agentDb = new agentdb_fast_1.FastAgentDB(this.config.embeddingDim, this.config.maxEpisodes);
|
|
115
141
|
// Initialize ONNX embedder if enabled
|
|
116
142
|
if (this.config.enableOnnx) {
|
|
117
|
-
this.onnxEmbedder = new onnx_embedder_1.OnnxEmbedder();
|
|
143
|
+
this.onnxEmbedder = new onnx_embedder_1.OnnxEmbedder(this.config.onnxConfig);
|
|
118
144
|
// Initialize async (don't block constructor)
|
|
119
|
-
this.initOnnx();
|
|
145
|
+
this.onnxInitPromise = this.initOnnx();
|
|
120
146
|
}
|
|
121
147
|
// Initialize SONA if enabled and available
|
|
122
148
|
if (this.config.enableSona && (0, sona_wrapper_1.isSonaAvailable)()) {
|
|
@@ -144,16 +170,34 @@ class IntelligenceEngine {
|
|
|
144
170
|
}
|
|
145
171
|
async initOnnx() {
|
|
146
172
|
if (!this.onnxEmbedder)
|
|
147
|
-
return;
|
|
173
|
+
return false;
|
|
148
174
|
try {
|
|
149
175
|
await this.onnxEmbedder.init();
|
|
150
176
|
this.onnxReady = true;
|
|
177
|
+
return true;
|
|
151
178
|
}
|
|
152
179
|
catch (e) {
|
|
153
|
-
|
|
180
|
+
// Quiet here; the loud once-per-process notice fires on first
|
|
181
|
+
// fallback USE (ADR-210 D1 / acceptance gate 2).
|
|
182
|
+
this.onnxInitError = e instanceof Error ? e : new Error(String(e));
|
|
154
183
|
this.onnxReady = false;
|
|
184
|
+
return false;
|
|
155
185
|
}
|
|
156
186
|
}
|
|
187
|
+
/**
|
|
188
|
+
* Await lazy ONNX initialization. Resolves true once the model is loaded,
|
|
189
|
+
* false when it could not be (offline / restricted CI) — in which case
|
|
190
|
+
* stats().embedderKind reports 'hash-fallback' (ADR-210 D1).
|
|
191
|
+
*/
|
|
192
|
+
async awaitOnnx() {
|
|
193
|
+
if (!this.onnxInitPromise)
|
|
194
|
+
return false;
|
|
195
|
+
return this.onnxInitPromise;
|
|
196
|
+
}
|
|
197
|
+
/** Why ONNX init failed, or null (ADR-210 D1 observability). */
|
|
198
|
+
getOnnxInitError() {
|
|
199
|
+
return this.onnxInitError;
|
|
200
|
+
}
|
|
157
201
|
async initVectorDb() {
|
|
158
202
|
try {
|
|
159
203
|
const VDB = getVectorDB();
|
|
@@ -185,6 +229,11 @@ class IntelligenceEngine {
|
|
|
185
229
|
*/
|
|
186
230
|
embed(text) {
|
|
187
231
|
const dim = this.config.embeddingDim;
|
|
232
|
+
// ADR-210 D1: ONNX was requested but the model could not load — the hash
|
|
233
|
+
// fallback now serves embeds. Report it loudly, exactly once per process.
|
|
234
|
+
if (this.config.enableOnnx && this.onnxInitError) {
|
|
235
|
+
(0, embedding_provenance_1.warnHashFallbackOnce)(this.onnxInitError.message);
|
|
236
|
+
}
|
|
188
237
|
// Try to use attention-based embedding (best sync quality)
|
|
189
238
|
if (this.attention?.DotProductAttention) {
|
|
190
239
|
try {
|
|
@@ -198,25 +247,81 @@ class IntelligenceEngine {
|
|
|
198
247
|
return this.hashEmbed(text, dim);
|
|
199
248
|
}
|
|
200
249
|
/**
|
|
201
|
-
* Async embedding with ONNX support (recommended for semantic quality)
|
|
250
|
+
* Async embedding with ONNX support (recommended for semantic quality).
|
|
251
|
+
*
|
|
252
|
+
* ADR-210 D1: when ONNX is enabled but the model cannot load, the hash
|
|
253
|
+
* fallback is used and reported (one stderr warning per process, and
|
|
254
|
+
* stats().embedderKind === 'hash-fallback'). Under RUVECTOR_EMBEDDER=minilm
|
|
255
|
+
* the failure is an error instead — no fallback (D5).
|
|
202
256
|
*/
|
|
203
257
|
async embedAsync(text) {
|
|
204
258
|
// Try ONNX first (best semantic quality)
|
|
205
259
|
if (this.onnxEmbedder) {
|
|
206
260
|
try {
|
|
207
261
|
if (!this.onnxReady) {
|
|
208
|
-
await this.
|
|
209
|
-
|
|
262
|
+
const ok = this.onnxInitPromise ? await this.onnxInitPromise : await this.initOnnx();
|
|
263
|
+
if (!ok)
|
|
264
|
+
throw this.onnxInitError ?? new Error('ONNX initialization failed');
|
|
210
265
|
}
|
|
211
266
|
return await this.onnxEmbedder.embed(text);
|
|
212
267
|
}
|
|
213
|
-
catch {
|
|
268
|
+
catch (e) {
|
|
269
|
+
if (this.onnxHardRequire) {
|
|
270
|
+
throw new Error(`RUVECTOR_EMBEDDER=minilm hard-requires the ONNX embedder and fallback is disabled: ${e?.message ?? e}`);
|
|
271
|
+
}
|
|
272
|
+
(0, embedding_provenance_1.warnHashFallbackOnce)(e?.message ?? String(e));
|
|
214
273
|
// Fall through to sync methods
|
|
215
274
|
}
|
|
216
275
|
}
|
|
217
276
|
// Fall back to sync embedding
|
|
218
277
|
return this.embed(text);
|
|
219
278
|
}
|
|
279
|
+
/**
|
|
280
|
+
* Batch embedding for bulk ingest (ADR-210 D3). When the ONNX model is
|
|
281
|
+
* loaded, batches of 32+ texts route through the bundled parallel worker
|
|
282
|
+
* pool (parallel-fp32 — see embedBulk in onnx-embedder.ts for the int8
|
|
283
|
+
* status note); smaller batches use the single-threaded batch path. On
|
|
284
|
+
* fallback, semantics match embedAsync exactly: hash per-item with the
|
|
285
|
+
* loud once-per-process warning, or a hard error under
|
|
286
|
+
* RUVECTOR_EMBEDDER=minilm (D5). Texts are embedded as passages (D4).
|
|
287
|
+
*
|
|
288
|
+
* Callers that start the pool should call shutdownEmbedderPool() when the
|
|
289
|
+
* bulk work is done so worker threads do not keep the process alive.
|
|
290
|
+
*/
|
|
291
|
+
async embedBatchAsync(texts) {
|
|
292
|
+
if (!texts || texts.length === 0)
|
|
293
|
+
return [];
|
|
294
|
+
if (this.onnxEmbedder) {
|
|
295
|
+
try {
|
|
296
|
+
if (!this.onnxReady) {
|
|
297
|
+
const ok = this.onnxInitPromise ? await this.onnxInitPromise : await this.initOnnx();
|
|
298
|
+
if (!ok)
|
|
299
|
+
throw this.onnxInitError ?? new Error('ONNX initialization failed');
|
|
300
|
+
}
|
|
301
|
+
return await (0, onnx_embedder_1.embedBulk)(texts);
|
|
302
|
+
}
|
|
303
|
+
catch (e) {
|
|
304
|
+
if (this.onnxHardRequire) {
|
|
305
|
+
throw new Error(`RUVECTOR_EMBEDDER=minilm hard-requires the ONNX embedder and fallback is disabled: ${e?.message ?? e}`);
|
|
306
|
+
}
|
|
307
|
+
(0, embedding_provenance_1.warnHashFallbackOnce)(e?.message ?? String(e));
|
|
308
|
+
// Fall through to sync methods
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
return texts.map(t => this.embed(t));
|
|
312
|
+
}
|
|
313
|
+
/**
|
|
314
|
+
* Shut down the bundled bulk-embed worker pool, releasing its threads
|
|
315
|
+
* (ADR-210 D3). Safe to call when the pool was never started.
|
|
316
|
+
*/
|
|
317
|
+
async shutdownEmbedderPool() {
|
|
318
|
+
try {
|
|
319
|
+
await (0, onnx_embedder_1.shutdownParallelEmbedder)();
|
|
320
|
+
}
|
|
321
|
+
catch {
|
|
322
|
+
// Pool teardown is best-effort.
|
|
323
|
+
}
|
|
324
|
+
}
|
|
220
325
|
/**
|
|
221
326
|
* Attention-based embedding using Flash or Multi-head attention
|
|
222
327
|
*/
|
|
@@ -865,12 +970,39 @@ class IntelligenceEngine {
|
|
|
865
970
|
workerTriggers: this.workerTriggerMappings.size,
|
|
866
971
|
attentionEnabled: this.attention !== null,
|
|
867
972
|
onnxEnabled: this.onnxReady,
|
|
973
|
+
embedderKind: this.config.enableOnnx
|
|
974
|
+
? (this.onnxReady ? 'onnx-minilm' : 'hash-fallback')
|
|
975
|
+
: 'hash',
|
|
868
976
|
parallelEnabled: parallelStats.enabled,
|
|
869
977
|
parallelWorkers: parallelStats.workers,
|
|
870
978
|
parallelBusy: parallelStats.busy,
|
|
871
979
|
parallelQueued: parallelStats.queued,
|
|
872
980
|
};
|
|
873
981
|
}
|
|
982
|
+
/**
|
|
983
|
+
* Embedding provenance of vectors embedAsync() would produce right now
|
|
984
|
+
* (ADR-210 D0). Hash fallback embeds are 'hash' even while ONNX is enabled
|
|
985
|
+
* but not ready — provenance records what actually happened, not intent.
|
|
986
|
+
*/
|
|
987
|
+
getActiveProvenance() {
|
|
988
|
+
if (this.onnxReady) {
|
|
989
|
+
return ((0, onnx_embedder_1.getEmbedderProvenance)() ?? {
|
|
990
|
+
embedderKind: 'onnx-minilm',
|
|
991
|
+
modelId: 'all-MiniLM-L6-v2',
|
|
992
|
+
dimension: 384,
|
|
993
|
+
normalize: true,
|
|
994
|
+
prefixPolicy: 'none',
|
|
995
|
+
});
|
|
996
|
+
}
|
|
997
|
+
return {
|
|
998
|
+
embedderKind: 'hash',
|
|
999
|
+
modelId: null,
|
|
1000
|
+
dimension: this.config.embeddingDim,
|
|
1001
|
+
// The engine's hash/attention embedders L2-normalize their output.
|
|
1002
|
+
normalize: true,
|
|
1003
|
+
prefixPolicy: 'none',
|
|
1004
|
+
};
|
|
1005
|
+
}
|
|
874
1006
|
// =========================================================================
|
|
875
1007
|
// Persistence
|
|
876
1008
|
// =========================================================================
|
|
@@ -882,6 +1014,7 @@ class IntelligenceEngine {
|
|
|
882
1014
|
version: '2.0.0',
|
|
883
1015
|
exported: new Date().toISOString(),
|
|
884
1016
|
config: this.config,
|
|
1017
|
+
embeddingProvenance: this.getActiveProvenance(),
|
|
885
1018
|
memories: Array.from(this.memories.values()),
|
|
886
1019
|
routingPatterns: Object.fromEntries(Array.from(this.routingPatterns.entries()).map(([k, v]) => [
|
|
887
1020
|
k,
|
|
@@ -1032,7 +1165,10 @@ function createIntelligenceEngine(config) {
|
|
|
1032
1165
|
return new IntelligenceEngine(config);
|
|
1033
1166
|
}
|
|
1034
1167
|
/**
|
|
1035
|
-
* Create a high-performance engine with all features enabled
|
|
1168
|
+
* Create a high-performance engine with all features enabled.
|
|
1169
|
+
* Note (ADR-210): with default-on ONNX the embedding space is 384-dim; the
|
|
1170
|
+
* 512-dim setting only applies on the hash path (RUVECTOR_EMBEDDER=hash or
|
|
1171
|
+
* ONNX unavailable). SONA dims follow the engine's actual embeddingDim.
|
|
1036
1172
|
*/
|
|
1037
1173
|
function createHighPerformanceEngine() {
|
|
1038
1174
|
return new IntelligenceEngine({
|
|
@@ -1042,7 +1178,6 @@ function createHighPerformanceEngine() {
|
|
|
1042
1178
|
enableSona: true,
|
|
1043
1179
|
enableAttention: true,
|
|
1044
1180
|
sonaConfig: {
|
|
1045
|
-
hiddenDim: 512,
|
|
1046
1181
|
microLoraRank: 2,
|
|
1047
1182
|
baseLoraRank: 16,
|
|
1048
1183
|
patternClusters: 200,
|
|
@@ -1050,7 +1185,8 @@ function createHighPerformanceEngine() {
|
|
|
1050
1185
|
});
|
|
1051
1186
|
}
|
|
1052
1187
|
/**
|
|
1053
|
-
* Create a lightweight engine for fast startup
|
|
1188
|
+
* Create a lightweight engine for fast startup (hash embedder: no model load,
|
|
1189
|
+
* no download — the deterministic no-model path stays available, ADR-210).
|
|
1054
1190
|
*/
|
|
1055
1191
|
function createLightweightEngine() {
|
|
1056
1192
|
return new IntelligenceEngine({
|
|
@@ -1059,6 +1195,7 @@ function createLightweightEngine() {
|
|
|
1059
1195
|
maxEpisodes: 5000,
|
|
1060
1196
|
enableSona: false,
|
|
1061
1197
|
enableAttention: false,
|
|
1198
|
+
enableOnnx: false,
|
|
1062
1199
|
});
|
|
1063
1200
|
}
|
|
1064
1201
|
exports.default = IntelligenceEngine;
|
|
@@ -122,30 +122,35 @@ export class ParallelEmbedder {
|
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
/**
|
|
125
|
-
* Embed many texts
|
|
125
|
+
* Embed many texts across workers. Returns number[][] in input order.
|
|
126
|
+
*
|
|
127
|
+
* Texts are dispatched in bounded chunks (default 8) that workers pull as
|
|
128
|
+
* they finish (work-stealing), rather than one giant shard per worker:
|
|
129
|
+
* a large bulk batch (ADR-210 D3 ingest) would otherwise exceed the
|
|
130
|
+
* per-request timeout (~400ms/text in WASM x hundreds of texts), and a
|
|
131
|
+
* single slow worker would gate the whole batch.
|
|
126
132
|
*/
|
|
127
|
-
async embedBatch(texts) {
|
|
133
|
+
async embedBatch(texts, opts = {}) {
|
|
128
134
|
if (!texts || texts.length === 0) return [];
|
|
129
|
-
const
|
|
130
|
-
const
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
for (let i = 0; i < n; i++) {
|
|
134
|
-
const start = i * shard;
|
|
135
|
-
if (start >= texts.length) break;
|
|
136
|
-
const end = Math.min(texts.length, start + shard);
|
|
137
|
-
starts.push(start);
|
|
138
|
-
tasks.push(this._send(this._workers[i], texts.slice(start, end)));
|
|
135
|
+
const chunkSize = Math.max(1, opts.chunkSize ?? 8);
|
|
136
|
+
const chunks = [];
|
|
137
|
+
for (let start = 0; start < texts.length; start += chunkSize) {
|
|
138
|
+
chunks.push({ start, texts: texts.slice(start, start + chunkSize) });
|
|
139
139
|
}
|
|
140
|
-
const results = await Promise.all(tasks);
|
|
141
140
|
const out = new Array(texts.length);
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
141
|
+
let next = 0;
|
|
142
|
+
const drain = async (worker) => {
|
|
143
|
+
for (;;) {
|
|
144
|
+
const idx = next++;
|
|
145
|
+
if (idx >= chunks.length) return;
|
|
146
|
+
const { start, texts: chunkTexts } = chunks[idx];
|
|
147
|
+
const { dim, count, flat } = await this._send(worker, chunkTexts);
|
|
148
|
+
for (let j = 0; j < count; j++) {
|
|
149
|
+
out[start + j] = Array.from(flat.subarray(j * dim, (j + 1) * dim));
|
|
150
|
+
}
|
|
147
151
|
}
|
|
148
|
-
}
|
|
152
|
+
};
|
|
153
|
+
await Promise.all(this._workers.map(drain));
|
|
149
154
|
return out;
|
|
150
155
|
}
|
|
151
156
|
|
package/dist/core/onnx/loader.js
CHANGED
|
@@ -9,6 +9,9 @@
|
|
|
9
9
|
*/
|
|
10
10
|
export const MODELS = {
|
|
11
11
|
// Sentence Transformers - Small & Fast
|
|
12
|
+
// prefixPolicy / queryPrefix / passagePrefix (ADR-210 D4) encode each
|
|
13
|
+
// model card's query/passage convention: 'none' | 'required' |
|
|
14
|
+
// 'query-recommended'. MiniLM models take NO prefixes.
|
|
12
15
|
'all-MiniLM-L6-v2': {
|
|
13
16
|
name: 'all-MiniLM-L6-v2',
|
|
14
17
|
dimension: 384,
|
|
@@ -17,6 +20,9 @@ export const MODELS = {
|
|
|
17
20
|
description: 'Fast, general-purpose embeddings',
|
|
18
21
|
model: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx',
|
|
19
22
|
tokenizer: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
|
|
23
|
+
prefixPolicy: 'none',
|
|
24
|
+
queryPrefix: '',
|
|
25
|
+
passagePrefix: '',
|
|
20
26
|
},
|
|
21
27
|
'all-MiniLM-L12-v2': {
|
|
22
28
|
name: 'all-MiniLM-L12-v2',
|
|
@@ -26,9 +32,13 @@ export const MODELS = {
|
|
|
26
32
|
description: 'Better quality, balanced speed',
|
|
27
33
|
model: 'https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/main/onnx/model.onnx',
|
|
28
34
|
tokenizer: 'https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/main/tokenizer.json',
|
|
35
|
+
prefixPolicy: 'none',
|
|
36
|
+
queryPrefix: '',
|
|
37
|
+
passagePrefix: '',
|
|
29
38
|
},
|
|
30
39
|
|
|
31
|
-
// BGE Models - State of the art
|
|
40
|
+
// BGE Models - State of the art. Query instruction recommended for
|
|
41
|
+
// short-query → long-passage retrieval; passages need no instruction.
|
|
32
42
|
'bge-small-en-v1.5': {
|
|
33
43
|
name: 'bge-small-en-v1.5',
|
|
34
44
|
dimension: 384,
|
|
@@ -37,6 +47,9 @@ export const MODELS = {
|
|
|
37
47
|
description: 'State-of-the-art small model',
|
|
38
48
|
model: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/onnx/model.onnx',
|
|
39
49
|
tokenizer: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.json',
|
|
50
|
+
prefixPolicy: 'query-recommended',
|
|
51
|
+
queryPrefix: 'Represent this sentence for searching relevant passages: ',
|
|
52
|
+
passagePrefix: '',
|
|
40
53
|
},
|
|
41
54
|
'bge-base-en-v1.5': {
|
|
42
55
|
name: 'bge-base-en-v1.5',
|
|
@@ -46,9 +59,13 @@ export const MODELS = {
|
|
|
46
59
|
description: 'Best overall quality',
|
|
47
60
|
model: 'https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/onnx/model.onnx',
|
|
48
61
|
tokenizer: 'https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/tokenizer.json',
|
|
62
|
+
prefixPolicy: 'query-recommended',
|
|
63
|
+
queryPrefix: 'Represent this sentence for searching relevant passages: ',
|
|
64
|
+
passagePrefix: '',
|
|
49
65
|
},
|
|
50
66
|
|
|
51
|
-
// E5 Models - Microsoft
|
|
67
|
+
// E5 Models - Microsoft. The model card REQUIRES 'query: '/'passage: '
|
|
68
|
+
// prefixes; quality degrades without them.
|
|
52
69
|
'e5-small-v2': {
|
|
53
70
|
name: 'e5-small-v2',
|
|
54
71
|
dimension: 384,
|
|
@@ -57,9 +74,12 @@ export const MODELS = {
|
|
|
57
74
|
description: 'Excellent for search & retrieval',
|
|
58
75
|
model: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/onnx/model.onnx',
|
|
59
76
|
tokenizer: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/tokenizer.json',
|
|
77
|
+
prefixPolicy: 'required',
|
|
78
|
+
queryPrefix: 'query: ',
|
|
79
|
+
passagePrefix: 'passage: ',
|
|
60
80
|
},
|
|
61
81
|
|
|
62
|
-
// GTE Models - Alibaba
|
|
82
|
+
// GTE Models - Alibaba (no prefixes documented)
|
|
63
83
|
'gte-small': {
|
|
64
84
|
name: 'gte-small',
|
|
65
85
|
dimension: 384,
|
|
@@ -68,6 +88,9 @@ export const MODELS = {
|
|
|
68
88
|
description: 'Good multilingual support',
|
|
69
89
|
model: 'https://huggingface.co/thenlper/gte-small/resolve/main/onnx/model.onnx',
|
|
70
90
|
tokenizer: 'https://huggingface.co/thenlper/gte-small/resolve/main/tokenizer.json',
|
|
91
|
+
prefixPolicy: 'none',
|
|
92
|
+
queryPrefix: '',
|
|
93
|
+
passagePrefix: '',
|
|
71
94
|
},
|
|
72
95
|
};
|
|
73
96
|
|
|
@@ -100,7 +123,11 @@ export class ModelLoader {
|
|
|
100
123
|
* @returns {Promise<{modelBytes: Uint8Array, tokenizerJson: string, config: object}>}
|
|
101
124
|
*/
|
|
102
125
|
async loadModel(modelName = DEFAULT_MODEL) {
|
|
103
|
-
|
|
126
|
+
// Own-property lookup only: a hostile model name like '__proto__'
|
|
127
|
+
// must be rejected as unknown, not resolve to a prototype member.
|
|
128
|
+
const modelConfig = Object.prototype.hasOwnProperty.call(MODELS, modelName)
|
|
129
|
+
? MODELS[modelName]
|
|
130
|
+
: undefined;
|
|
104
131
|
if (!modelConfig) {
|
|
105
132
|
throw new Error(`Unknown model: ${modelName}. Available: ${Object.keys(MODELS).join(', ')}`);
|
|
106
133
|
}
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
* - Batch embedding support
|
|
14
14
|
* - Optional parallel workers for 3.8x batch speedup
|
|
15
15
|
*/
|
|
16
|
+
import { EmbeddingProvenance } from './embedding-provenance';
|
|
16
17
|
declare global {
|
|
17
18
|
var __ruvector_require: NodeRequire | undefined;
|
|
18
19
|
}
|
|
@@ -57,9 +58,14 @@ export declare function isOnnxAvailable(): boolean;
|
|
|
57
58
|
*/
|
|
58
59
|
export declare function initOnnxEmbedder(config?: OnnxEmbedderConfig): Promise<boolean>;
|
|
59
60
|
/**
|
|
60
|
-
* Generate embedding for text
|
|
61
|
+
* Generate embedding for text. Equivalent to `embedPassage()` (ADR-210 D4):
|
|
62
|
+
* stored/passage text is the default; use `embedQuery()` for search queries.
|
|
61
63
|
*/
|
|
62
64
|
export declare function embed(text: string): Promise<EmbeddingResult>;
|
|
65
|
+
/** Embed a search query, applying the model's registered query prefix (D4). */
|
|
66
|
+
export declare function embedQuery(text: string): Promise<EmbeddingResult>;
|
|
67
|
+
/** Embed a passage/document, applying the model's registered passage prefix (D4). */
|
|
68
|
+
export declare function embedPassage(text: string): Promise<EmbeddingResult>;
|
|
63
69
|
/**
|
|
64
70
|
* Generate embeddings for multiple texts
|
|
65
71
|
* Uses parallel workers automatically for batches >= parallelThreshold
|
|
@@ -94,6 +100,13 @@ export declare function isReady(): boolean;
|
|
|
94
100
|
* https://github.com/ruvnet/RuVector/issues/523.
|
|
95
101
|
*/
|
|
96
102
|
export declare function isOnnxInitialized(): boolean;
|
|
103
|
+
/** Model id of the loaded model, or null before init (ADR-210). */
|
|
104
|
+
export declare function getActiveModelId(): string | null;
|
|
105
|
+
/**
|
|
106
|
+
* Embedding-provenance record (ADR-210 D0) describing vectors produced by the
|
|
107
|
+
* loaded ONNX embedder, or null before the model is initialized.
|
|
108
|
+
*/
|
|
109
|
+
export declare function getEmbedderProvenance(): EmbeddingProvenance | null;
|
|
97
110
|
/**
|
|
98
111
|
* Get embedder stats including SIMD and parallel capabilities
|
|
99
112
|
*/
|
|
@@ -126,13 +139,41 @@ export declare function initParallelEmbedder(numWorkers?: number): Promise<boole
|
|
|
126
139
|
export declare function embedBatchParallel(texts: string[]): Promise<number[][]>;
|
|
127
140
|
/** Number of active pool workers (0 if the pool isn't started). */
|
|
128
141
|
export declare function getParallelWorkerCount(): number;
|
|
142
|
+
/** Batches at or above this size route through the worker pool (ADR-210 D3). */
|
|
143
|
+
export declare const BULK_EMBED_THRESHOLD = 32;
|
|
144
|
+
/**
|
|
145
|
+
* Default bulk-embedding path (ADR-210 D3): batches of `threshold`
|
|
146
|
+
* (default 32) or more texts route through the bundled parallel worker pool
|
|
147
|
+
* — fp32 model bytes shared across workers via SharedArrayBuffer, vectors
|
|
148
|
+
* identical to the single-thread path. Smaller batches, and any batch when
|
|
149
|
+
* pool startup fails (no worker_threads, no SharedArrayBuffer), use the
|
|
150
|
+
* single-threaded batch path with one stderr note.
|
|
151
|
+
*
|
|
152
|
+
* INT8 STATUS (honest gap, ADR-210 D3): the registered int8 variants
|
|
153
|
+
* (QUANTIZED_MODELS in onnx-optimized.ts) cannot run on the bundled WASM
|
|
154
|
+
* runtime today — its graph analyzer rejects quantized MiniLM exports
|
|
155
|
+
* ("Failed analyse for node /Unsqueeze", verified against both
|
|
156
|
+
* Xenova/all-MiniLM-L6-v2 model_quantized.onnx and the official
|
|
157
|
+
* sentence-transformers model_quint8_avx2.onnx exports). Bulk ingest
|
|
158
|
+
* therefore defaults to parallel-fp32; int8 ingest needs a Rust-side
|
|
159
|
+
* runtime upgrade in the ruvector-onnx-embeddings-wasm crate (tracked as
|
|
160
|
+
* an ADR-210 follow-up). Single-query latency keeps fp32 either way.
|
|
161
|
+
*/
|
|
162
|
+
export declare function embedBulk(texts: string[], opts?: {
|
|
163
|
+
threshold?: number;
|
|
164
|
+
}): Promise<number[][]>;
|
|
129
165
|
/** Shut down the bundled worker pool and release its threads. */
|
|
130
166
|
export declare function shutdownParallelEmbedder(): Promise<void>;
|
|
131
167
|
export declare class OnnxEmbedder {
|
|
132
168
|
private config;
|
|
133
169
|
constructor(config?: OnnxEmbedderConfig);
|
|
134
170
|
init(): Promise<boolean>;
|
|
171
|
+
/** Equivalent to embedPassage() — ADR-210 D4. */
|
|
135
172
|
embed(text: string): Promise<number[]>;
|
|
173
|
+
/** Embed a search query with the model's registered query prefix (D4). */
|
|
174
|
+
embedQuery(text: string): Promise<number[]>;
|
|
175
|
+
/** Embed a passage/document with the model's registered passage prefix (D4). */
|
|
176
|
+
embedPassage(text: string): Promise<number[]>;
|
|
136
177
|
embedBatch(texts: string[]): Promise<number[][]>;
|
|
137
178
|
similarity(text1: string, text2: string): Promise<number>;
|
|
138
179
|
get dimension(): number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;
|
|
1
|
+
{"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAMH,OAAO,EACL,mBAAmB,EAKpB,MAAM,wBAAwB,CAAC;AAGhC,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AA2BD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AAkFD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAyGxF;AAyBD;;;GAGG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAElE;AAED,+EAA+E;AAC/E,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAEvE;AAED,qFAAqF;AACrF,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAEzE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA2C5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;;;GAIG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED,mEAAmE;AACnE,wBAAgB,gBAAgB,IAAI,MAAM,GAAG,IAAI,CAEhD;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,IAAI,mBAAmB,GAAG,IAAI,CAUlE;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAO9C;AAED;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAkBhF;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAK7E;AAED,mEAAmE;AACnE,wBAAgB,sBAAsB,IAAI,MAAM,CAE/C;AAED,gFAAgF;AAChF,eAAO,MAAM,oBAAoB,KAAK,CAAC;AAIvC;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,SAAS,CAC7B,KAAK,EAAE,MAAM,EAAE,EACf,IAAI,GAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAO,GAChC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAqBrB;AAED,iEAAiE;AACjE,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC,CAK9D;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAI9B,iDAAiD;IAC3C,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAK5C,0EAA0E;IACpE,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKjD,gFAAgF;IAC1E,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAK7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
|