ruvector 0.2.29 → 0.2.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ exports.createLightweightEngine = createLightweightEngine;
19
19
  const agentdb_fast_1 = require("./agentdb-fast");
20
20
  const sona_wrapper_1 = require("./sona-wrapper");
21
21
  const onnx_embedder_1 = require("./onnx-embedder");
22
+ const embedding_provenance_1 = require("./embedding-provenance");
22
23
  const parallel_intelligence_1 = require("./parallel-intelligence");
23
24
  // ============================================================================
24
25
  // Lazy Loading
@@ -76,6 +77,10 @@ class IntelligenceEngine {
76
77
  this.attention = null;
77
78
  this.onnxEmbedder = null;
78
79
  this.onnxReady = false;
80
+ this.onnxInitPromise = null;
81
+ this.onnxInitError = null;
82
+ /** RUVECTOR_EMBEDDER=minilm: fail rather than fall back (ADR-210 D5). */
83
+ this.onnxHardRequire = false;
79
84
  this.parallel = null;
80
85
  // In-memory data structures
81
86
  this.memories = new Map();
@@ -92,8 +97,28 @@ class IntelligenceEngine {
92
97
  this.sessionStart = Date.now();
93
98
  this.learningEnabled = true;
94
99
  this.episodeBatchQueue = [];
100
+ // ADR-210 D1/D5: ONNX semantic embeddings are the default. Environment
101
+ // rollout flags override config: RUVECTOR_EMBEDDER=auto|minilm|hash wins
102
+ // over RUVECTOR_ONNX=0|1, which wins over config.enableOnnx.
103
+ const selection = (0, embedding_provenance_1.resolveEmbedderSelection)();
104
+ let useOnnx;
105
+ if (selection === 'hash') {
106
+ useOnnx = false;
107
+ }
108
+ else if (selection === 'minilm') {
109
+ // Hard-require: init failure is an error, never a silent fallback.
110
+ if (!(0, onnx_embedder_1.isOnnxAvailable)()) {
111
+ throw new Error('RUVECTOR_EMBEDDER=minilm (or RUVECTOR_ONNX=1) hard-requires the ONNX embedder, ' +
112
+ 'but the bundled WASM files are missing. Reinstall ruvector or unset the flag.');
113
+ }
114
+ useOnnx = true;
115
+ this.onnxHardRequire = true;
116
+ }
117
+ else {
118
+ // auto: default-on — MiniLM when loadable, loud hash fallback otherwise.
119
+ useOnnx = (config.enableOnnx ?? true) && (0, onnx_embedder_1.isOnnxAvailable)();
120
+ }
95
121
  // If ONNX is enabled, use 384 dimensions (MiniLM default)
96
- const useOnnx = !!(config.enableOnnx && (0, onnx_embedder_1.isOnnxAvailable)());
97
122
  const embeddingDim = useOnnx ? 384 : (config.embeddingDim ?? 256);
98
123
  this.config = {
99
124
  embeddingDim,
@@ -102,6 +127,7 @@ class IntelligenceEngine {
102
127
  enableSona: config.enableSona ?? true,
103
128
  enableAttention: config.enableAttention ?? true,
104
129
  enableOnnx: useOnnx,
130
+ onnxConfig: config.onnxConfig ?? {},
105
131
  sonaConfig: config.sonaConfig ?? {},
106
132
  storagePath: config.storagePath ?? '',
107
133
  learningRate: config.learningRate ?? 0.1,
@@ -114,9 +140,9 @@ class IntelligenceEngine {
114
140
  this.agentDb = new agentdb_fast_1.FastAgentDB(this.config.embeddingDim, this.config.maxEpisodes);
115
141
  // Initialize ONNX embedder if enabled
116
142
  if (this.config.enableOnnx) {
117
- this.onnxEmbedder = new onnx_embedder_1.OnnxEmbedder();
143
+ this.onnxEmbedder = new onnx_embedder_1.OnnxEmbedder(this.config.onnxConfig);
118
144
  // Initialize async (don't block constructor)
119
- this.initOnnx();
145
+ this.onnxInitPromise = this.initOnnx();
120
146
  }
121
147
  // Initialize SONA if enabled and available
122
148
  if (this.config.enableSona && (0, sona_wrapper_1.isSonaAvailable)()) {
@@ -144,16 +170,34 @@ class IntelligenceEngine {
144
170
  }
145
171
  async initOnnx() {
146
172
  if (!this.onnxEmbedder)
147
- return;
173
+ return false;
148
174
  try {
149
175
  await this.onnxEmbedder.init();
150
176
  this.onnxReady = true;
177
+ return true;
151
178
  }
152
179
  catch (e) {
153
- console.warn('ONNX initialization failed, using fallback embeddings');
180
+ // Quiet here; the loud once-per-process notice fires on first
181
+ // fallback USE (ADR-210 D1 / acceptance gate 2).
182
+ this.onnxInitError = e instanceof Error ? e : new Error(String(e));
154
183
  this.onnxReady = false;
184
+ return false;
155
185
  }
156
186
  }
187
+ /**
188
+ * Await lazy ONNX initialization. Resolves true once the model is loaded,
189
+ * false when it could not be (offline / restricted CI) — in which case
190
+ * stats().embedderKind reports 'hash-fallback' (ADR-210 D1).
191
+ */
192
+ async awaitOnnx() {
193
+ if (!this.onnxInitPromise)
194
+ return false;
195
+ return this.onnxInitPromise;
196
+ }
197
+ /** Why ONNX init failed, or null (ADR-210 D1 observability). */
198
+ getOnnxInitError() {
199
+ return this.onnxInitError;
200
+ }
157
201
  async initVectorDb() {
158
202
  try {
159
203
  const VDB = getVectorDB();
@@ -185,6 +229,11 @@ class IntelligenceEngine {
185
229
  */
186
230
  embed(text) {
187
231
  const dim = this.config.embeddingDim;
232
+ // ADR-210 D1: ONNX was requested but the model could not load — the hash
233
+ // fallback now serves embeds. Report it loudly, exactly once per process.
234
+ if (this.config.enableOnnx && this.onnxInitError) {
235
+ (0, embedding_provenance_1.warnHashFallbackOnce)(this.onnxInitError.message);
236
+ }
188
237
  // Try to use attention-based embedding (best sync quality)
189
238
  if (this.attention?.DotProductAttention) {
190
239
  try {
@@ -198,25 +247,81 @@ class IntelligenceEngine {
198
247
  return this.hashEmbed(text, dim);
199
248
  }
200
249
  /**
201
- * Async embedding with ONNX support (recommended for semantic quality)
250
+ * Async embedding with ONNX support (recommended for semantic quality).
251
+ *
252
+ * ADR-210 D1: when ONNX is enabled but the model cannot load, the hash
253
+ * fallback is used and reported (one stderr warning per process, and
254
+ * stats().embedderKind === 'hash-fallback'). Under RUVECTOR_EMBEDDER=minilm
255
+ * the failure is an error instead — no fallback (D5).
202
256
  */
203
257
  async embedAsync(text) {
204
258
  // Try ONNX first (best semantic quality)
205
259
  if (this.onnxEmbedder) {
206
260
  try {
207
261
  if (!this.onnxReady) {
208
- await this.onnxEmbedder.init();
209
- this.onnxReady = true;
262
+ const ok = this.onnxInitPromise ? await this.onnxInitPromise : await this.initOnnx();
263
+ if (!ok)
264
+ throw this.onnxInitError ?? new Error('ONNX initialization failed');
210
265
  }
211
266
  return await this.onnxEmbedder.embed(text);
212
267
  }
213
- catch {
268
+ catch (e) {
269
+ if (this.onnxHardRequire) {
270
+ throw new Error(`RUVECTOR_EMBEDDER=minilm hard-requires the ONNX embedder and fallback is disabled: ${e?.message ?? e}`);
271
+ }
272
+ (0, embedding_provenance_1.warnHashFallbackOnce)(e?.message ?? String(e));
214
273
  // Fall through to sync methods
215
274
  }
216
275
  }
217
276
  // Fall back to sync embedding
218
277
  return this.embed(text);
219
278
  }
279
+ /**
280
+ * Batch embedding for bulk ingest (ADR-210 D3). When the ONNX model is
281
+ * loaded, batches of 32+ texts route through the bundled parallel worker
282
+ * pool (parallel-fp32 — see embedBulk in onnx-embedder.ts for the int8
283
+ * status note); smaller batches use the single-threaded batch path. On
284
+ * fallback, semantics match embedAsync exactly: hash per-item with the
285
+ * loud once-per-process warning, or a hard error under
286
+ * RUVECTOR_EMBEDDER=minilm (D5). Texts are embedded as passages (D4).
287
+ *
288
+ * Callers that start the pool should call shutdownEmbedderPool() when the
289
+ * bulk work is done so worker threads do not keep the process alive.
290
+ */
291
+ async embedBatchAsync(texts) {
292
+ if (!texts || texts.length === 0)
293
+ return [];
294
+ if (this.onnxEmbedder) {
295
+ try {
296
+ if (!this.onnxReady) {
297
+ const ok = this.onnxInitPromise ? await this.onnxInitPromise : await this.initOnnx();
298
+ if (!ok)
299
+ throw this.onnxInitError ?? new Error('ONNX initialization failed');
300
+ }
301
+ return await (0, onnx_embedder_1.embedBulk)(texts);
302
+ }
303
+ catch (e) {
304
+ if (this.onnxHardRequire) {
305
+ throw new Error(`RUVECTOR_EMBEDDER=minilm hard-requires the ONNX embedder and fallback is disabled: ${e?.message ?? e}`);
306
+ }
307
+ (0, embedding_provenance_1.warnHashFallbackOnce)(e?.message ?? String(e));
308
+ // Fall through to sync methods
309
+ }
310
+ }
311
+ return texts.map(t => this.embed(t));
312
+ }
313
+ /**
314
+ * Shut down the bundled bulk-embed worker pool, releasing its threads
315
+ * (ADR-210 D3). Safe to call when the pool was never started.
316
+ */
317
+ async shutdownEmbedderPool() {
318
+ try {
319
+ await (0, onnx_embedder_1.shutdownParallelEmbedder)();
320
+ }
321
+ catch {
322
+ // Pool teardown is best-effort.
323
+ }
324
+ }
220
325
  /**
221
326
  * Attention-based embedding using Flash or Multi-head attention
222
327
  */
@@ -865,12 +970,39 @@ class IntelligenceEngine {
865
970
  workerTriggers: this.workerTriggerMappings.size,
866
971
  attentionEnabled: this.attention !== null,
867
972
  onnxEnabled: this.onnxReady,
973
+ embedderKind: this.config.enableOnnx
974
+ ? (this.onnxReady ? 'onnx-minilm' : 'hash-fallback')
975
+ : 'hash',
868
976
  parallelEnabled: parallelStats.enabled,
869
977
  parallelWorkers: parallelStats.workers,
870
978
  parallelBusy: parallelStats.busy,
871
979
  parallelQueued: parallelStats.queued,
872
980
  };
873
981
  }
982
+ /**
983
+ * Embedding provenance of vectors embedAsync() would produce right now
984
+ * (ADR-210 D0). Hash fallback embeds are 'hash' even while ONNX is enabled
985
+ * but not ready — provenance records what actually happened, not intent.
986
+ */
987
+ getActiveProvenance() {
988
+ if (this.onnxReady) {
989
+ return ((0, onnx_embedder_1.getEmbedderProvenance)() ?? {
990
+ embedderKind: 'onnx-minilm',
991
+ modelId: 'all-MiniLM-L6-v2',
992
+ dimension: 384,
993
+ normalize: true,
994
+ prefixPolicy: 'none',
995
+ });
996
+ }
997
+ return {
998
+ embedderKind: 'hash',
999
+ modelId: null,
1000
+ dimension: this.config.embeddingDim,
1001
+ // The engine's hash/attention embedders L2-normalize their output.
1002
+ normalize: true,
1003
+ prefixPolicy: 'none',
1004
+ };
1005
+ }
874
1006
  // =========================================================================
875
1007
  // Persistence
876
1008
  // =========================================================================
@@ -882,6 +1014,7 @@ class IntelligenceEngine {
882
1014
  version: '2.0.0',
883
1015
  exported: new Date().toISOString(),
884
1016
  config: this.config,
1017
+ embeddingProvenance: this.getActiveProvenance(),
885
1018
  memories: Array.from(this.memories.values()),
886
1019
  routingPatterns: Object.fromEntries(Array.from(this.routingPatterns.entries()).map(([k, v]) => [
887
1020
  k,
@@ -1032,7 +1165,10 @@ function createIntelligenceEngine(config) {
1032
1165
  return new IntelligenceEngine(config);
1033
1166
  }
1034
1167
  /**
1035
- * Create a high-performance engine with all features enabled
1168
+ * Create a high-performance engine with all features enabled.
1169
+ * Note (ADR-210): with default-on ONNX the embedding space is 384-dim; the
1170
+ * 512-dim setting only applies on the hash path (RUVECTOR_EMBEDDER=hash or
1171
+ * ONNX unavailable). SONA dims follow the engine's actual embeddingDim.
1036
1172
  */
1037
1173
  function createHighPerformanceEngine() {
1038
1174
  return new IntelligenceEngine({
@@ -1042,7 +1178,6 @@ function createHighPerformanceEngine() {
1042
1178
  enableSona: true,
1043
1179
  enableAttention: true,
1044
1180
  sonaConfig: {
1045
- hiddenDim: 512,
1046
1181
  microLoraRank: 2,
1047
1182
  baseLoraRank: 16,
1048
1183
  patternClusters: 200,
@@ -1050,7 +1185,8 @@ function createHighPerformanceEngine() {
1050
1185
  });
1051
1186
  }
1052
1187
  /**
1053
- * Create a lightweight engine for fast startup
1188
+ * Create a lightweight engine for fast startup (hash embedder: no model load,
1189
+ * no download — the deterministic no-model path stays available, ADR-210).
1054
1190
  */
1055
1191
  function createLightweightEngine() {
1056
1192
  return new IntelligenceEngine({
@@ -1059,6 +1195,7 @@ function createLightweightEngine() {
1059
1195
  maxEpisodes: 5000,
1060
1196
  enableSona: false,
1061
1197
  enableAttention: false,
1198
+ enableOnnx: false,
1062
1199
  });
1063
1200
  }
1064
1201
  exports.default = IntelligenceEngine;
@@ -122,30 +122,35 @@ export class ParallelEmbedder {
122
122
  }
123
123
 
124
124
  /**
125
- * Embed many texts, sharded across workers. Returns number[][] in input order.
125
+ * Embed many texts across workers. Returns number[][] in input order.
126
+ *
127
+ * Texts are dispatched in bounded chunks (default 8) that workers pull as
128
+ * they finish (work-stealing), rather than one giant shard per worker:
129
+ * a large bulk batch (ADR-210 D3 ingest) would otherwise exceed the
130
+ * per-request timeout (~400ms/text in WASM x hundreds of texts), and a
131
+ * single slow worker would gate the whole batch.
126
132
  */
127
- async embedBatch(texts) {
133
+ async embedBatch(texts, opts = {}) {
128
134
  if (!texts || texts.length === 0) return [];
129
- const n = this._workers.length;
130
- const shard = Math.ceil(texts.length / n);
131
- const tasks = [];
132
- const starts = [];
133
- for (let i = 0; i < n; i++) {
134
- const start = i * shard;
135
- if (start >= texts.length) break;
136
- const end = Math.min(texts.length, start + shard);
137
- starts.push(start);
138
- tasks.push(this._send(this._workers[i], texts.slice(start, end)));
135
+ const chunkSize = Math.max(1, opts.chunkSize ?? 8);
136
+ const chunks = [];
137
+ for (let start = 0; start < texts.length; start += chunkSize) {
138
+ chunks.push({ start, texts: texts.slice(start, start + chunkSize) });
139
139
  }
140
- const results = await Promise.all(tasks);
141
140
  const out = new Array(texts.length);
142
- for (let r = 0; r < results.length; r++) {
143
- const { dim, count, flat } = results[r];
144
- const start = starts[r];
145
- for (let j = 0; j < count; j++) {
146
- out[start + j] = Array.from(flat.subarray(j * dim, (j + 1) * dim));
141
+ let next = 0;
142
+ const drain = async (worker) => {
143
+ for (;;) {
144
+ const idx = next++;
145
+ if (idx >= chunks.length) return;
146
+ const { start, texts: chunkTexts } = chunks[idx];
147
+ const { dim, count, flat } = await this._send(worker, chunkTexts);
148
+ for (let j = 0; j < count; j++) {
149
+ out[start + j] = Array.from(flat.subarray(j * dim, (j + 1) * dim));
150
+ }
147
151
  }
148
- }
152
+ };
153
+ await Promise.all(this._workers.map(drain));
149
154
  return out;
150
155
  }
151
156
 
@@ -9,6 +9,9 @@
9
9
  */
10
10
  export const MODELS = {
11
11
  // Sentence Transformers - Small & Fast
12
+ // prefixPolicy / queryPrefix / passagePrefix (ADR-210 D4) encode each
13
+ // model card's query/passage convention: 'none' | 'required' |
14
+ // 'query-recommended'. MiniLM models take NO prefixes.
12
15
  'all-MiniLM-L6-v2': {
13
16
  name: 'all-MiniLM-L6-v2',
14
17
  dimension: 384,
@@ -17,6 +20,9 @@ export const MODELS = {
17
20
  description: 'Fast, general-purpose embeddings',
18
21
  model: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx',
19
22
  tokenizer: 'https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json',
23
+ prefixPolicy: 'none',
24
+ queryPrefix: '',
25
+ passagePrefix: '',
20
26
  },
21
27
  'all-MiniLM-L12-v2': {
22
28
  name: 'all-MiniLM-L12-v2',
@@ -26,9 +32,13 @@ export const MODELS = {
26
32
  description: 'Better quality, balanced speed',
27
33
  model: 'https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/main/onnx/model.onnx',
28
34
  tokenizer: 'https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2/resolve/main/tokenizer.json',
35
+ prefixPolicy: 'none',
36
+ queryPrefix: '',
37
+ passagePrefix: '',
29
38
  },
30
39
 
31
- // BGE Models - State of the art
40
+ // BGE Models - State of the art. Query instruction recommended for
41
+ // short-query → long-passage retrieval; passages need no instruction.
32
42
  'bge-small-en-v1.5': {
33
43
  name: 'bge-small-en-v1.5',
34
44
  dimension: 384,
@@ -37,6 +47,9 @@ export const MODELS = {
37
47
  description: 'State-of-the-art small model',
38
48
  model: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/onnx/model.onnx',
39
49
  tokenizer: 'https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.json',
50
+ prefixPolicy: 'query-recommended',
51
+ queryPrefix: 'Represent this sentence for searching relevant passages: ',
52
+ passagePrefix: '',
40
53
  },
41
54
  'bge-base-en-v1.5': {
42
55
  name: 'bge-base-en-v1.5',
@@ -46,9 +59,13 @@ export const MODELS = {
46
59
  description: 'Best overall quality',
47
60
  model: 'https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/onnx/model.onnx',
48
61
  tokenizer: 'https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/tokenizer.json',
62
+ prefixPolicy: 'query-recommended',
63
+ queryPrefix: 'Represent this sentence for searching relevant passages: ',
64
+ passagePrefix: '',
49
65
  },
50
66
 
51
- // E5 Models - Microsoft
67
+ // E5 Models - Microsoft. The model card REQUIRES 'query: '/'passage: '
68
+ // prefixes; quality degrades without them.
52
69
  'e5-small-v2': {
53
70
  name: 'e5-small-v2',
54
71
  dimension: 384,
@@ -57,9 +74,12 @@ export const MODELS = {
57
74
  description: 'Excellent for search & retrieval',
58
75
  model: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/onnx/model.onnx',
59
76
  tokenizer: 'https://huggingface.co/intfloat/e5-small-v2/resolve/main/tokenizer.json',
77
+ prefixPolicy: 'required',
78
+ queryPrefix: 'query: ',
79
+ passagePrefix: 'passage: ',
60
80
  },
61
81
 
62
- // GTE Models - Alibaba
82
+ // GTE Models - Alibaba (no prefixes documented)
63
83
  'gte-small': {
64
84
  name: 'gte-small',
65
85
  dimension: 384,
@@ -68,6 +88,9 @@ export const MODELS = {
68
88
  description: 'Good multilingual support',
69
89
  model: 'https://huggingface.co/thenlper/gte-small/resolve/main/onnx/model.onnx',
70
90
  tokenizer: 'https://huggingface.co/thenlper/gte-small/resolve/main/tokenizer.json',
91
+ prefixPolicy: 'none',
92
+ queryPrefix: '',
93
+ passagePrefix: '',
71
94
  },
72
95
  };
73
96
 
@@ -100,7 +123,11 @@ export class ModelLoader {
100
123
  * @returns {Promise<{modelBytes: Uint8Array, tokenizerJson: string, config: object}>}
101
124
  */
102
125
  async loadModel(modelName = DEFAULT_MODEL) {
103
- const modelConfig = MODELS[modelName];
126
+ // Own-property lookup only: a hostile model name like '__proto__'
127
+ // must be rejected as unknown, not resolve to a prototype member.
128
+ const modelConfig = Object.prototype.hasOwnProperty.call(MODELS, modelName)
129
+ ? MODELS[modelName]
130
+ : undefined;
104
131
  if (!modelConfig) {
105
132
  throw new Error(`Unknown model: ${modelName}. Available: ${Object.keys(MODELS).join(', ')}`);
106
133
  }
@@ -13,6 +13,7 @@
13
13
  * - Batch embedding support
14
14
  * - Optional parallel workers for 3.8x batch speedup
15
15
  */
16
+ import { EmbeddingProvenance } from './embedding-provenance';
16
17
  declare global {
17
18
  var __ruvector_require: NodeRequire | undefined;
18
19
  }
@@ -57,9 +58,14 @@ export declare function isOnnxAvailable(): boolean;
57
58
  */
58
59
  export declare function initOnnxEmbedder(config?: OnnxEmbedderConfig): Promise<boolean>;
59
60
  /**
60
- * Generate embedding for text
61
+ * Generate embedding for text. Equivalent to `embedPassage()` (ADR-210 D4):
62
+ * stored/passage text is the default; use `embedQuery()` for search queries.
61
63
  */
62
64
  export declare function embed(text: string): Promise<EmbeddingResult>;
65
+ /** Embed a search query, applying the model's registered query prefix (D4). */
66
+ export declare function embedQuery(text: string): Promise<EmbeddingResult>;
67
+ /** Embed a passage/document, applying the model's registered passage prefix (D4). */
68
+ export declare function embedPassage(text: string): Promise<EmbeddingResult>;
63
69
  /**
64
70
  * Generate embeddings for multiple texts
65
71
  * Uses parallel workers automatically for batches >= parallelThreshold
@@ -94,6 +100,13 @@ export declare function isReady(): boolean;
94
100
  * https://github.com/ruvnet/RuVector/issues/523.
95
101
  */
96
102
  export declare function isOnnxInitialized(): boolean;
103
+ /** Model id of the loaded model, or null before init (ADR-210). */
104
+ export declare function getActiveModelId(): string | null;
105
+ /**
106
+ * Embedding-provenance record (ADR-210 D0) describing vectors produced by the
107
+ * loaded ONNX embedder, or null before the model is initialized.
108
+ */
109
+ export declare function getEmbedderProvenance(): EmbeddingProvenance | null;
97
110
  /**
98
111
  * Get embedder stats including SIMD and parallel capabilities
99
112
  */
@@ -126,13 +139,41 @@ export declare function initParallelEmbedder(numWorkers?: number): Promise<boole
126
139
  export declare function embedBatchParallel(texts: string[]): Promise<number[][]>;
127
140
  /** Number of active pool workers (0 if the pool isn't started). */
128
141
  export declare function getParallelWorkerCount(): number;
142
+ /** Batches at or above this size route through the worker pool (ADR-210 D3). */
143
+ export declare const BULK_EMBED_THRESHOLD = 32;
144
+ /**
145
+ * Default bulk-embedding path (ADR-210 D3): batches of `threshold`
146
+ * (default 32) or more texts route through the bundled parallel worker pool
147
+ * — fp32 model bytes shared across workers via SharedArrayBuffer, vectors
148
+ * identical to the single-thread path. Smaller batches, and any batch when
149
+ * pool startup fails (no worker_threads, no SharedArrayBuffer), use the
150
+ * single-threaded batch path with one stderr note.
151
+ *
152
+ * INT8 STATUS (honest gap, ADR-210 D3): the registered int8 variants
153
+ * (QUANTIZED_MODELS in onnx-optimized.ts) cannot run on the bundled WASM
154
+ * runtime today — its graph analyzer rejects quantized MiniLM exports
155
+ * ("Failed analyse for node /Unsqueeze", verified against both
156
+ * Xenova/all-MiniLM-L6-v2 model_quantized.onnx and the official
157
+ * sentence-transformers model_quint8_avx2.onnx exports). Bulk ingest
158
+ * therefore defaults to parallel-fp32; int8 ingest needs a Rust-side
159
+ * runtime upgrade in the ruvector-onnx-embeddings-wasm crate (tracked as
160
+ * an ADR-210 follow-up). Single-query latency keeps fp32 either way.
161
+ */
162
+ export declare function embedBulk(texts: string[], opts?: {
163
+ threshold?: number;
164
+ }): Promise<number[][]>;
129
165
  /** Shut down the bundled worker pool and release its threads. */
130
166
  export declare function shutdownParallelEmbedder(): Promise<void>;
131
167
  export declare class OnnxEmbedder {
132
168
  private config;
133
169
  constructor(config?: OnnxEmbedderConfig);
134
170
  init(): Promise<boolean>;
171
+ /** Equivalent to embedPassage() — ADR-210 D4. */
135
172
  embed(text: string): Promise<number[]>;
173
+ /** Embed a search query with the model's registered query prefix (D4). */
174
+ embedQuery(text: string): Promise<number[]>;
175
+ /** Embed a passage/document with the model's registered passage prefix (D4). */
176
+ embedPassage(text: string): Promise<number[]>;
136
177
  embedBatch(texts: string[]): Promise<number[][]>;
137
178
  similarity(text1: string, text2: string): Promise<number>;
138
179
  get dimension(): number;
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAQH,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AAsBD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AAkFD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAuGxF;AAED;;GAEG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAiBlE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CAwC5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;;;GAIG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAO9C;AAED;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAkBhF;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAG7E;AAED,mEAAmE;AACnE,wBAAgB,sBAAsB,IAAI,MAAM,CAE/C;AAED,iEAAiE;AACjE,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC,CAK9D;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAIxB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKtC,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}
1
+ {"version":3,"file":"onnx-embedder.d.ts","sourceRoot":"","sources":["../../src/core/onnx-embedder.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAMH,OAAO,EACL,mBAAmB,EAKpB,MAAM,wBAAwB,CAAC;AAGhC,OAAO,CAAC,MAAM,CAAC;IAEb,IAAI,kBAAkB,EAAE,WAAW,GAAG,SAAS,CAAC;CACjD;AAuBD,MAAM,WAAW,kBAAkB;IACjC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,OAAO,CAAC;IACpB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB;;;;;OAKG;IACH,cAAc,CAAC,EAAE,OAAO,GAAG,MAAM,CAAC;IAClC,wDAAwD;IACxD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,iEAAiE;IACjE,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAMD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,MAAM,EAAE,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;CAChB;AA2BD;;;;;;;;GAQG;AACH,wBAAgB,eAAe,IAAI,OAAO,CAOzC;AAkFD;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,MAAM,GAAE,kBAAuB,GAAG,OAAO,CAAC,OAAO,CAAC,CAyGxF;AAyBD;;;GAGG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAElE;AAED,+EAA+E;AAC/E,wBAAsB,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAEvE;AAED,qFAAqF;AACrF,wBAAsB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAEzE;AAED;;;GAGG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,eAAe,EAAE,CAAC,CA2C5E;AAED;;GAEG;AACH,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAaxF;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAiBjE;AAED;;GAEG;AACH,wBAAgB,YAAY,IAAI,MAAM,CAErC;AAED;;;;GAIG;AACH,wBAAgB,OAAO,IAAI,OAAO,CAEjC;AAED;;;;;;;;;GASG;AACH,wBAAgB,iBAAiB,IAAI,OAAO,CAE3C;AAED,mEAAmE;AACnE,wBAAgB,gBAAgB,IAAI,MAAM,GAAG,IAAI,CAEhD;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,IAAI,mBAAmB,GAAG,IAAI,CAUlE;AAED;;GAEG;AACH,wBAAgB,QAAQ,IAAI;IAC1B,KAAK,EAAE,OAAO,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,OAAO,CAAC;IACd,QAAQ,EAAE,OAAO,CAAC;IAClB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B,CAUA;AAED;;GAEG;AACH,wBAAsB,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC,CAO9C;AAED;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,CAkBhF;AAED;;;GAGG;AACH,wBAAsB,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAK7E;AAED,mEAAmE;AACnE,wBAAgB,sBAAsB,IAAI,MAAM,CAE/C;AAED,gFAAgF;AAChF,eAAO,MAAM,oBAAoB,KAAK,CAAC;AAIvC;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,SAAS,CAC7B,KAAK,EAAE,MAAM,EAAE,EACf,IAAI,GAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAA;CAAO,GAChC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAqBrB;AAED,iEAAiE;AACjE,wBAAsB,wBAAwB,IAAI,OAAO,CAAC,IAAI,CAAC,CAK9D;AAGD,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAqB;gBAEvB,MAAM,GAAE,kBAAuB;IAIrC,IAAI,IAAI,OAAO,CAAC,OAAO,CAAC;IAI9B,iDAAiD;IAC3C,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAK5C,0EAA0E;IACpE,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAKjD,gFAAgF;IAC1E,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAK7C,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKhD,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IAK/D,IAAI,SAAS,IAAI,MAAM,CAEtB;IAED,IAAI,KAAK,IAAI,OAAO,CAEnB;CACF;AAED,eAAe,YAAY,CAAC"}