ruvector 0.2.29 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +624 -88
- package/bin/mcp-server.js +198 -17
- package/dist/core/embedding-provenance.d.ts +145 -0
- package/dist/core/embedding-provenance.d.ts.map +1 -0
- package/dist/core/embedding-provenance.js +258 -0
- package/dist/core/index.d.ts +1 -0
- package/dist/core/index.d.ts.map +1 -1
- package/dist/core/index.js +1 -0
- package/dist/core/intelligence-engine.d.ts +65 -4
- package/dist/core/intelligence-engine.d.ts.map +1 -1
- package/dist/core/intelligence-engine.js +149 -12
- package/dist/core/onnx/bundled-parallel.mjs +24 -19
- package/dist/core/onnx/loader.js +31 -4
- package/dist/core/onnx-embedder.d.ts +42 -1
- package/dist/core/onnx-embedder.d.ts.map +1 -1
- package/dist/core/onnx-embedder.js +116 -11
- package/dist/core/onnx-optimized.d.ts +8 -1
- package/dist/core/onnx-optimized.d.ts.map +1 -1
- package/dist/core/onnx-optimized.js +41 -6
- package/package.json +5 -4
package/bin/mcp-server.js
CHANGED
|
@@ -107,6 +107,18 @@ try {
|
|
|
107
107
|
// IntelligenceEngine not available
|
|
108
108
|
}
|
|
109
109
|
|
|
110
|
+
// ADR-210 D0: shared embedding-provenance invariant — the SAME dist module
|
|
111
|
+
// bin/cli.js uses (loadProvenance there), so the MCP server's writes to
|
|
112
|
+
// .ruvector/intelligence.json enforce the same contract instead of bypassing
|
|
113
|
+
// it. When dist is missing, enforcement degrades exactly like the CLI:
|
|
114
|
+
// pre-ADR-210 behavior.
|
|
115
|
+
// Optional dependency: bind the shared provenance module when dist is
// present; otherwise leave the binding null so callers can feature-test it.
let provenanceMod;
try {
  provenanceMod = require('../dist/core/embedding-provenance.js');
} catch {
  provenanceMod = null;
}
|
|
121
|
+
|
|
110
122
|
// Intelligence class with full RuVector stack support
|
|
111
123
|
class Intelligence {
|
|
112
124
|
constructor() {
|
|
@@ -169,12 +181,126 @@ class Intelligence {
|
|
|
169
181
|
load() {
|
|
170
182
|
try {
|
|
171
183
|
if (fs.existsSync(this.intelPath)) {
|
|
172
|
-
|
|
184
|
+
const data = JSON.parse(fs.readFileSync(this.intelPath, 'utf-8'));
|
|
185
|
+
// Untrusted on-disk input (ADR-210 security pass): a corrupted or
|
|
186
|
+
// hand-edited store must not crash array/object consumers.
|
|
187
|
+
if (data && typeof data === 'object' && !Array.isArray(data)) {
|
|
188
|
+
if (!Array.isArray(data.memories)) data.memories = [];
|
|
189
|
+
return data;
|
|
190
|
+
}
|
|
173
191
|
}
|
|
174
192
|
} catch {}
|
|
175
193
|
return { patterns: {}, memories: [], trajectories: [], errors: {}, agents: {}, edges: [] };
|
|
176
194
|
}
|
|
177
195
|
|
|
196
|
+
// ==========================================================================
|
|
197
|
+
// ADR-210 D0: embedding-provenance invariant for intelligence.json writes.
|
|
198
|
+
// Same contract bin/cli.js enforces (this server previously bypassed it):
|
|
199
|
+
// mismatched vector writes are refused naming both sides, legacy stores
|
|
200
|
+
// (vectors without provenance) are read-only until `ruvector hooks reembed`,
|
|
201
|
+
// and degraded reads warn once per process.
|
|
202
|
+
// ==========================================================================
|
|
203
|
+
|
|
204
|
+
storedProvenance() {
|
|
205
|
+
const raw = this.data.embeddingProvenance || null;
|
|
206
|
+
if (!provenanceMod) return raw;
|
|
207
|
+
return provenanceMod.sanitizeProvenance(raw);
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
vectorMemoryCount() {
|
|
211
|
+
const mems = Array.isArray(this.data.memories) ? this.data.memories : [];
|
|
212
|
+
return mems.filter(m => m && Array.isArray(m.embedding) && m.embedding.length > 0).length;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
/** Store predates ADR-210 (has vectors but no provenance record). */
|
|
216
|
+
isLegacyVectorStore() {
|
|
217
|
+
return !this.storedProvenance() && this.vectorMemoryCount() > 0;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
/** Legacy default: hash, dimension inferred from the stored vectors. */
|
|
221
|
+
inferredLegacyProvenance() {
|
|
222
|
+
const mems = Array.isArray(this.data.memories) ? this.data.memories : [];
|
|
223
|
+
const first = mems.find(m => m && Array.isArray(m.embedding) && m.embedding.length > 0);
|
|
224
|
+
const dim = first ? first.embedding.length : 256;
|
|
225
|
+
if (provenanceMod) return provenanceMod.legacyHashProvenance(dim);
|
|
226
|
+
return { embedderKind: 'hash', modelId: null, dimension: dim, normalize: false, prefixPolicy: 'none' };
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
/** Provenance of the embedder that just produced `embedding`. */
|
|
230
|
+
activeWriteProvenance(embedding) {
|
|
231
|
+
if (this.engine && typeof this.engine.getActiveProvenance === 'function') {
|
|
232
|
+
try { return this.engine.getActiveProvenance(); } catch {}
|
|
233
|
+
}
|
|
234
|
+
return { embedderKind: 'hash', modelId: null, dimension: embedding.length, normalize: true, prefixPolicy: 'none' };
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
/**
|
|
238
|
+
* Gate a vector write (throws on refusal). Stamps provenance on the first
|
|
239
|
+
* write to a fresh store; refuses mismatched writes naming both sides;
|
|
240
|
+
* legacy stores are read-only until re-embedded.
|
|
241
|
+
*/
|
|
242
|
+
checkVectorWrite(active) {
|
|
243
|
+
if (!provenanceMod || !active) return; // enforcement needs the dist module
|
|
244
|
+
if (this.isLegacyVectorStore()) {
|
|
245
|
+
const legacy = this.inferredLegacyProvenance();
|
|
246
|
+
const err = new Error(
|
|
247
|
+
`Vector store ${this.intelPath} predates embedding provenance (ADR-210) and is read-only for vector writes. ` +
|
|
248
|
+
`Stored vectors are treated as ${provenanceMod.describeProvenance(legacy)}; the active embedder is ` +
|
|
249
|
+
`${provenanceMod.describeProvenance(active)}. Run 'ruvector hooks reembed' to re-embed and unlock it.`
|
|
250
|
+
);
|
|
251
|
+
err.code = 'ERR_LEGACY_STORE_READONLY';
|
|
252
|
+
throw err;
|
|
253
|
+
}
|
|
254
|
+
const stored = this.storedProvenance();
|
|
255
|
+
if (!stored) {
|
|
256
|
+
this.data.embeddingProvenance = active;
|
|
257
|
+
return;
|
|
258
|
+
}
|
|
259
|
+
provenanceMod.assertProvenanceMatch(stored, active, this.intelPath);
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
/**
|
|
263
|
+
* Non-throwing write gate honoring RUVECTOR_REEMBED (D5): refuse (default)
|
|
264
|
+
* rethrows; warn skips the write with one stderr warning per process.
|
|
265
|
+
*/
|
|
266
|
+
guardVectorWrite(active) {
|
|
267
|
+
try {
|
|
268
|
+
this.checkVectorWrite(active);
|
|
269
|
+
return { ok: true };
|
|
270
|
+
} catch (e) {
|
|
271
|
+
const policy = provenanceMod ? provenanceMod.resolveReembedPolicy() : 'refuse';
|
|
272
|
+
if (policy === 'warn') {
|
|
273
|
+
if (!Intelligence._reembedWarned) {
|
|
274
|
+
Intelligence._reembedWarned = true;
|
|
275
|
+
console.error(`ruvector: ${e.message} (RUVECTOR_REEMBED=warn: store stays read-only, write skipped)`);
|
|
276
|
+
}
|
|
277
|
+
return { ok: false, skipped: true, error: e.message };
|
|
278
|
+
}
|
|
279
|
+
if (policy === 'auto') e.message += ` (RUVECTOR_REEMBED=auto: run 'ruvector hooks reembed' — in-place re-embedding needs the CLI)`;
|
|
280
|
+
throw e;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
/**
|
|
285
|
+
* ADR-210: reads stay allowed on legacy/mismatched stores, but similarity
|
|
286
|
+
* against differently-embedded vectors is meaningless — say so once.
|
|
287
|
+
*/
|
|
288
|
+
warnRecallProvenance(active) {
|
|
289
|
+
if (!provenanceMod || !active || Intelligence._recallWarned) return;
|
|
290
|
+
let stored = this.storedProvenance();
|
|
291
|
+
if (!stored && this.isLegacyVectorStore()) stored = this.inferredLegacyProvenance();
|
|
292
|
+
if (!stored) return;
|
|
293
|
+
const mismatches = provenanceMod.compareProvenance(stored, active);
|
|
294
|
+
if (mismatches.length > 0) {
|
|
295
|
+
Intelligence._recallWarned = true;
|
|
296
|
+
console.error(
|
|
297
|
+
`ruvector: recall quality degraded — stored vectors are ${provenanceMod.describeProvenance(stored)} ` +
|
|
298
|
+
`but the query was embedded as ${provenanceMod.describeProvenance(active)} (differs on: ${mismatches.join(', ')}). ` +
|
|
299
|
+
`Run 'ruvector hooks reembed' to fix.`
|
|
300
|
+
);
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
|
|
178
304
|
save() {
|
|
179
305
|
const dir = path.dirname(this.intelPath);
|
|
180
306
|
if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
|
|
@@ -207,6 +333,8 @@ class Intelligence {
|
|
|
207
333
|
sonaEnabled: engineStats.sonaEnabled,
|
|
208
334
|
attentionEnabled: engineStats.attentionEnabled,
|
|
209
335
|
embeddingDim: engineStats.memoryDimensions,
|
|
336
|
+
// ADR-210 D1: which embedder actually serves embeds right now
|
|
337
|
+
embedderKind: engineStats.embedderKind,
|
|
210
338
|
totalMemories: engineStats.totalMemories,
|
|
211
339
|
totalEpisodes: engineStats.totalEpisodes,
|
|
212
340
|
trajectoriesRecorded: engineStats.trajectoriesRecorded,
|
|
@@ -248,19 +376,31 @@ class Intelligence {
|
|
|
248
376
|
/**
 * Store a memory and its embedding in `this.data.memories`, then persist
 * the store with `save()`.
 *
 * Engine path: when `this.engine` is set and `engine.remember` resolves to a
 * truthy entry, that entry's embedding is stored. If the engine call throws
 * or yields nothing, the method falls through to the fallback path, which
 * embeds with `this.embed(content)`.
 *
 * Both paths run `guardVectorWrite` before anything is pushed or saved.
 *
 * @param {*} content - content to remember; handed to the engine / `embed`.
 * @param {string} [type='general'] - memory type label stored with the entry.
 * @returns {Promise<object>} `{ stored: true, total }` (plus
 *   `engineStored: true` on the engine path), or
 *   `{ stored: false, skipped: true, reason }` when the guard reports
 *   `ok: false`.
 * @throws whatever `guardVectorWrite` rethrows when the write is refused.
 */
async remember(content, type = 'general') {
  // Use engine if available (VectorDB storage)
  if (this.engine) {
    let entry = null;
    try {
      entry = await this.engine.remember(content, type);
    } catch {}
    if (entry) {
      // ADR-210 D0: validate provenance BEFORE persisting. Refusals
      // propagate as errors (the tool handler reports them) — no silent
      // fallback into a mixed store.
      const guard = this.guardVectorWrite(this.activeWriteProvenance(entry.embedding));
      if (!guard.ok) return { stored: false, skipped: true, reason: guard.error };
      // Also store in legacy format
      this.data.memories = Array.isArray(this.data.memories) ? this.data.memories : [];
      this.data.memories.push({ content, type, created: new Date().toISOString(), embedding: entry.embedding });
      this.save();
      return { stored: true, total: this.data.memories.length, engineStored: true };
    }
  }

  // Fallback
  const embedding = this.embed(content);
  // ADR-210 D0: same gate on the fallback hash path.
  const guard = this.guardVectorWrite({ embedderKind: 'hash', modelId: null, dimension: embedding.length, normalize: true, prefixPolicy: 'none' });
  if (!guard.ok) return { stored: false, skipped: true, reason: guard.error };
  // Normalize a non-array `memories` field before appending.
  this.data.memories = Array.isArray(this.data.memories) ? this.data.memories : [];
  this.data.memories.push({ content, type, created: new Date().toISOString(), embedding });
  this.save();
  return { stored: true, total: this.data.memories.length };
}
|
|
@@ -270,6 +410,11 @@ class Intelligence {
|
|
|
270
410
|
if (this.engine) {
|
|
271
411
|
try {
|
|
272
412
|
const results = await this.engine.recall(query, topK);
|
|
413
|
+
// ADR-210: after recall the engine's lazy init has settled, so the
|
|
414
|
+
// active provenance reflects the embedder that served the query.
|
|
415
|
+
if (typeof this.engine.getActiveProvenance === 'function') {
|
|
416
|
+
this.warnRecallProvenance(this.engine.getActiveProvenance());
|
|
417
|
+
}
|
|
273
418
|
return results.map(r => ({
|
|
274
419
|
content: r.content,
|
|
275
420
|
type: r.type,
|
|
@@ -282,10 +427,12 @@ class Intelligence {
|
|
|
282
427
|
|
|
283
428
|
// Fallback: brute-force
|
|
284
429
|
const queryEmbed = this.embed(query);
|
|
285
|
-
|
|
430
|
+
this.warnRecallProvenance({ embedderKind: 'hash', modelId: null, dimension: queryEmbed.length, normalize: true, prefixPolicy: 'none' });
|
|
431
|
+
const mems = Array.isArray(this.data.memories) ? this.data.memories : [];
|
|
432
|
+
const scored = mems.map((m, i) => ({
|
|
286
433
|
...m,
|
|
287
434
|
index: i,
|
|
288
|
-
score: this.similarity(queryEmbed, m.embedding)
|
|
435
|
+
score: this.similarity(queryEmbed, m && m.embedding)
|
|
289
436
|
}));
|
|
290
437
|
return scored.sort((a, b) => b.score - a.score).slice(0, topK);
|
|
291
438
|
}
|
|
@@ -363,7 +510,7 @@ class Intelligence {
|
|
|
363
510
|
const server = new Server(
|
|
364
511
|
{
|
|
365
512
|
name: 'ruvector',
|
|
366
|
-
version: '0.2.
|
|
513
|
+
version: '0.2.30',
|
|
367
514
|
},
|
|
368
515
|
{
|
|
369
516
|
capabilities: {
|
|
@@ -1586,12 +1733,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1586
1733
|
}
|
|
1587
1734
|
|
|
1588
1735
|
case 'hooks_remember': {
|
|
1736
|
+
// ADR-210 D0: provenance refusals throw and surface via the catch-all
|
|
1737
|
+
// as isError; RUVECTOR_REEMBED=warn skips return { stored: false }.
|
|
1589
1738
|
const result = await intel.remember(args.content, args.type || 'general');
|
|
1590
1739
|
return {
|
|
1591
1740
|
content: [{
|
|
1592
1741
|
type: 'text',
|
|
1593
1742
|
text: JSON.stringify({
|
|
1594
|
-
success:
|
|
1743
|
+
success: result.stored !== false,
|
|
1595
1744
|
...result
|
|
1596
1745
|
}, null, 2)
|
|
1597
1746
|
}]
|
|
@@ -1818,6 +1967,32 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1818
1967
|
}
|
|
1819
1968
|
}
|
|
1820
1969
|
if (data.memories && Array.isArray(data.memories)) {
|
|
1970
|
+
// ADR-210 D0: imported memories carrying vectors are a vector
|
|
1971
|
+
// write — enforce the store's embedding provenance (this was a
|
|
1972
|
+
// bypass before wave 2).
|
|
1973
|
+
const withVectors = data.memories.filter(m => m && Array.isArray(m.embedding) && m.embedding.length > 0);
|
|
1974
|
+
if (withVectors.length > 0 && provenanceMod) {
|
|
1975
|
+
if (intel.isLegacyVectorStore()) {
|
|
1976
|
+
const err = new Error(
|
|
1977
|
+
`Vector store ${intel.intelPath} predates embedding provenance (ADR-210) and is read-only for vector writes. ` +
|
|
1978
|
+
`Run 'ruvector hooks reembed' before importing vector memories.`
|
|
1979
|
+
);
|
|
1980
|
+
err.code = 'ERR_LEGACY_STORE_READONLY';
|
|
1981
|
+
throw err;
|
|
1982
|
+
}
|
|
1983
|
+
const stored = intel.storedProvenance();
|
|
1984
|
+
if (stored) {
|
|
1985
|
+
const bad = withVectors.find(m => m.embedding.length !== stored.dimension);
|
|
1986
|
+
if (bad) {
|
|
1987
|
+
throw new Error(
|
|
1988
|
+
`Import refused (ADR-210): ${intel.intelPath} records embedding provenance ` +
|
|
1989
|
+
`${provenanceMod.describeProvenance(stored)}, but imported memories contain ` +
|
|
1990
|
+
`${bad.embedding.length}-dimensional vectors with undeclared provenance. ` +
|
|
1991
|
+
`Mixed stores are never created — re-embed the data or the store.`
|
|
1992
|
+
);
|
|
1993
|
+
}
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1821
1996
|
if (merge) {
|
|
1822
1997
|
intel.data.memories = [...(intel.data.memories || []), ...data.memories];
|
|
1823
1998
|
} else {
|
|
@@ -1848,7 +2023,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1848
2023
|
content: [{
|
|
1849
2024
|
type: 'text',
|
|
1850
2025
|
text: JSON.stringify({ success: false, error: e.message }, null, 2)
|
|
1851
|
-
}]
|
|
2026
|
+
}],
|
|
2027
|
+
isError: true
|
|
1852
2028
|
};
|
|
1853
2029
|
}
|
|
1854
2030
|
}
|
|
@@ -3837,18 +4013,23 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
3837
4013
|
|
|
3838
4014
|
// Start server
|
|
3839
4015
|
/**
 * Entry point: install process-exit handlers, connect the MCP server to a
 * stdio transport, then exit when stdin ends.
 *
 * The signal handlers are deliberately installed before the awaited
 * `server.connect` so a signal received during startup is handled here.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Exit cleanly when the parent process closes the stdio pipe or sends a
  // termination signal. Without these handlers, the MCP server can survive
  // the parent's death (e.g. when the client is killed with SIGKILL) and
  // accumulate as an orphaned process under PPID=1, consuming RSS for the
  // lifetime of the user session. Registered BEFORE the (async) transport
  // connect: a signal arriving during startup previously hit the default
  // handler and died with a non-zero code — a race that made the
  // sigterm-cleanup suite flaky (SIGTERM and SIGINT failed alternately on
  // CI depending on which spawn won the 2s ready-wait).
  process.on('SIGINT', () => process.exit(0));
  process.on('SIGTERM', () => process.exit(0));

  const transport = new StdioServerTransport();
  await server.connect(transport);
  // Status goes to stderr.
  console.error('RuVector MCP server running on stdio');

  // stdin reaching end-of-stream ends the process with exit code 0.
  process.stdin.on('end', () => process.exit(0));
}
|
|
3853
4034
|
|
|
3854
4035
|
main().catch(console.error);
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding provenance — ADR-210 D0 cross-cutting invariant.
|
|
3
|
+
*
|
|
4
|
+
* Every persisted vector store written through the embedding path records
|
|
5
|
+
* `{ embedderKind, modelId, dimension, normalize, prefixPolicy }`. Inserts
|
|
6
|
+
* whose provenance does not match the store's recorded provenance are
|
|
7
|
+
* REFUSED (clear error naming both sides), never coerced. Stores that
|
|
8
|
+
* predate provenance metadata are treated as legacy hash stores and open
|
|
9
|
+
* read-only for vector writes until re-embedded (`ruvector hooks reembed`).
|
|
10
|
+
*
|
|
11
|
+
* This module is the single source of truth for:
|
|
12
|
+
* - the provenance record type + compare/refuse logic (D0),
|
|
13
|
+
* - legacy-default derivation for pre-ADR-210 stores (D0),
|
|
14
|
+
* - per-model query/passage prefix policies (D4),
|
|
15
|
+
* - rollout flag resolution: RUVECTOR_EMBEDDER / RUVECTOR_ONNX /
|
|
16
|
+
* RUVECTOR_REEMBED (D5),
|
|
17
|
+
* - the once-per-process loud hash-fallback warning (D1).
|
|
18
|
+
*/
|
|
19
|
+
/**
 * Prefix convention a model's texts are embedded under (D4).
 * NOTE(review): presumably 'none' = no prefixes, 'required' = prefixes
 * mandatory on both sides, 'query-recommended' = query-side instruction
 * only — confirm against the MODEL_PREFIXES registry.
 */
export type PrefixPolicy = 'none' | 'required' | 'query-recommended';
/** Which entry point a text is embedded through: as a query or as a passage. */
export type EmbedTextKind = 'query' | 'passage';
|
|
21
|
+
/** Embedder identity classes. `modelId` carries the exact model. */
|
|
22
|
+
export type EmbedderKind = 'onnx-minilm' | 'onnx' | 'hash';
|
|
23
|
+
export interface EmbeddingProvenance {
|
|
24
|
+
/** Embedder family that produced the vectors. */
|
|
25
|
+
embedderKind: EmbedderKind | string;
|
|
26
|
+
/** Exact model id (e.g. 'all-MiniLM-L6-v2'); null for the hash embedder. */
|
|
27
|
+
modelId: string | null;
|
|
28
|
+
/** Vector dimension. */
|
|
29
|
+
dimension: number;
|
|
30
|
+
/** Whether vectors were L2-normalized at embed time. */
|
|
31
|
+
normalize: boolean;
|
|
32
|
+
/** Prefix convention the texts were embedded under (D4). */
|
|
33
|
+
prefixPolicy: PrefixPolicy;
|
|
34
|
+
}
|
|
35
|
+
/** Embedder choice as returned by `resolveEmbedderSelection` (RUVECTOR_EMBEDDER / RUVECTOR_ONNX). */
export type EmbedderSelection = 'auto' | 'minilm' | 'hash';
/** Mismatch-handling policy as returned by `resolveReembedPolicy` (RUVECTOR_REEMBED). */
export type ReembedPolicy = 'refuse' | 'warn' | 'auto';
/** Prefix configuration for one model; the value type of `MODEL_PREFIXES`. */
export interface ModelPrefixSpec {
    /** Prefix convention this model is registered under. */
    prefixPolicy: PrefixPolicy;
    /** NOTE(review): presumably the text prepended to query embeds — confirm in the implementation. */
    queryPrefix: string;
    /** NOTE(review): presumably the text prepended to passage embeds — confirm in the implementation. */
    passagePrefix: string;
}
|
|
42
|
+
/** BGE en v1.5 documented query instruction (short query → long passage). */
|
|
43
|
+
export declare const BGE_QUERY_INSTRUCTION = "Represent this sentence for searching relevant passages: ";
|
|
44
|
+
/**
|
|
45
|
+
* Prefix conventions per model card:
|
|
46
|
+
* - all-MiniLM-L6-v2 / L12: general semantic search, NO prefixes.
|
|
47
|
+
* - e5-small-v2: REQUIRES 'query: ' / 'passage: ' (quality degrades without).
|
|
48
|
+
* - bge-small/base-en-v1.5: query instruction recommended for retrieval;
|
|
49
|
+
* passages need no instruction.
|
|
50
|
+
* - gte-small: no prefixes documented.
|
|
51
|
+
*/
|
|
52
|
+
export declare const MODEL_PREFIXES: Record<string, ModelPrefixSpec>;
|
|
53
|
+
/**
|
|
54
|
+
* Prefix spec for a model; unknown models get the no-prefix policy.
|
|
55
|
+
* Own-property lookup only: a hostile model id like '__proto__' or
|
|
56
|
+
* 'constructor' must resolve to NO_PREFIX, not to a prototype member
|
|
57
|
+
* (ADR-210 security pass).
|
|
58
|
+
*/
|
|
59
|
+
export declare function getModelPrefixSpec(modelId: string | null | undefined): ModelPrefixSpec;
|
|
60
|
+
/**
|
|
61
|
+
* Pure prefix application (D4): the exact text handed to the tokenizer for a
|
|
62
|
+
* query/passage embed of `text` under `modelId`'s registered policy.
|
|
63
|
+
* MiniLM applies NO prefix on either entry point (acceptance gates 6–7).
|
|
64
|
+
*/
|
|
65
|
+
export declare function prefixText(modelId: string | null | undefined, kind: EmbedTextKind, text: string): string;
|
|
66
|
+
/** Embedder family for an ONNX model id. */
|
|
67
|
+
export declare function embedderKindForModel(modelId: string | null | undefined): EmbedderKind;
|
|
68
|
+
/**
|
|
69
|
+
* Legacy default for stores that predate provenance metadata: hash-embedded,
|
|
70
|
+
* un-normalized as far as we can prove, no prefixes. Such stores open
|
|
71
|
+
* READ-ONLY for vector writes until re-embedded.
|
|
72
|
+
*/
|
|
73
|
+
export declare function legacyHashProvenance(dimension?: number): EmbeddingProvenance;
|
|
74
|
+
/** Human-readable one-liner for error messages. */
|
|
75
|
+
export declare function describeProvenance(p: EmbeddingProvenance): string;
|
|
76
|
+
/** Field names on which two provenance records disagree (empty = match). */
|
|
77
|
+
export declare function compareProvenance(a: EmbeddingProvenance, b: EmbeddingProvenance): string[];
|
|
78
|
+
/** Upper bound accepted for a provenance dimension read from disk. */
|
|
79
|
+
export declare const MAX_PROVENANCE_DIMENSION = 65536;
|
|
80
|
+
/**
|
|
81
|
+
* Sanitize a provenance record read from DISK (a `.meta.json` sidecar or
|
|
82
|
+
* `intelligence.json`). On-disk JSON is untrusted input: a malformed or
|
|
83
|
+
* adversarial record must never crash the caller. Anything that is not a
|
|
84
|
+
* plausibly-valid record is treated as ABSENT (returns null), which callers
|
|
85
|
+
* already handle as the no-provenance / legacy path — conservative for a
|
|
86
|
+
* corrupted stamp (the store degrades to read-only for vector writes rather
|
|
87
|
+
* than accepting writes under a fabricated identity).
|
|
88
|
+
*/
|
|
89
|
+
export declare function sanitizeProvenance(value: unknown): EmbeddingProvenance | null;
|
|
90
|
+
/** Thrown when an insert's provenance does not match the store's (D0). */
export declare class ProvenanceMismatchError extends Error {
    /** Error code string; its value is not visible in this declaration. */
    code: string;
    /** Provenance record of the store side of the comparison. */
    store: EmbeddingProvenance;
    /** Provenance record of the active (inserting) side of the comparison. */
    active: EmbeddingProvenance;
    /** NOTE(review): presumably the field names on which the two records disagree, as produced by `compareProvenance` — confirm. */
    mismatches: string[];
    /**
     * @param store - provenance of the store.
     * @param active - provenance of the insert.
     * @param mismatches - list of mismatching entries.
     * @param storeName - name of the store (how it is used is not visible in this declaration).
     */
    constructor(store: EmbeddingProvenance, active: EmbeddingProvenance, mismatches: string[], storeName: string);
}
|
|
98
|
+
/** Refuse mismatched inserts with an error naming both sides (D0). */
|
|
99
|
+
export declare function assertProvenanceMatch(store: EmbeddingProvenance, active: EmbeddingProvenance, storeName?: string): void;
|
|
100
|
+
/**
|
|
101
|
+
* Resolve RUVECTOR_EMBEDDER / RUVECTOR_ONNX.
|
|
102
|
+
* Precedence: RUVECTOR_EMBEDDER wins when both are set; RUVECTOR_ONNX=0 is
|
|
103
|
+
* shorthand for `hash`, =1 for `minilm`. Unrecognized values fall back to
|
|
104
|
+
* 'auto' (MiniLM when loadable, loud hash fallback otherwise).
|
|
105
|
+
*/
|
|
106
|
+
export declare function resolveEmbedderSelection(env?: NodeJS.ProcessEnv): EmbedderSelection;
|
|
107
|
+
/**
|
|
108
|
+
* Resolve RUVECTOR_REEMBED: what happens when opening a store whose
|
|
109
|
+
* provenance mismatches the active embedder.
|
|
110
|
+
* refuse (default) — error;
|
|
111
|
+
* warn — open read-only with a single warning;
|
|
112
|
+
* auto — re-embed in place when source text exists, refuse otherwise.
|
|
113
|
+
*/
|
|
114
|
+
export declare function resolveReembedPolicy(env?: NodeJS.ProcessEnv): ReembedPolicy;
|
|
115
|
+
/**
|
|
116
|
+
* Emit exactly ONE stderr warning per process the first time the hash
|
|
117
|
+
* fallback serves an embed that the ONNX embedder was supposed to handle
|
|
118
|
+
* (acceptance gate 2). Returns true when the warning was emitted by this call.
|
|
119
|
+
*/
|
|
120
|
+
export declare function warnHashFallbackOnce(reason?: string): boolean;
|
|
121
|
+
/** Whether the once-per-process fallback warning has fired. */
|
|
122
|
+
export declare function hashFallbackWarned(): boolean;
|
|
123
|
+
/** Test hook: reset the once-per-process warning latch. */
|
|
124
|
+
export declare function resetHashFallbackWarningForTests(): void;
|
|
125
|
+
declare const _default: {
|
|
126
|
+
MODEL_PREFIXES: Record<string, ModelPrefixSpec>;
|
|
127
|
+
BGE_QUERY_INSTRUCTION: string;
|
|
128
|
+
getModelPrefixSpec: typeof getModelPrefixSpec;
|
|
129
|
+
prefixText: typeof prefixText;
|
|
130
|
+
embedderKindForModel: typeof embedderKindForModel;
|
|
131
|
+
legacyHashProvenance: typeof legacyHashProvenance;
|
|
132
|
+
describeProvenance: typeof describeProvenance;
|
|
133
|
+
compareProvenance: typeof compareProvenance;
|
|
134
|
+
sanitizeProvenance: typeof sanitizeProvenance;
|
|
135
|
+
MAX_PROVENANCE_DIMENSION: number;
|
|
136
|
+
ProvenanceMismatchError: typeof ProvenanceMismatchError;
|
|
137
|
+
assertProvenanceMatch: typeof assertProvenanceMatch;
|
|
138
|
+
resolveEmbedderSelection: typeof resolveEmbedderSelection;
|
|
139
|
+
resolveReembedPolicy: typeof resolveReembedPolicy;
|
|
140
|
+
warnHashFallbackOnce: typeof warnHashFallbackOnce;
|
|
141
|
+
hashFallbackWarned: typeof hashFallbackWarned;
|
|
142
|
+
resetHashFallbackWarningForTests: typeof resetHashFallbackWarningForTests;
|
|
143
|
+
};
|
|
144
|
+
export default _default;
|
|
145
|
+
//# sourceMappingURL=embedding-provenance.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-provenance.d.ts","sourceRoot":"","sources":["../../src/core/embedding-provenance.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,MAAM,YAAY,GAAG,MAAM,GAAG,UAAU,GAAG,mBAAmB,CAAC;AACrE,MAAM,MAAM,aAAa,GAAG,OAAO,GAAG,SAAS,CAAC;AAEhD,oEAAoE;AACpE,MAAM,MAAM,YAAY,GAAG,aAAa,GAAG,MAAM,GAAG,MAAM,CAAC;AAE3D,MAAM,WAAW,mBAAmB;IAClC,iDAAiD;IACjD,YAAY,EAAE,YAAY,GAAG,MAAM,CAAC;IACpC,4EAA4E;IAC5E,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,wBAAwB;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,wDAAwD;IACxD,SAAS,EAAE,OAAO,CAAC;IACnB,4DAA4D;IAC5D,YAAY,EAAE,YAAY,CAAC;CAC5B;AAED,MAAM,MAAM,iBAAiB,GAAG,MAAM,GAAG,QAAQ,GAAG,MAAM,CAAC;AAC3D,MAAM,MAAM,aAAa,GAAG,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;AAMvD,MAAM,WAAW,eAAe;IAC9B,YAAY,EAAE,YAAY,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,aAAa,EAAE,MAAM,CAAC;CACvB;AAID,6EAA6E;AAC7E,eAAO,MAAM,qBAAqB,8DAC2B,CAAC;AAE9D;;;;;;;GAOG;AACH,eAAO,MAAM,cAAc,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAO1D,CAAC;AAEF;;;;;GAKG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,eAAe,CAKtF;AAED;;;;GAIG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,EAAE,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,GAAG,MAAM,CAIxG;AAED,4CAA4C;AAC5C,wBAAgB,oBAAoB,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,YAAY,CAErF;AAMD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,SAAS,GAAE,MAAY,GAAG,mBAAmB,CAEjF;AAED,mDAAmD;AACnD,wBAAgB,kBAAkB,CAAC,CAAC,EAAE,mBAAmB,GAAG,MAAM,CAGjE;AAED,4EAA4E;AAC5E,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,mBAAmB,EAAE,CAAC,EAAE,mBAAmB,GAAG,MAAM,EAAE,CAQ1F;AAED,sEAAsE;AACtE,eAAO,MAAM,wBAAwB,QAAQ,CAAC;AAI9C;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,OAAO,GAAG,mBAAmB,GAAG,IAAI,CAoC7E;AAED,0EAA0E;AAC1E,qBAAa,uBAAwB,SAAQ,KAAK;IAChD,IAAI,SAA8B;IAClC,KAAK,EAAE,mBAAmB,CAAC;IAC3B,MAAM,EAAE,mBAAmB,CAAC;IAC5B,UAAU,EAAE,MAAM,EAAE,CAAC;gBAET,KAAK,EAAE,mBAAmB,EAAE,MAAM,EAAE,mBAAmB,EAAE,UAAU,EAAE,MAAM,EAAE,EAAE,SAAS,EAAE,MAAM;CAa7G;AAED,sEAAsE;AACtE,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,mBAAmB,EAC1B,MAAM,EAAE,mBAAmB,EAC3B,SAAS,GAAE,MAAuB,GACjC,IAAI,CAKN;AAMD;;;;;GAKG;AACH,wBAAgB,wBAAwB,CAAC,GAAG,GAA
E,MAAM,CAAC,UAAwB,GAAG,iBAAiB,CAOhG;AAED;;;;;;GAMG;AACH,wBAAgB,oBAAoB,CAAC,GAAG,GAAE,MAAM,CAAC,UAAwB,GAAG,aAAa,CAIxF;AAQD;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAW7D;AAED,+DAA+D;AAC/D,wBAAgB,kBAAkB,IAAI,OAAO,CAE5C;AAED,2DAA2D;AAC3D,wBAAgB,gCAAgC,IAAI,IAAI,CAEvD;;;;;;;;;;;;;;;;;;;;AAED,wBAkBE"}
|