agentic-flow 2.0.12-fix.8 → 2.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/.tsbuildinfo +1 -1
- package/dist/billing/cli.js +0 -0
- package/dist/cli-proxy.js +0 -0
- package/dist/reasoningbank/utils/embeddings.d.ts +6 -0
- package/dist/reasoningbank/utils/embeddings.d.ts.map +1 -1
- package/dist/reasoningbank/utils/embeddings.js +43 -9
- package/dist/reasoningbank/utils/embeddings.js.map +1 -1
- package/dist/router/providers/onnx-local-optimized.d.ts +10 -1
- package/dist/router/providers/onnx-local-optimized.d.ts.map +1 -1
- package/dist/router/providers/onnx-local-optimized.js +10 -9
- package/dist/router/providers/onnx-local-optimized.js.map +1 -1
- package/dist/router/providers/onnx-local.d.ts +9 -0
- package/dist/router/providers/onnx-local.d.ts.map +1 -1
- package/dist/router/providers/onnx-local.js +22 -7
- package/dist/router/providers/onnx-local.js.map +1 -1
- package/dist/transport/index.d.ts +0 -1
- package/dist/transport/index.d.ts.map +1 -1
- package/dist/transport/index.js +0 -1
- package/dist/transport/index.js.map +1 -1
- package/package.json +4 -5
- package/wasm/reasoningbank/reasoningbank_wasm_bg.js +28 -28
- package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm +0 -0
- package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm.d.ts +2 -2
- package/dist/transport/quic-loader.d.ts +0 -217
- package/dist/transport/quic-loader.d.ts.map +0 -1
- package/dist/transport/quic-loader.js +0 -412
- package/dist/transport/quic-loader.js.map +0 -1
package/dist/billing/cli.js
CHANGED
|
File without changes
|
package/dist/cli-proxy.js
CHANGED
|
File without changes
|
|
@@ -1,6 +1,12 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Embedding generation for semantic similarity
|
|
3
3
|
* Uses local transformers.js - no API key required!
|
|
4
|
+
*
|
|
5
|
+
* `@xenova/transformers` is an OPTIONAL dependency. The module is loaded
|
|
6
|
+
* dynamically inside `initializeEmbeddings()` so the rest of this file is
|
|
7
|
+
* importable even when transformers.js is absent (e.g. when consumers
|
|
8
|
+
* run `npm install --omit=optional`). Code paths that don't call
|
|
9
|
+
* `computeEmbedding()` continue to work without ever loading the module.
|
|
4
10
|
*/
|
|
5
11
|
/**
|
|
6
12
|
* Compute embedding for text using local model
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../../src/reasoningbank/utils/embeddings.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"embeddings.d.ts","sourceRoot":"","sources":["../../../src/reasoningbank/utils/embeddings.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAyFH;;GAEG;AACH,wBAAsB,gBAAgB,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAuE1E;AAED;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,YAAY,EAAE,CAAC,CAEpF;AAED;;GAEG;AACH,wBAAgB,sBAAsB,IAAI,MAAM,CAE/C;AA+CD;;;GAGG;AACH,wBAAgB,mBAAmB,IAAI,IAAI,CAO1C"}
|
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Embedding generation for semantic similarity
|
|
3
3
|
* Uses local transformers.js - no API key required!
|
|
4
|
+
*
|
|
5
|
+
* `@xenova/transformers` is an OPTIONAL dependency. The module is loaded
|
|
6
|
+
* dynamically inside `initializeEmbeddings()` so the rest of this file is
|
|
7
|
+
* importable even when transformers.js is absent (e.g. when consumers
|
|
8
|
+
* run `npm install --omit=optional`). Code paths that don't call
|
|
9
|
+
* `computeEmbedding()` continue to work without ever loading the module.
|
|
4
10
|
*/
|
|
5
|
-
import { pipeline, env } from '@xenova/transformers';
|
|
6
11
|
import { loadConfig } from './config.js';
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
|
|
10
|
-
|
|
12
|
+
// Cached references resolved at first call to initializeEmbeddings(). Types
|
|
13
|
+
// are imported as `type-only` so TypeScript can typecheck the file without
|
|
14
|
+
// requiring @xenova/transformers to be installed at build time — the actual
|
|
15
|
+
// runtime import is dynamic below.
|
|
16
|
+
let pipeline = null;
|
|
17
|
+
let env = null;
|
|
11
18
|
let embeddingPipeline = null;
|
|
12
19
|
let initializationPromise = null;
|
|
13
20
|
const embeddingCache = new Map();
|
|
@@ -37,15 +44,38 @@ async function initializeEmbeddings() {
|
|
|
37
44
|
}
|
|
38
45
|
// RACE CONDITION FIX: Create promise for concurrent callers to await
|
|
39
46
|
initializationPromise = (async () => {
|
|
47
|
+
// Optional-dep load: try to import @xenova/transformers. If absent,
|
|
48
|
+
// emit a clear warning and let callers fall back to hash-based embeddings.
|
|
49
|
+
if (!pipeline || !env) {
|
|
50
|
+
try {
|
|
51
|
+
const transformers = await import('@xenova/transformers');
|
|
52
|
+
pipeline = transformers.pipeline;
|
|
53
|
+
env = transformers.env;
|
|
54
|
+
// Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)
|
|
55
|
+
// The native ONNX runtime causes "DefaultLogger not registered" errors in Node.js
|
|
56
|
+
env.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy
|
|
57
|
+
env.backends.onnx.wasm.numThreads = 1; // Single thread for stability
|
|
58
|
+
}
|
|
59
|
+
catch (err) {
|
|
60
|
+
console.warn('[Embeddings] @xenova/transformers not installed (optional dependency).');
|
|
61
|
+
console.warn('[Embeddings] Install with: npm install @xenova/transformers');
|
|
62
|
+
console.warn('[Embeddings] Falling back to hash-based embeddings');
|
|
63
|
+
initializationPromise = null;
|
|
64
|
+
return;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
40
67
|
console.log('[Embeddings] Initializing local embedding model (Xenova/all-MiniLM-L6-v2)...');
|
|
41
68
|
console.log('[Embeddings] First run will download ~23MB model...');
|
|
42
69
|
try {
|
|
43
|
-
|
|
44
|
-
|
|
70
|
+
// `pipeline('feature-extraction', ...)` returns a union; narrow to
|
|
71
|
+
// FeatureExtractionPipeline so call-sites can use .pooling / .normalize.
|
|
72
|
+
embeddingPipeline = (await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2', { quantized: true } // Smaller, faster
|
|
73
|
+
));
|
|
45
74
|
console.log('[Embeddings] Local model ready! (384 dimensions)');
|
|
46
75
|
}
|
|
47
76
|
catch (error) {
|
|
48
|
-
|
|
77
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
78
|
+
console.error('[Embeddings] Failed to initialize:', msg);
|
|
49
79
|
console.warn('[Embeddings] Falling back to hash-based embeddings');
|
|
50
80
|
// Reset promise so retry is possible
|
|
51
81
|
initializationPromise = null;
|
|
@@ -73,10 +103,14 @@ export async function computeEmbedding(text) {
|
|
|
73
103
|
pooling: 'mean',
|
|
74
104
|
normalize: true
|
|
75
105
|
});
|
|
106
|
+
// output.data is a Tensor.data typed-array union; cast to a Float32-
|
|
107
|
+
// compatible source. The model is feature-extraction with normalize:true
|
|
108
|
+
// so the underlying buffer is always Float32 at runtime.
|
|
76
109
|
embedding = new Float32Array(output.data);
|
|
77
110
|
}
|
|
78
111
|
catch (error) {
|
|
79
|
-
|
|
112
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
113
|
+
console.error('[Embeddings] Generation failed:', msg);
|
|
80
114
|
embedding = hashEmbed(text, 384); // Fallback
|
|
81
115
|
}
|
|
82
116
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../../src/reasoningbank/utils/embeddings.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,iFAAiF;AACjF,kFAAkF;AAClF,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,6BAA6B;AACnE,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,8BAA8B;AAErE,IAAI,iBAAiB,GAAQ,IAAI,CAAC;AAClC,IAAI,qBAAqB,GAAyB,IAAI,CAAC;AACvD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAwB,CAAC;AACvD,8DAA8D;AAC9D,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAE1D;;;GAGG;AACH,KAAK,UAAU,oBAAoB;IACjC,sBAAsB;IACtB,IAAI,iBAAiB;QAAE,OAAO;IAE9B,sDAAsD;IACtD,IAAI,qBAAqB,EAAE,CAAC;QAC1B,OAAO,qBAAqB,CAAC;IAC/B,CAAC;IAED,mEAAmE;IACnE,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,KAAK;QACzC,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC;QACzC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAChC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAEpD,IAAI,QAAQ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,qEAAqE,CAAC,CAAC;QACnF,OAAO,CAAC,GAAG,CAAC,gFAAgF,CAAC,CAAC;QAC9F,OAAO;IACT,CAAC;IAED,qEAAqE;IACrE,qBAAqB,GAAG,CAAC,KAAK,IAAI,EAAE;QAClC,OAAO,CAAC,GAAG,CAAC,8EAA8E,CAAC,CAAC;QAC5F,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;QAEnE,IAAI,CAAC;YACH,iBAAiB,GAAG,MAAM,QAAQ,CAChC,oBAAoB,EACpB,yBAAyB,EACzB,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,kBAAkB;aACvC,CAAC;YACF,OAAO,CAAC,GAAG,CAAC,kDAAkD,CAAC,CAAC;QAClE,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,CAAC;YAC7E,OAAO,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;YACnE,qCAAqC;YACrC,qBAAqB,GAAG,IAAI,CAAC;QAC/B,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,qBAAqB,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAY;IACjD,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,cAAc;IACd,MAAM,QAAQ,GAAG,SAAS,IAAI,EAAE,CAAC;IACjC,IAAI,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjC,OAAO,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;IACvC,CAAC;IAED,IAAI,SAAuB,CAAC;IAE5B,uBAAuB;IACvB,MAAM,oBA
AoB,EAAE,CAAC;IAE7B,IAAI,iBAAiB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,IAAI,EAAE;gBAC3C,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YACH,SAAS,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,KAAK,EAAE,OAAO,IAAI,KAAK,CAAC,CAAC;YAC1E,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,WAAW;QAC/C,CAAC;IACH,CAAC;SAAM,CAAC;QACN,oCAAoC;QACpC,MAAM,IAAI,GAAG,MAAM,EAAE,UAAU,EAAE,UAAU,IAAI,GAAG,CAAC;QACnD,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,sDAAsD;IACtD,MAAM,aAAa,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACpD,IAAI,aAAa,EAAE,CAAC;QAClB,YAAY,CAAC,aAAa,CAAC,CAAC;QAC5B,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,sCAAsC;IACtC,2DAA2D;IAC3D,IAAI,cAAc,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;QAChC,8DAA8D;QAC9D,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;QACpD,IAAI,QAAQ,EAAE,CAAC;YACb,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAChC,uBAAuB;YACvB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC5C,IAAI,KAAK,EAAE,CAAC;gBACV,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IACD,cAAc,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAExC,6CAA6C;IAC7C,MAAM,GAAG,GAAG,MAAM,EAAE,UAAU,EAAE,iBAAiB,IAAI,IAAI,CAAC;IAC1D,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE;QAC9B,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAChC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC;IAEf,2CAA2C;IAC3C,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEvC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAe;IACzD,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO,GAAG,CAAC,CAAC,uCAAuC;AACrD,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY,EAAE,IAAY;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC;IAEnC,wDAAwD;IACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG
,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACvE,CAAC;IAED,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,IAAI,CAAC,CAAC;IACZ,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,GAAiB;IAClC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IACD,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAErB,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAE1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IAChB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,mBAAmB;IACjC,iDAAiD;IACjD,KAAK,MAAM,KAAK,IAAI,eAAe,CAAC,MAAM,EAAE,EAAE,CAAC;QAC7C,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;IACD,eAAe,CAAC,KAAK,EAAE,CAAC;IACxB,cAAc,CAAC,KAAK,EAAE,CAAC;AACzB,CAAC","sourcesContent":["/**\n * Embedding generation for semantic similarity\n * Uses local transformers.js - no API key required!\n */\n\nimport { pipeline, env } from '@xenova/transformers';\nimport { loadConfig } from './config.js';\n\n// Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)\n// The native ONNX runtime causes \"DefaultLogger not registered\" errors in Node.js\nenv.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy\nenv.backends.onnx.wasm.numThreads = 1; // Single thread for stability\n\nlet embeddingPipeline: any = null;\nlet initializationPromise: Promise<void> | null = null;\nconst embeddingCache = new Map<string, 
Float32Array>();\n// MEMORY LEAK FIX: Track TTL timers so they can be cleaned up\nconst embeddingTimers = new Map<string, NodeJS.Timeout>();\n\n/**\n * Initialize the embedding pipeline (lazy load)\n * RACE CONDITION FIX: Use promise-based initialization instead of busy-wait\n */\nasync function initializeEmbeddings(): Promise<void> {\n // Already initialized\n if (embeddingPipeline) return;\n\n // Initialization in progress - await existing promise\n if (initializationPromise) {\n return initializationPromise;\n }\n\n // Detect npx environment (known transformer initialization issues)\n const isNpxEnv = process.env.npm_lifecycle_event === 'npx' ||\n process.env.npm_execpath?.includes('npx') ||\n process.cwd().includes('/_npx/') ||\n process.cwd().includes('\\\\_npx\\\\');\n\n if (isNpxEnv && !process.env.FORCE_TRANSFORMERS) {\n console.log('[Embeddings] NPX environment detected - using hash-based embeddings');\n console.log('[Embeddings] For semantic search, install globally: npm install -g claude-flow');\n return;\n }\n\n // RACE CONDITION FIX: Create promise for concurrent callers to await\n initializationPromise = (async () => {\n console.log('[Embeddings] Initializing local embedding model (Xenova/all-MiniLM-L6-v2)...');\n console.log('[Embeddings] First run will download ~23MB model...');\n\n try {\n embeddingPipeline = await pipeline(\n 'feature-extraction',\n 'Xenova/all-MiniLM-L6-v2',\n { quantized: true } // Smaller, faster\n );\n console.log('[Embeddings] Local model ready! 
(384 dimensions)');\n } catch (error: any) {\n console.error('[Embeddings] Failed to initialize:', error?.message || error);\n console.warn('[Embeddings] Falling back to hash-based embeddings');\n // Reset promise so retry is possible\n initializationPromise = null;\n }\n })();\n\n return initializationPromise;\n}\n\n/**\n * Compute embedding for text using local model\n */\nexport async function computeEmbedding(text: string): Promise<Float32Array> {\n const config = loadConfig();\n\n // Check cache\n const cacheKey = `local:${text}`;\n if (embeddingCache.has(cacheKey)) {\n return embeddingCache.get(cacheKey)!;\n }\n\n let embedding: Float32Array;\n\n // Initialize if needed\n await initializeEmbeddings();\n\n if (embeddingPipeline) {\n try {\n // Use transformers.js for real embeddings\n const output = await embeddingPipeline(text, {\n pooling: 'mean',\n normalize: true\n });\n embedding = new Float32Array(output.data);\n } catch (error: any) {\n console.error('[Embeddings] Generation failed:', error?.message || error);\n embedding = hashEmbed(text, 384); // Fallback\n }\n } else {\n // Fallback to hash-based embeddings\n const dims = config?.embeddings?.dimensions || 384;\n embedding = hashEmbed(text, dims);\n }\n\n // MEMORY LEAK FIX: Clear existing timer if key exists\n const existingTimer = embeddingTimers.get(cacheKey);\n if (existingTimer) {\n clearTimeout(existingTimer);\n embeddingTimers.delete(cacheKey);\n }\n\n // Cache with LRU (limit 1000 entries)\n // PERFORMANCE FIX: Use proper LRU by tracking access order\n if (embeddingCache.size >= 1000) {\n // Find and remove oldest entry (first key in iteration order)\n const firstKey = embeddingCache.keys().next().value;\n if (firstKey) {\n embeddingCache.delete(firstKey);\n // Also clear its timer\n const timer = embeddingTimers.get(firstKey);\n if (timer) {\n clearTimeout(timer);\n embeddingTimers.delete(firstKey);\n }\n }\n }\n embeddingCache.set(cacheKey, embedding);\n\n // Set TTL for cache entry with 
tracked timer\n const ttl = config?.embeddings?.cache_ttl_seconds || 3600;\n const timerId = setTimeout(() => {\n embeddingCache.delete(cacheKey);\n embeddingTimers.delete(cacheKey);\n }, ttl * 1000);\n\n // MEMORY LEAK FIX: Track timer for cleanup\n embeddingTimers.set(cacheKey, timerId);\n\n return embedding;\n}\n\n/**\n * Batch compute embeddings (more efficient)\n */\nexport async function computeEmbeddingBatch(texts: string[]): Promise<Float32Array[]> {\n return Promise.all(texts.map(text => computeEmbedding(text)));\n}\n\n/**\n * Get embedding dimensions\n */\nexport function getEmbeddingDimensions(): number {\n return 384; // all-MiniLM-L6-v2 uses 384 dimensions\n}\n\n/**\n * Deterministic hash-based embedding (fallback)\n */\nfunction hashEmbed(text: string, dims: number): Float32Array {\n const hash = simpleHash(text);\n const vec = new Float32Array(dims);\n\n // Generate deterministic pseudo-random vector from hash\n for (let i = 0; i < dims; i++) {\n vec[i] = Math.sin(hash * (i + 1) * 0.01) + Math.cos(hash * i * 0.02);\n }\n\n return normalize(vec);\n}\n\n/**\n * Simple string hash function\n */\nfunction simpleHash(str: string): number {\n let hash = 0;\n for (let i = 0; i < str.length; i++) {\n hash = ((hash << 5) - hash) + str.charCodeAt(i);\n hash |= 0;\n }\n return Math.abs(hash);\n}\n\n/**\n * Normalize vector to unit length\n */\nfunction normalize(vec: Float32Array): Float32Array {\n let mag = 0;\n for (let i = 0; i < vec.length; i++) {\n mag += vec[i] * vec[i];\n }\n mag = Math.sqrt(mag);\n\n if (mag === 0) return vec;\n\n for (let i = 0; i < vec.length; i++) {\n vec[i] /= mag;\n }\n return vec;\n}\n\n/**\n * Clear embedding cache\n * MEMORY LEAK FIX: Also clear all TTL timers\n */\nexport function clearEmbeddingCache(): void {\n // Clear all timers first to prevent memory leaks\n for (const timer of embeddingTimers.values()) {\n clearTimeout(timer);\n }\n embeddingTimers.clear();\n embeddingCache.clear();\n}\n"]}
|
|
1
|
+
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../../src/reasoningbank/utils/embeddings.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAGH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,4EAA4E;AAC5E,2EAA2E;AAC3E,4EAA4E;AAC5E,mCAAmC;AACnC,IAAI,QAAQ,GAA2B,IAAI,CAAC;AAC5C,IAAI,GAAG,GAAsB,IAAI,CAAC;AAElC,IAAI,iBAAiB,GAAqC,IAAI,CAAC;AAC/D,IAAI,qBAAqB,GAAyB,IAAI,CAAC;AACvD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAwB,CAAC;AACvD,8DAA8D;AAC9D,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAE1D;;;GAGG;AACH,KAAK,UAAU,oBAAoB;IACjC,sBAAsB;IACtB,IAAI,iBAAiB;QAAE,OAAO;IAE9B,sDAAsD;IACtD,IAAI,qBAAqB,EAAE,CAAC;QAC1B,OAAO,qBAAqB,CAAC;IAC/B,CAAC;IAED,mEAAmE;IACnE,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,KAAK;QACzC,OAAO,CAAC,GAAG,CAAC,YAAY,EAAE,QAAQ,CAAC,KAAK,CAAC;QACzC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC;QAChC,OAAO,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAEpD,IAAI,QAAQ,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,EAAE,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,qEAAqE,CAAC,CAAC;QACnF,OAAO,CAAC,GAAG,CAAC,gFAAgF,CAAC,CAAC;QAC9F,OAAO;IACT,CAAC;IAED,qEAAqE;IACrE,qBAAqB,GAAG,CAAC,KAAK,IAAI,EAAE;QAClC,oEAAoE;QACpE,2EAA2E;QAC3E,IAAI,CAAC,QAAQ,IAAI,CAAC,GAAG,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,YAAY,GAAG,MAAM,MAAM,CAAC,sBAAsB,CAAC,CAAC;gBAC1D,QAAQ,GAAG,YAAY,CAAC,QAAQ,CAAC;gBACjC,GAAG,GAAG,YAAY,CAAC,GAAG,CAAC;gBACvB,iFAAiF;gBACjF,kFAAkF;gBAClF,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC,CAAK,6BAA6B;gBACvE,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAI,8BAA8B;YAC1E,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,OAAO,CAAC,IAAI,CAAC,wEAAwE,CAAC,CAAC;gBACvF,OAAO,CAAC,IAAI,CAAC,6DAA6D,CAAC,CAAC;gBAC5E,OAAO,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;gBACnE,qBAAqB,GAAG,IAAI,CAAC;gBAC7B,OAAO;YACT,CAAC;QACH,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,8EAA8E,CAAC,CAAC;QAC5F,OAAO,CAAC,GAAG,CAAC,qDAAqD,CAAC,CAAC;QAEnE,IAAI,CAAC;YACH,mEAAmE;YACnE,yEAAyE;YACzE,iBAAiB,GAAG,CAAC,MAAM,QAAQ,CACjC,oBAAoB,EACpB,yBAAyB,EACzB,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,kBAAkB;aACvC,CAA8B,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,kDAAkD,CAAC,CAAC;QAC
lE,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACnE,OAAO,CAAC,KAAK,CAAC,oCAAoC,EAAE,GAAG,CAAC,CAAC;YACzD,OAAO,CAAC,IAAI,CAAC,oDAAoD,CAAC,CAAC;YACnE,qCAAqC;YACrC,qBAAqB,GAAG,IAAI,CAAC;QAC/B,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,OAAO,qBAAqB,CAAC;AAC/B,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB,CAAC,IAAY;IACjD,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,cAAc;IACd,MAAM,QAAQ,GAAG,SAAS,IAAI,EAAE,CAAC;IACjC,IAAI,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;QACjC,OAAO,cAAc,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC;IACvC,CAAC;IAED,IAAI,SAAuB,CAAC;IAE5B,uBAAuB;IACvB,MAAM,oBAAoB,EAAE,CAAC;IAE7B,IAAI,iBAAiB,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,IAAI,EAAE;gBAC3C,OAAO,EAAE,MAAM;gBACf,SAAS,EAAE,IAAI;aAChB,CAAC,CAAC;YACH,qEAAqE;YACrE,yEAAyE;YACzE,yDAAyD;YACzD,SAAS,GAAG,IAAI,YAAY,CAAC,MAAM,CAAC,IAAoC,CAAC,CAAC;QAC5E,CAAC;QAAC,OAAO,KAAc,EAAE,CAAC;YACxB,MAAM,GAAG,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACnE,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,GAAG,CAAC,CAAC;YACtD,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,CAAC,WAAW;QAC/C,CAAC;IACH,CAAC;SAAM,CAAC;QACN,oCAAoC;QACpC,MAAM,IAAI,GAAG,MAAM,EAAE,UAAU,EAAE,UAAU,IAAI,GAAG,CAAC;QACnD,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,sDAAsD;IACtD,MAAM,aAAa,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACpD,IAAI,aAAa,EAAE,CAAC;QAClB,YAAY,CAAC,aAAa,CAAC,CAAC;QAC5B,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC;IAED,sCAAsC;IACtC,2DAA2D;IAC3D,IAAI,cAAc,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC;QAChC,8DAA8D;QAC9D,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC;QACpD,IAAI,QAAQ,EAAE,CAAC;YACb,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YAChC,uBAAuB;YACvB,MAAM,KAAK,GAAG,eAAe,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC5C,IAAI,KAAK,EAAE,CAAC;gBACV,YAAY,CAAC,KAAK,CAAC,CAAC;gBACpB,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;IACH,CAAC;IACD,cAAc,CAAC,GAAG,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;IAExC,6CAA6C;IAC7C
,MAAM,GAAG,GAAG,MAAM,EAAE,UAAU,EAAE,iBAAiB,IAAI,IAAI,CAAC;IAC1D,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE;QAC9B,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAChC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC,EAAE,GAAG,GAAG,IAAI,CAAC,CAAC;IAEf,2CAA2C;IAC3C,eAAe,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEvC,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAe;IACzD,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,sBAAsB;IACpC,OAAO,GAAG,CAAC,CAAC,uCAAuC;AACrD,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY,EAAE,IAAY;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;IAC9B,MAAM,GAAG,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC;IAEnC,wDAAwD;IACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC;QAC9B,GAAG,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,GAAG,IAAI,CAAC,CAAC;IACvE,CAAC;IAED,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC7B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,IAAI,GAAG,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAChD,IAAI,IAAI,CAAC,CAAC;IACZ,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,GAAiB;IAClC,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IACD,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAErB,IAAI,GAAG,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAE1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAAC;IAChB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,mBAAmB;IACjC,iDAAiD;IACjD,KAAK,MAAM,KAAK,IAAI,eAAe,CAAC,MAAM,EAAE,EAAE,CAAC;QAC7C,YAAY,CAAC,KAAK,CAAC,CAAC;IACtB,CAAC;
IACD,eAAe,CAAC,KAAK,EAAE,CAAC;IACxB,cAAc,CAAC,KAAK,EAAE,CAAC;AACzB,CAAC","sourcesContent":["/**\n * Embedding generation for semantic similarity\n * Uses local transformers.js - no API key required!\n *\n * `@xenova/transformers` is an OPTIONAL dependency. The module is loaded\n * dynamically inside `initializeEmbeddings()` so the rest of this file is\n * importable even when transformers.js is absent (e.g. when consumers\n * pass `npm install --omit=optional`). Code paths that don't call\n * `computeEmbedding()` continue to work without ever loading the module.\n */\n\nimport type { pipeline as Pipeline, env as Env, FeatureExtractionPipeline } from '@xenova/transformers';\nimport { loadConfig } from './config.js';\n\n// Cached references resolved at first call to initializeEmbeddings(). Types\n// are imported as `type-only` so TypeScript can typecheck the file without\n// requiring @xenova/transformers to be installed at build time — the actual\n// runtime import is dynamic below.\nlet pipeline: typeof Pipeline | null = null;\nlet env: typeof Env | null = null;\n\nlet embeddingPipeline: FeatureExtractionPipeline | null = null;\nlet initializationPromise: Promise<void> | null = null;\nconst embeddingCache = new Map<string, Float32Array>();\n// MEMORY LEAK FIX: Track TTL timers so they can be cleaned up\nconst embeddingTimers = new Map<string, NodeJS.Timeout>();\n\n/**\n * Initialize the embedding pipeline (lazy load)\n * RACE CONDITION FIX: Use promise-based initialization instead of busy-wait\n */\nasync function initializeEmbeddings(): Promise<void> {\n // Already initialized\n if (embeddingPipeline) return;\n\n // Initialization in progress - await existing promise\n if (initializationPromise) {\n return initializationPromise;\n }\n\n // Detect npx environment (known transformer initialization issues)\n const isNpxEnv = process.env.npm_lifecycle_event === 'npx' ||\n process.env.npm_execpath?.includes('npx') ||\n process.cwd().includes('/_npx/') ||\n 
process.cwd().includes('\\\\_npx\\\\');\n\n if (isNpxEnv && !process.env.FORCE_TRANSFORMERS) {\n console.log('[Embeddings] NPX environment detected - using hash-based embeddings');\n console.log('[Embeddings] For semantic search, install globally: npm install -g claude-flow');\n return;\n }\n\n // RACE CONDITION FIX: Create promise for concurrent callers to await\n initializationPromise = (async () => {\n // Optional-dep load: try to import @xenova/transformers. If absent,\n // emit a clear warning and let callers fall back to hash-based embeddings.\n if (!pipeline || !env) {\n try {\n const transformers = await import('@xenova/transformers');\n pipeline = transformers.pipeline;\n env = transformers.env;\n // Configure transformers.js to use WASM backend only (avoid ONNX runtime issues)\n // The native ONNX runtime causes \"DefaultLogger not registered\" errors in Node.js\n env.backends.onnx.wasm.proxy = false; // Disable ONNX runtime proxy\n env.backends.onnx.wasm.numThreads = 1; // Single thread for stability\n } catch (err: unknown) {\n console.warn('[Embeddings] @xenova/transformers not installed (optional dependency).');\n console.warn('[Embeddings] Install with: npm install @xenova/transformers');\n console.warn('[Embeddings] Falling back to hash-based embeddings');\n initializationPromise = null;\n return;\n }\n }\n\n console.log('[Embeddings] Initializing local embedding model (Xenova/all-MiniLM-L6-v2)...');\n console.log('[Embeddings] First run will download ~23MB model...');\n\n try {\n // `pipeline('feature-extraction', ...)` returns a union; narrow to\n // FeatureExtractionPipeline so call-sites can use .pooling / .normalize.\n embeddingPipeline = (await pipeline(\n 'feature-extraction',\n 'Xenova/all-MiniLM-L6-v2',\n { quantized: true } // Smaller, faster\n )) as FeatureExtractionPipeline;\n console.log('[Embeddings] Local model ready! (384 dimensions)');\n } catch (error: unknown) {\n const msg = error instanceof Error ? 
error.message : String(error);\n console.error('[Embeddings] Failed to initialize:', msg);\n console.warn('[Embeddings] Falling back to hash-based embeddings');\n // Reset promise so retry is possible\n initializationPromise = null;\n }\n })();\n\n return initializationPromise;\n}\n\n/**\n * Compute embedding for text using local model\n */\nexport async function computeEmbedding(text: string): Promise<Float32Array> {\n const config = loadConfig();\n\n // Check cache\n const cacheKey = `local:${text}`;\n if (embeddingCache.has(cacheKey)) {\n return embeddingCache.get(cacheKey)!;\n }\n\n let embedding: Float32Array;\n\n // Initialize if needed\n await initializeEmbeddings();\n\n if (embeddingPipeline) {\n try {\n // Use transformers.js for real embeddings\n const output = await embeddingPipeline(text, {\n pooling: 'mean',\n normalize: true\n });\n // output.data is a Tensor.data typed-array union; cast to a Float32-\n // compatible source. The model is feature-extraction with normalize:true\n // so the underlying buffer is always Float32 at runtime.\n embedding = new Float32Array(output.data as unknown as ArrayLike<number>);\n } catch (error: unknown) {\n const msg = error instanceof Error ? 
error.message : String(error);\n console.error('[Embeddings] Generation failed:', msg);\n embedding = hashEmbed(text, 384); // Fallback\n }\n } else {\n // Fallback to hash-based embeddings\n const dims = config?.embeddings?.dimensions || 384;\n embedding = hashEmbed(text, dims);\n }\n\n // MEMORY LEAK FIX: Clear existing timer if key exists\n const existingTimer = embeddingTimers.get(cacheKey);\n if (existingTimer) {\n clearTimeout(existingTimer);\n embeddingTimers.delete(cacheKey);\n }\n\n // Cache with LRU (limit 1000 entries)\n // PERFORMANCE FIX: Use proper LRU by tracking access order\n if (embeddingCache.size >= 1000) {\n // Find and remove oldest entry (first key in iteration order)\n const firstKey = embeddingCache.keys().next().value;\n if (firstKey) {\n embeddingCache.delete(firstKey);\n // Also clear its timer\n const timer = embeddingTimers.get(firstKey);\n if (timer) {\n clearTimeout(timer);\n embeddingTimers.delete(firstKey);\n }\n }\n }\n embeddingCache.set(cacheKey, embedding);\n\n // Set TTL for cache entry with tracked timer\n const ttl = config?.embeddings?.cache_ttl_seconds || 3600;\n const timerId = setTimeout(() => {\n embeddingCache.delete(cacheKey);\n embeddingTimers.delete(cacheKey);\n }, ttl * 1000);\n\n // MEMORY LEAK FIX: Track timer for cleanup\n embeddingTimers.set(cacheKey, timerId);\n\n return embedding;\n}\n\n/**\n * Batch compute embeddings (more efficient)\n */\nexport async function computeEmbeddingBatch(texts: string[]): Promise<Float32Array[]> {\n return Promise.all(texts.map(text => computeEmbedding(text)));\n}\n\n/**\n * Get embedding dimensions\n */\nexport function getEmbeddingDimensions(): number {\n return 384; // all-MiniLM-L6-v2 uses 384 dimensions\n}\n\n/**\n * Deterministic hash-based embedding (fallback)\n */\nfunction hashEmbed(text: string, dims: number): Float32Array {\n const hash = simpleHash(text);\n const vec = new Float32Array(dims);\n\n // Generate deterministic pseudo-random vector from hash\n for (let i = 
0; i < dims; i++) {\n vec[i] = Math.sin(hash * (i + 1) * 0.01) + Math.cos(hash * i * 0.02);\n }\n\n return normalize(vec);\n}\n\n/**\n * Simple string hash function\n */\nfunction simpleHash(str: string): number {\n let hash = 0;\n for (let i = 0; i < str.length; i++) {\n hash = ((hash << 5) - hash) + str.charCodeAt(i);\n hash |= 0;\n }\n return Math.abs(hash);\n}\n\n/**\n * Normalize vector to unit length\n */\nfunction normalize(vec: Float32Array): Float32Array {\n let mag = 0;\n for (let i = 0; i < vec.length; i++) {\n mag += vec[i] * vec[i];\n }\n mag = Math.sqrt(mag);\n\n if (mag === 0) return vec;\n\n for (let i = 0; i < vec.length; i++) {\n vec[i] /= mag;\n }\n return vec;\n}\n\n/**\n * Clear embedding cache\n * MEMORY LEAK FIX: Also clear all TTL timers\n */\nexport function clearEmbeddingCache(): void {\n // Clear all timers first to prevent memory leaks\n for (const timer of embeddingTimers.values()) {\n clearTimeout(timer);\n }\n embeddingTimers.clear();\n embeddingCache.clear();\n}\n"]}
|
|
@@ -8,7 +8,16 @@
|
|
|
8
8
|
* - Better generation parameters for code tasks
|
|
9
9
|
* - System prompt caching
|
|
10
10
|
*
|
|
11
|
-
* Note: onnxruntime-node is optional - will error if not installed
|
|
11
|
+
* Note: onnxruntime-node is optional - will error if not installed.
|
|
12
|
+
*
|
|
13
|
+
* NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`
|
|
14
|
+
* fired the native-binding load (`onnxruntime_binding.node`) at module
|
|
15
|
+
* import time. On Windows this crashes with "OS cannot run %1" — and the
|
|
16
|
+
* crash propagated to any consumer that transitively imports this file
|
|
17
|
+
* (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).
|
|
18
|
+
* This file does not use `ort` directly — the base `ONNXLocalProvider`
|
|
19
|
+
* it extends does, and that file now lazy-loads ort on first session
|
|
20
|
+
* init. So we just drop the eager top-level load here.
|
|
12
21
|
*/
|
|
13
22
|
import type { ChatParams, ChatResponse } from '../types.js';
|
|
14
23
|
import { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAIH,OAAO,KAAK,EACV,UAAU,EACV,YAAY,EAIb,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAErE,MAAM,WAAW,mBAAoB,SAAQ,eAAe;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,qBAAsB,SAAQ,iBAAiB;IAC1D,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,WAAW,CAA+B;IAClD,OAAO,CAAC,iBAAiB,CAAmE;gBAEhF,MAAM,GAAE,mBAAwB;IAkB5C;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAmDvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA8BtB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA+BrD;;OAEG;IACH,mBAAmB;;;;;;;;;;;;;;;;;;;;IAoBnB;;OAEG;IACH,WAAW;CAKZ"}
|
|
@@ -8,16 +8,17 @@
|
|
|
8
8
|
* - Better generation parameters for code tasks
|
|
9
9
|
* - System prompt caching
|
|
10
10
|
*
|
|
11
|
-
* Note: onnxruntime-node is optional - will error if not installed
|
|
11
|
+
* Note: onnxruntime-node is optional - will error if not installed.
|
|
12
|
+
*
|
|
13
|
+
* NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`
|
|
14
|
+
* fired the native-binding load (`onnxruntime_binding.node`) at module
|
|
15
|
+
* import time. On Windows this crashes with "OS cannot run %1" — and the
|
|
16
|
+
* crash propagated to any consumer that transitively imports this file
|
|
17
|
+
* (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).
|
|
18
|
+
* This file does not use `ort` directly — the base `ONNXLocalProvider`
|
|
19
|
+
* it extends does, and that file now lazy-loads ort on first session
|
|
20
|
+
* init. So we just drop the eager top-level load here.
|
|
12
21
|
*/
|
|
13
|
-
let ort = null;
|
|
14
|
-
// Dynamic import for optional onnxruntime-node
|
|
15
|
-
try {
|
|
16
|
-
ort = await import('onnxruntime-node');
|
|
17
|
-
}
|
|
18
|
-
catch {
|
|
19
|
-
// Will be handled at runtime
|
|
20
|
-
}
|
|
21
22
|
import { ONNXLocalProvider } from './onnx-local.js';
|
|
22
23
|
export class OptimizedONNXProvider extends ONNXLocalProvider {
|
|
23
24
|
optimizedConfig;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AAEpB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;AACzC,CAAC;AAAC,MAAM,CAAC;IACP,6BAA6B;AAC/B,CAAC;AAWD,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/
C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,yCAAyC;YACzC,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI
,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QAEzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n *\n * Note: onnxruntime-node is optional - will error if not installed\n */\n\nlet ort: any = null;\n\n// Dynamic import for optional 
onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n} catch {\n // Will be handled at runtime\n}\n\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window 
context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? 
c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: 
this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAWH,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,y
CAAyC;YACzC,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,
CAAC,CAAC;QAEzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n *\n * Note: onnxruntime-node is optional - will error if not installed.\n *\n * NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`\n * fired the native-binding load (`onnxruntime_binding.node`) at module\n * import time. 
On Windows this crashes with \"OS cannot run %1\" — and the\n * crash propagated to any consumer that transitively imports this file\n * (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).\n * This file does not use `ort` directly — the base `ONNXLocalProvider`\n * it extends does, and that file now lazy-loads ort on first session\n * init. So we just drop the eager top-level load here.\n */\n\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n 
repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? 
c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. 
Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
|
|
@@ -3,6 +3,15 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Uses onnxruntime-node for true local CPU/GPU inference
|
|
5
5
|
* Falls back gracefully when native module isn't available (Windows)
|
|
6
|
+
*
|
|
7
|
+
* NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first
|
|
8
|
+
* `initializeSession()` call, not at module import. The previous top-level
|
|
9
|
+
* `await import('onnxruntime-node')` fired the native-binding load
|
|
10
|
+
* (`onnxruntime_binding.node`) at module load time, which crashed Windows
|
|
11
|
+
* environments where the NAPI binary cannot be loaded — even when the
|
|
12
|
+
* consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes
|
|
13
|
+
* the router. Moving the import inside `loadOrt()` keeps importing
|
|
14
|
+
* `reasoningbank` side-effect-free with respect to native bindings.
|
|
6
15
|
*/
|
|
7
16
|
import type { LLMProvider, ChatParams, ChatResponse, StreamChunk } from '../types.js';
|
|
8
17
|
export interface ONNXLocalConfig {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAqBH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAa;IAC5B,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IAkD/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA8BzB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA8IrD;;OAEG;IACI,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,WAAW,CAAC;IAI9D;;OAEG;IACH,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO;IAKjD;;OAEG;IACH,YAAY;;;;;;IASZ;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
|
|
@@ -3,16 +3,30 @@
|
|
|
3
3
|
*
|
|
4
4
|
* Uses onnxruntime-node for true local CPU/GPU inference
|
|
5
5
|
* Falls back gracefully when native module isn't available (Windows)
|
|
6
|
+
*
|
|
7
|
+
* NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first
|
|
8
|
+
* `initializeSession()` call, not at module import. The previous top-level
|
|
9
|
+
* `await import('onnxruntime-node')` fired the native-binding load
|
|
10
|
+
* (`onnxruntime_binding.node`) at module load time, which crashed Windows
|
|
11
|
+
* environments where the NAPI binary cannot be loaded — even when the
|
|
12
|
+
* consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes
|
|
13
|
+
* the router. Moving the import inside `loadOrt()` keeps importing
|
|
14
|
+
* `reasoningbank` side-effect-free with respect to native bindings.
|
|
6
15
|
*/
|
|
7
16
|
let ort = null;
|
|
8
17
|
let ortAvailable = false;
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
18
|
+
let ortLoaded = false;
|
|
19
|
+
async function loadOrt() {
|
|
20
|
+
if (ortLoaded)
|
|
21
|
+
return;
|
|
22
|
+
ortLoaded = true;
|
|
23
|
+
try {
|
|
24
|
+
ort = await import('onnxruntime-node');
|
|
25
|
+
ortAvailable = true;
|
|
26
|
+
}
|
|
27
|
+
catch {
|
|
28
|
+
console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
|
|
29
|
+
}
|
|
16
30
|
}
|
|
17
31
|
import { get_encoding } from 'tiktoken';
|
|
18
32
|
import { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';
|
|
@@ -82,6 +96,7 @@ export class ONNXLocalProvider {
|
|
|
82
96
|
async initializeSession() {
|
|
83
97
|
if (this.session)
|
|
84
98
|
return;
|
|
99
|
+
await loadOrt();
|
|
85
100
|
if (!ortAvailable || !ort) {
|
|
86
101
|
throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');
|
|
87
102
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,YAAY,GAAG,KAAK,CAAC;AAEzB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;IACvC,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC;AAAC,MAAM,CAAC;IACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;AACnF,CAAC;AAID,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAQ,IAAI,CAAC;IACpB,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;
IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC,YAAY,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;QAED,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACj
E,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAAwB,EAAE,CAAC;QAExC,uDAAuD;QACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;QAExC,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,WAAW,CACnD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,WAAW,CACrD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uDAAuD;gBACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;gBAExC,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,WAAW,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,WAAW,CACnC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,
CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAAwB;oBACjC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CA
AC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n * Falls back gracefully when native module isn't available (Windows)\n */\n\nlet ort: any = null;\nlet ortAvailable = false;\n\n// Dynamic import for optional onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n ortAvailable = true;\n} catch {\n console.warn('[ONNX] onnxruntime-node not available - local inference disabled');\n}\n\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } 
from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: any = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded 
instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n if (!ortAvailable || !ort) {\n throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');\n }\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? 
c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, any> = {};\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // 
Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Create input tensor for current step\n const inputTensor = new TensorClass(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new TensorClass(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, any> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n 
pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n 
tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,YAAY,GAAG,KAAK,CAAC;AACzB,IAAI,SAAS,GAAG,KAAK,CAAC;AAEtB,KAAK,UAAU,OAAO;IACpB,IAAI,SAAS;QAAE,OAAO;IACtB,SAAS,GAAG,IAAI,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QACvC,YAAY,GAAG,IAAI,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;IACnF,CAAC;AACH,CAAC;AAID,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAQ,IAAI,CAAC;IACpB,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO
,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,MAAM,OAAO,EAAE,CAAC;QAChB,IAAI,CAAC,YAAY,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;QAED,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW
,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAAwB,EAAE,CAAC;QAExC,uDAAuD;QACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;QAExC,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,WAAW,CACnD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,WAAW,CACrD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uDAAuD;gBACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;gBAExC,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,WAAW,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GA
AG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,WAAW,CACnC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAAwB;oBACjC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC
;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n * Falls back gracefully when native module isn't available (Windows)\n *\n * NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first\n * `initializeSession()` call, not at module import. 
The previous top-level\n * `await import('onnxruntime-node')` fired the native-binding load\n * (`onnxruntime_binding.node`) at module load time, which crashed Windows\n * environments where the NAPI binary cannot be loaded — even when the\n * consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes\n * the router. Moving the import inside `loadOrt()` keeps importing\n * `reasoningbank` side-effect-free with respect to native bindings.\n */\n\nlet ort: any = null;\nlet ortAvailable = false;\nlet ortLoaded = false;\n\nasync function loadOrt(): Promise<void> {\n if (ortLoaded) return;\n ortLoaded = true;\n try {\n ort = await import('onnxruntime-node');\n ortAvailable = true;\n } catch {\n console.warn('[ONNX] onnxruntime-node not available - local inference disabled');\n }\n}\n\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: any = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load 
optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n await loadOrt();\n if (!ortAvailable || !ort) {\n throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');\n }\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: 
this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, any> = {};\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new TensorClass(\n 'float32',\n 
emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? 
inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Create input tensor for current step\n const inputTensor = new TensorClass(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new TensorClass(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, any> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n 
console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n 
this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
|
|
@@ -1,3 +1,2 @@
|
|
|
1
1
|
export * from './quic.js';
|
|
2
|
-
export { loadQuicTransport, isQuicAvailable, getTransportCapabilities, WebSocketFallbackTransport, DEFAULT_STREAM_ID, type AgentTransport, type AgentMessage, type InboundMessageHandler, type OnMessageOptions, type PoolStatistics, type TransportCapabilities, type QuicTransportConfig as LoaderQuicTransportConfig, type TlsConfig, } from './quic-loader.js';
|
|
3
2
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/transport/index.ts"],"names":[],"mappings":"AACA,cAAc,WAAW,CAAC
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/transport/index.ts"],"names":[],"mappings":"AACA,cAAc,WAAW,CAAC"}
|
package/dist/transport/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/transport/index.ts"],"names":[],"mappings":"AAAA,0BAA0B;AAC1B,cAAc,WAAW,CAAC
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/transport/index.ts"],"names":[],"mappings":"AAAA,0BAA0B;AAC1B,cAAc,WAAW,CAAC","sourcesContent":["// Transport Layer Exports\nexport * from './quic.js';\n"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agentic-flow",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.13",
|
|
4
4
|
"description": "Production-ready AI agent orchestration platform with 66 specialized agents, 213 MCP tools, ReasoningBank learning memory, and autonomous multi-agent swarms. Built by @ruvnet with Claude Agent SDK, neural networks, memory persistence, GitHub integration, and distributed consensus protocols.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -23,8 +23,7 @@
|
|
|
23
23
|
"./router": "./dist/router/index.js",
|
|
24
24
|
"./agent-booster": "./dist/agent-booster/index.js",
|
|
25
25
|
"./transport/quic": "./dist/transport/quic.js",
|
|
26
|
-
"./embeddings": "./dist/embeddings/index.js"
|
|
27
|
-
"./transport/loader": "./dist/transport/quic-loader.js"
|
|
26
|
+
"./embeddings": "./dist/embeddings/index.js"
|
|
28
27
|
},
|
|
29
28
|
"scripts": {
|
|
30
29
|
"postinstall": "node scripts/postinstall.js || true",
|
|
@@ -158,7 +157,6 @@
|
|
|
158
157
|
"@ruvector/ruvllm": "^0.2.3",
|
|
159
158
|
"@ruvector/tiny-dancer": "^0.1.17",
|
|
160
159
|
"@supabase/supabase-js": "^2.78.0",
|
|
161
|
-
"@xenova/transformers": "^2.17.2",
|
|
162
160
|
"axios": "^1.12.2",
|
|
163
161
|
"dotenv": "^16.4.5",
|
|
164
162
|
"express": "^5.1.0",
|
|
@@ -170,7 +168,7 @@
|
|
|
170
168
|
"ruvector-onnx-embeddings-wasm": "^0.1.2",
|
|
171
169
|
"tiktoken": "^1.0.22",
|
|
172
170
|
"ulid": "^3.0.1",
|
|
173
|
-
"ws": "^8.
|
|
171
|
+
"ws": "^8.18.3",
|
|
174
172
|
"yaml": "^2.8.1",
|
|
175
173
|
"zod": "^3.25.76"
|
|
176
174
|
},
|
|
@@ -178,6 +176,7 @@
|
|
|
178
176
|
"@rollup/rollup-darwin-arm64": "^4.59.0",
|
|
179
177
|
"@ruvector/attention": "^0.1.4",
|
|
180
178
|
"@ruvector/sona": "^0.1.4",
|
|
179
|
+
"@xenova/transformers": "^2.17.2",
|
|
181
180
|
"agentdb": "^3.0.0-alpha.14",
|
|
182
181
|
"better-sqlite3": "^11.10.0",
|
|
183
182
|
"onnxruntime-node": "^1.23.2",
|