agentic-flow 2.0.12 → 2.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,16 @@
8
8
  * - Better generation parameters for code tasks
9
9
  * - System prompt caching
10
10
  *
11
- * Note: onnxruntime-node is optional - will error if not installed
11
+ * Note: onnxruntime-node is optional - will error if not installed.
12
+ *
13
+ * NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`
14
+ * fired the native-binding load (`onnxruntime_binding.node`) at module
15
+ * import time. On Windows this crashes with "OS cannot run %1" — and the
16
+ * crash propagated to any consumer that transitively imports this file
17
+ * (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).
18
+ * This file does not use `ort` directly — the base `ONNXLocalProvider`
19
+ * it extends does, and that file now lazy-loads ort on first session
20
+ * init. So we just drop the eager top-level load here.
12
21
  */
13
22
  import type { ChatParams, ChatResponse } from '../types.js';
14
23
  import { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAaH,OAAO,KAAK,EACV,UAAU,EACV,YAAY,EAIb,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAErE,MAAM,WAAW,mBAAoB,SAAQ,eAAe;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,qBAAsB,SAAQ,iBAAiB;IAC1D,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,WAAW,CAA+B;IAClD,OAAO,CAAC,iBAAiB,CAAmE;gBAEhF,MAAM,GAAE,mBAAwB;IAkB5C;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAmDvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA8BtB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA+BrD;;OAEG;IACH,mBAAmB;;;;;;;;;;;;;;;;;;;;IAoBnB;;OAEG;IACH,WAAW;CAKZ"}
1
+ {"version":3,"file":"onnx-local-optimized.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAIH,OAAO,KAAK,EACV,UAAU,EACV,YAAY,EAIb,MAAM,aAAa,CAAC;AACrB,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAErE,MAAM,WAAW,mBAAoB,SAAQ,eAAe;IAC1D,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC5B;AAED,qBAAa,qBAAsB,SAAQ,iBAAiB;IAC1D,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,WAAW,CAA+B;IAClD,OAAO,CAAC,iBAAiB,CAAmE;gBAEhF,MAAM,GAAE,mBAAwB;IAkB5C;;OAEG;IACH,OAAO,CAAC,cAAc;IAKtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAmDvB;;OAEG;IACH,OAAO,CAAC,cAAc;IA8BtB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA+BrD;;OAEG;IACH,mBAAmB;;;;;;;;;;;;;;;;;;;;IAoBnB;;OAEG;IACH,WAAW;CAKZ"}
@@ -8,16 +8,17 @@
8
8
  * - Better generation parameters for code tasks
9
9
  * - System prompt caching
10
10
  *
11
- * Note: onnxruntime-node is optional - will error if not installed
11
+ * Note: onnxruntime-node is optional - will error if not installed.
12
+ *
13
+ * NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`
14
+ * fired the native-binding load (`onnxruntime_binding.node`) at module
15
+ * import time. On Windows this crashes with "OS cannot run %1" — and the
16
+ * crash propagated to any consumer that transitively imports this file
17
+ * (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).
18
+ * This file does not use `ort` directly — the base `ONNXLocalProvider`
19
+ * it extends does, and that file now lazy-loads ort on first session
20
+ * init. So we just drop the eager top-level load here.
12
21
  */
13
- let ort = null;
14
- // Dynamic import for optional onnxruntime-node
15
- try {
16
- ort = await import('onnxruntime-node');
17
- }
18
- catch {
19
- // Will be handled at runtime
20
- }
21
22
  import { ONNXLocalProvider } from './onnx-local.js';
22
23
  export class OptimizedONNXProvider extends ONNXLocalProvider {
23
24
  optimizedConfig;
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AAEpB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;AACzC,CAAC;AAAC,MAAM,CAAC;IACP,6BAA6B;AAC/B,CAAC;AAWD,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,yCAAyC;YACzC,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QAEzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n *\n * Note: onnxruntime-node is optional - will error if not installed\n */\n\nlet ort: any = null;\n\n// Dynamic import for optional onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n} catch {\n // Will be handled at runtime\n}\n\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
1
+ {"version":3,"file":"onnx-local-optimized.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local-optimized.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAWH,OAAO,EAAE,iBAAiB,EAAmB,MAAM,iBAAiB,CAAC;AAYrE,MAAM,OAAO,qBAAsB,SAAQ,iBAAiB;IAClD,eAAe,CAAgC;IAC/C,WAAW,GAAqB,IAAI,GAAG,EAAE,CAAC;IAC1C,iBAAiB,GAAyD,IAAI,GAAG,EAAE,CAAC;IAE5F,YAAY,SAA8B,EAAE;QAC1C,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,eAAe,GAAG;YACrB,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,iFAAiF;YAChH,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG,EAAG,sCAAsC;YAC/E,gBAAgB,EAAE,MAAM,CAAC,gBAAgB,IAAI,IAAI,EAAG,sBAAsB;YAC1E,aAAa,EAAE,MAAM,CAAC,aAAa,KAAK,KAAK,EAAG,eAAe;YAC/D,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,KAAK,KAAK,EAAG,eAAe;YACzE,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,EAAE;YACvB,IAAI,EAAE,MAAM,CAAC,IAAI,IAAI,GAAG;YACxB,iBAAiB,EAAE,MAAM,CAAC,iBAAiB,IAAI,GAAG;SACnD,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,qDAAqD;QACrD,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpC,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAmB;QACzC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC;YACxC,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,gBAAgB,CAAC;QACxD,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,SAAS,GAAc,EAAE,CAAC;QAEhC,wCAAwC;QACxC,MAAM,SAAS,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;QAC1D,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,OAAO,GAAG,OAAO,SAAS,CAAC,OAAO,KAAK,QAAQ;gBACnD,CAAC,CAAC,SAAS,CAAC,OAAO;gBACnB,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEzE,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,WAAW,IAAI,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;QAC9C,CAAC;QAED,+CAA+C;QAC/C,KAAK,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC9C,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;YAExB,yCAAyC;YACzC,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ;gBAAE,SAAS;YAEpC,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;YAE5C,IAAI,WAAW,GAAG,MAAM,GAAG,SAAS,EAAE,CAAC;gBACrC,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,eAAe,WAAW,cAAc,CAAC,CAAC;gBACpH,MAAM;YACR,CAAC;YAED,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACvB,WAAW,IAAI,MAAM,CAAC;QACxB,CAAC;QAED,sCAAsC;QACtC,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,EAAE,CAAC;YACtE,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,OAAO,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;YAC5E,IAAI,WAAW;gBAAE,SAAS,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAC/C,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,kBAAkB,EAAE,CAAC;YAC7C,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,SAAS,GAAG,QAAQ,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE;YACnC,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;oBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;oBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEnE,wCAAwC;gBACxC,MAAM,UAAU,GAAG,0DAA0D,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBAE5F,IAAI,UAAU,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;oBAC5E,MAAM,eAAe,GAAG,GAAG,OAAO,kHAAkH,CAAC;oBAErJ,OAAO;wBACL,GAAG,GAAG;wBACN,OAAO,EAAE,eAAe;qBACzB,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,4CAA4C;QAC5C,IAAI,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAErD,uCAAuC;QACvC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;QAEzC,2DAA2D;QAC3D,MAAM,cAAc,GAAG;YACrB,GAAG,MAAM;YACT,QAAQ;YACR,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,eAAe,CAAC,WAAW;YACnE,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,eAAe,CAAC,SAAS;SAC9D,CAAC;QAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAElD,4BAA4B;QAC5B,IAAI,QAAQ,CAAC,QAAQ,EAAE,CAAC;YACtB,QAAQ,CAAC,QAAQ,CAAC,aAAa,GAAG;gBAChC,cAAc,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBAClD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,mBAAmB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC5D,oBAAoB,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM;gBAC5C,qBAAqB,EAAE,QAAQ,CAAC,MAAM;aACvC,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,mBAAmB;QACjB,OAAO;YACL,GAAG,KAAK,CAAC,YAAY,EAAE;YACvB,aAAa,EAAE;gBACb,gBAAgB,EAAE,IAAI,CAAC,eAAe,CAAC,gBAAgB;gBACvD,aAAa,EAAE,IAAI,CAAC,eAAe,CAAC,aAAa;gBACjD,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,kBAAkB,EAAE,IAAI,CAAC,eAAe,CAAC,kBAAkB;gBAC3D,WAAW,EAAE,IAAI,CAAC,eAAe,CAAC,WAAW;gBAC7C,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,IAAI,EAAE,IAAI,CAAC,eAAe,CAAC,IAAI;gBAC/B,iBAAiB,EAAE,IAAI,CAAC,eAAe,CAAC,iBAAiB;aAC1D;YACD,UAAU,EAAE;gBACV,eAAe,EAAE,IAAI,CAAC,WAAW,CAAC,IAAI;gBACtC,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC,IAAI;aACnD;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,WAAW;QACT,IAAI,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC;QACzB,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IACnC,CAAC;CACF","sourcesContent":["/**\n * Optimized ONNX Runtime Local Inference Provider\n *\n * Improvements over base implementation:\n * - Context pruning for 2-4x speed improvement\n * - Prompt optimization for 30-50% quality improvement\n * - KV cache pooling for 20-30% faster generation\n * - Better generation parameters for code tasks\n * - System prompt caching\n *\n * Note: onnxruntime-node is optional - will error if not installed.\n *\n * NOTE (ruvnet/ruflo#2048): the previous top-level `await import('onnxruntime-node')`\n * fired the native-binding load (`onnxruntime_binding.node`) at module\n * import time. On Windows this crashes with \"OS cannot run %1\" — and the\n * crash propagated to any consumer that transitively imports this file\n * (e.g. `agentic-flow/reasoningbank` via `core/distill → router/router`).\n * This file does not use `ort` directly — the base `ONNXLocalProvider`\n * it extends does, and that file now lazy-loads ort on first session\n * init. So we just drop the eager top-level load here.\n */\n\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n ChatParams,\n ChatResponse,\n Message,\n ContentBlock,\n ProviderError\n} from '../types.js';\nimport { ONNXLocalProvider, ONNXLocalConfig } from './onnx-local.js';\n\nexport interface OptimizedONNXConfig extends ONNXLocalConfig {\n maxContextTokens?: number;\n slidingWindow?: boolean;\n cacheSystemPrompts?: boolean;\n promptOptimization?: boolean;\n topK?: number;\n topP?: number;\n repetitionPenalty?: number;\n}\n\nexport class OptimizedONNXProvider extends ONNXLocalProvider {\n private optimizedConfig: Required<OptimizedONNXConfig>;\n private kvCachePool: Map<string, any> = new Map();\n private systemPromptCache: Map<string, { tokens: number[]; timestamp: number }> = new Map();\n\n constructor(config: OptimizedONNXConfig = {}) {\n super(config);\n\n this.optimizedConfig = {\n modelPath: config.modelPath || './models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 200,\n temperature: config.temperature || 0.3, // Lower for code (more deterministic)\n maxContextTokens: config.maxContextTokens || 2048, // Keep under 4K limit\n slidingWindow: config.slidingWindow !== false, // Default true\n cacheSystemPrompts: config.cacheSystemPrompts !== false, // Default true\n promptOptimization: config.promptOptimization !== false, // Default true\n topK: config.topK || 50,\n topP: config.topP || 0.9,\n repetitionPenalty: config.repetitionPenalty || 1.1\n };\n }\n\n /**\n * Estimate token count for a string\n */\n private estimateTokens(text: string): number {\n // Rough estimate: 1 token ≈ 4 characters for English\n return Math.ceil(text.length / 4);\n }\n\n /**\n * Optimize messages using sliding window context pruning\n */\n private optimizeContext(messages: Message[]): Message[] {\n if (!this.optimizedConfig.slidingWindow) {\n return messages;\n }\n\n const maxTokens = this.optimizedConfig.maxContextTokens;\n let totalTokens = 0;\n const optimized: Message[] = [];\n\n // Always keep system message if present\n const systemMsg = messages.find(m => m.role === 'system');\n if (systemMsg) {\n const content = typeof systemMsg.content === 'string'\n ? systemMsg.content\n : systemMsg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n optimized.push(systemMsg);\n totalTokens += this.estimateTokens(content);\n }\n\n // Add recent messages from end (most relevant)\n for (let i = messages.length - 1; i >= 0; i--) {\n const msg = messages[i];\n\n // Skip if already added (system message)\n if (msg.role === 'system') continue;\n\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n const tokens = this.estimateTokens(content);\n\n if (totalTokens + tokens > maxTokens) {\n console.log(`📊 Context pruned: Saved ${messages.length - optimized.length} messages, ~${totalTokens} tokens kept`);\n break;\n }\n\n optimized.unshift(msg);\n totalTokens += tokens;\n }\n\n // Ensure at least user message exists\n if (optimized.length === 0 || !optimized.some(m => m.role === 'user')) {\n const lastUserMsg = messages.slice().reverse().find(m => m.role === 'user');\n if (lastUserMsg) optimized.push(lastUserMsg);\n }\n\n return optimized;\n }\n\n /**\n * Optimize prompt for better quality output\n */\n private optimizePrompt(messages: Message[]): Message[] {\n if (!this.optimizedConfig.promptOptimization) {\n return messages;\n }\n\n const optimized = messages.map(msg => {\n if (msg.role === 'user') {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n // Add quality indicators for code tasks\n const isCodeTask = /write|create|implement|generate|code|function|class|api/i.test(content);\n\n if (isCodeTask && !content.includes('include') && !content.includes('with')) {\n const enhancedContent = `${content}. Include: proper error handling, type hints/types, and edge case handling. Return clean, production-ready code.`;\n\n return {\n ...msg,\n content: enhancedContent\n };\n }\n }\n\n return msg;\n });\n\n return optimized;\n }\n\n /**\n * Enhanced chat with optimization\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n // Step 1: Optimize context (sliding window)\n let messages = this.optimizeContext(params.messages);\n\n // Step 2: Optimize prompts for quality\n messages = this.optimizePrompt(messages);\n\n // Step 3: Call base implementation with optimized messages\n const enhancedParams = {\n ...params,\n messages,\n temperature: params.temperature || this.optimizedConfig.temperature,\n maxTokens: params.maxTokens || this.optimizedConfig.maxTokens\n };\n\n const response = await super.chat(enhancedParams);\n\n // Add optimization metadata\n if (response.metadata) {\n response.metadata.optimizations = {\n contextPruning: this.optimizedConfig.slidingWindow,\n promptOptimization: this.optimizedConfig.promptOptimization,\n systemPromptCaching: this.optimizedConfig.cacheSystemPrompts,\n originalMessageCount: params.messages.length,\n optimizedMessageCount: messages.length\n };\n }\n\n return response;\n }\n\n /**\n * Get optimization info\n */\n getOptimizationInfo() {\n return {\n ...super.getModelInfo(),\n optimizations: {\n maxContextTokens: this.optimizedConfig.maxContextTokens,\n slidingWindow: this.optimizedConfig.slidingWindow,\n cacheSystemPrompts: this.optimizedConfig.cacheSystemPrompts,\n promptOptimization: this.optimizedConfig.promptOptimization,\n temperature: this.optimizedConfig.temperature,\n topK: this.optimizedConfig.topK,\n topP: this.optimizedConfig.topP,\n repetitionPenalty: this.optimizedConfig.repetitionPenalty\n },\n cacheStats: {\n kvCachePoolSize: this.kvCachePool.size,\n systemPromptCacheSize: this.systemPromptCache.size\n }\n };\n }\n\n /**\n * Clear caches\n */\n clearCaches() {\n this.kvCachePool.clear();\n this.systemPromptCache.clear();\n console.log('🧹 Caches cleared');\n }\n}\n"]}
@@ -3,6 +3,15 @@
3
3
  *
4
4
  * Uses onnxruntime-node for true local CPU/GPU inference
5
5
  * Falls back gracefully when native module isn't available (Windows)
6
+ *
7
+ * NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first
8
+ * `initializeSession()` call, not at module import. The previous top-level
9
+ * `await import('onnxruntime-node')` fired the native-binding load
10
+ * (`onnxruntime_binding.node`) at module load time, which crashed Windows
11
+ * environments where the NAPI binary cannot be loaded — even when the
12
+ * consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes
13
+ * the router. Moving the import inside `loadOrt()` keeps importing
14
+ * `reasoningbank` side-effect-free with respect to native bindings.
6
15
  */
7
16
  import type { LLMProvider, ChatParams, ChatResponse, StreamChunk } from '../types.js';
8
17
  export interface ONNXLocalConfig {
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAiBH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAa;IAC5B,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IAiD/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA8BzB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA8IrD;;OAEG;IACI,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,WAAW,CAAC;IAI9D;;OAEG;IACH,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO;IAKjD;;OAEG;IACH,YAAY;;;;;;IASZ;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
1
+ {"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAqBH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAa;IAC5B,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IAkD/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA8BzB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA8IrD;;OAEG;IACI,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,WAAW,CAAC;IAI9D;;OAEG;IACH,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO;IAKjD;;OAEG;IACH,YAAY;;;;;;IASZ;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
@@ -3,16 +3,30 @@
3
3
  *
4
4
  * Uses onnxruntime-node for true local CPU/GPU inference
5
5
  * Falls back gracefully when native module isn't available (Windows)
6
+ *
7
+ * NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first
8
+ * `initializeSession()` call, not at module import. The previous top-level
9
+ * `await import('onnxruntime-node')` fired the native-binding load
10
+ * (`onnxruntime_binding.node`) at module load time, which crashed Windows
11
+ * environments where the NAPI binary cannot be loaded — even when the
12
+ * consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes
13
+ * the router. Moving the import inside `loadOrt()` keeps importing
14
+ * `reasoningbank` side-effect-free with respect to native bindings.
6
15
  */
7
16
  let ort = null;
8
17
  let ortAvailable = false;
9
- // Dynamic import for optional onnxruntime-node
10
- try {
11
- ort = await import('onnxruntime-node');
12
- ortAvailable = true;
13
- }
14
- catch {
15
- console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
18
+ let ortLoaded = false;
19
+ async function loadOrt() {
20
+ if (ortLoaded)
21
+ return;
22
+ ortLoaded = true;
23
+ try {
24
+ ort = await import('onnxruntime-node');
25
+ ortAvailable = true;
26
+ }
27
+ catch {
28
+ console.warn('[ONNX] onnxruntime-node not available - local inference disabled');
29
+ }
16
30
  }
17
31
  import { get_encoding } from 'tiktoken';
18
32
  import { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';
@@ -82,6 +96,7 @@ export class ONNXLocalProvider {
82
96
  async initializeSession() {
83
97
  if (this.session)
84
98
  return;
99
+ await loadOrt();
85
100
  if (!ortAvailable || !ort) {
86
101
  throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');
87
102
  }
@@ -1 +1 @@
1
- {"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,YAAY,GAAG,KAAK,CAAC;AAEzB,+CAA+C;AAC/C,IAAI,CAAC;IACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;IACvC,YAAY,GAAG,IAAI,CAAC;AACtB,CAAC;AAAC,MAAM,CAAC;IACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;AACnF,CAAC;AAID,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAQ,IAAI,CAAC;IACpB,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC,YAAY,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;QAED,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAAwB,EAAE,CAAC;QAExC,uDAAuD;QACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;QAExC,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,WAAW,CACnD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,WAAW,CACrD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uDAAuD;gBACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;gBAExC,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,WAAW,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,WAAW,CACnC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAAwB;oBACjC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n * Falls back gracefully when native module isn't available (Windows)\n */\n\nlet ort: any = null;\nlet ortAvailable = false;\n\n// Dynamic import for optional onnxruntime-node\ntry {\n ort = await import('onnxruntime-node');\n ortAvailable = true;\n} catch {\n console.warn('[ONNX] onnxruntime-node not available - local inference disabled');\n}\n\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: any = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n if (!ortAvailable || !ort) {\n throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');\n }\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, any> = {};\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Create input tensor for current step\n const inputTensor = new TensorClass(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new TensorClass(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, any> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
1
+ {"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAEH,IAAI,GAAG,GAAQ,IAAI,CAAC;AACpB,IAAI,YAAY,GAAG,KAAK,CAAC;AACzB,IAAI,SAAS,GAAG,KAAK,CAAC;AAEtB,KAAK,UAAU,OAAO;IACpB,IAAI,SAAS;QAAE,OAAO;IACtB,SAAS,GAAG,IAAI,CAAC;IACjB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QACvC,YAAY,GAAG,IAAI,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,IAAI,CAAC,kEAAkE,CAAC,CAAC;IACnF,CAAC;AACH,CAAC;AAID,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAQ,IAAI,CAAC;IACpB,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,MAAM,OAAO,EAAE,CAAC;QAChB,IAAI,CAAC,YAAY,IAAI,CAAC,GAAG,EAAE,CAAC;YAC1B,MAAM,IAAI,KAAK,CAAC,6EAA6E,CAAC,CAAC;QACjG,CAAC;QAED,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAAwB,EAAE,CAAC;QAExC,uDAAuD;QACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;QAExC,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,WAAW,CACnD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,WAAW,CACrD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uDAAuD;gBACvD,MAAM,WAAW,GAAI,GAAW,CAAC,MAAM,CAAC;gBAExC,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,WAAW,CACjC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,WAAW,CACnC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAAwB;oBACjC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n * Falls back gracefully when native module isn't available (Windows)\n *\n * NOTE (ruvnet/ruflo#2048): `onnxruntime-node` is loaded LAZILY on first\n * `initializeSession()` call, not at module import. The previous top-level\n * `await import('onnxruntime-node')` fired the native-binding load\n * (`onnxruntime_binding.node`) at module load time, which crashed Windows\n * environments where the NAPI binary cannot be loaded — even when the\n * consumer (e.g. `agentic-flow/reasoningbank`) never actually invokes\n * the router. Moving the import inside `loadOrt()` keeps importing\n * `reasoningbank` side-effect-free with respect to native bindings.\n */\n\nlet ort: any = null;\nlet ortAvailable = false;\nlet ortLoaded = false;\n\nasync function loadOrt(): Promise<void> {\n if (ortLoaded) return;\n ortLoaded = true;\n try {\n ort = await import('onnxruntime-node');\n ortAvailable = true;\n } catch {\n console.warn('[ONNX] onnxruntime-node not available - local inference disabled');\n }\n}\n\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: any = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n await loadOrt();\n if (!ortAvailable || !ort) {\n throw new Error('onnxruntime-node not available - install with: npm install onnxruntime-node');\n }\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, any> = {};\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new TensorClass(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Get Tensor constructor - use any for flexible access\n const TensorClass = (ort as any).Tensor;\n\n // Create input tensor for current step\n const inputTensor = new TensorClass(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new TensorClass(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, any> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentic-flow",
3
- "version": "2.0.12",
3
+ "version": "2.0.13",
4
4
  "description": "Production-ready AI agent orchestration platform with 66 specialized agents, 213 MCP tools, ReasoningBank learning memory, and autonomous multi-agent swarms. Built by @ruvnet with Claude Agent SDK, neural networks, memory persistence, GitHub integration, and distributed consensus protocols.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",