agentic-flow 2.0.1-alpha.4 → 2.0.1-alpha.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/dist/.tsbuildinfo +1 -1
- package/dist/agentdb/controllers/EmbeddingService.d.ts +37 -0
- package/dist/agentdb/controllers/EmbeddingService.d.ts.map +1 -0
- package/dist/agentdb/controllers/EmbeddingService.js +1 -0
- package/dist/agentdb/controllers/EmbeddingService.js.map +1 -0
- package/dist/billing/mcp/tools.d.ts.map +1 -1
- package/dist/billing/mcp/tools.js +2 -0
- package/dist/billing/mcp/tools.js.map +1 -1
- package/dist/cli/commands/hooks.d.ts +18 -0
- package/dist/cli/commands/hooks.d.ts.map +1 -0
- package/dist/cli/commands/hooks.js +750 -0
- package/dist/cli/commands/hooks.js.map +1 -0
- package/dist/cli-proxy.js +26 -1
- package/dist/cli-proxy.js.map +1 -1
- package/dist/core/agentdb-fast.js +3 -3
- package/dist/core/agentdb-fast.js.map +1 -1
- package/dist/core/agentdb-wrapper-enhanced.d.ts.map +1 -1
- package/dist/core/agentdb-wrapper-enhanced.js +32 -17
- package/dist/core/agentdb-wrapper-enhanced.js.map +1 -1
- package/dist/core/attention-native.d.ts +1 -0
- package/dist/core/attention-native.d.ts.map +1 -1
- package/dist/core/attention-native.js +6 -1
- package/dist/core/attention-native.js.map +1 -1
- package/dist/federation/integrations/supabase-adapter-debug.js +3 -3
- package/dist/federation/integrations/supabase-adapter-debug.js.map +1 -1
- package/dist/intelligence/RuVectorIntelligence.d.ts +362 -0
- package/dist/intelligence/RuVectorIntelligence.d.ts.map +1 -0
- package/dist/intelligence/RuVectorIntelligence.js +852 -0
- package/dist/intelligence/RuVectorIntelligence.js.map +1 -0
- package/dist/intelligence/index.d.ts +14 -0
- package/dist/intelligence/index.d.ts.map +1 -0
- package/dist/intelligence/index.js +14 -0
- package/dist/intelligence/index.js.map +1 -0
- package/dist/llm/RuvLLMOrchestrator.d.ts +184 -0
- package/dist/llm/RuvLLMOrchestrator.d.ts.map +1 -0
- package/dist/llm/RuvLLMOrchestrator.js +442 -0
- package/dist/llm/RuvLLMOrchestrator.js.map +1 -0
- package/dist/llm/index.d.ts +9 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +8 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/mcp/claudeFlowSdkServer.d.ts.map +1 -1
- package/dist/mcp/claudeFlowSdkServer.js +86 -21
- package/dist/mcp/claudeFlowSdkServer.js.map +1 -1
- package/dist/mcp/fastmcp/servers/hooks-server.d.ts +15 -0
- package/dist/mcp/fastmcp/servers/hooks-server.d.ts.map +1 -0
- package/dist/mcp/fastmcp/servers/hooks-server.js +63 -0
- package/dist/mcp/fastmcp/servers/hooks-server.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/benchmark.d.ts +20 -0
- package/dist/mcp/fastmcp/tools/hooks/benchmark.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/benchmark.js +110 -0
- package/dist/mcp/fastmcp/tools/hooks/benchmark.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/build-agents.d.ts +7 -0
- package/dist/mcp/fastmcp/tools/hooks/build-agents.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/build-agents.js +276 -0
- package/dist/mcp/fastmcp/tools/hooks/build-agents.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/explain.d.ts +6 -0
- package/dist/mcp/fastmcp/tools/hooks/explain.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/explain.js +164 -0
- package/dist/mcp/fastmcp/tools/hooks/explain.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/index.d.ts +28 -0
- package/dist/mcp/fastmcp/tools/hooks/index.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/index.js +59 -0
- package/dist/mcp/fastmcp/tools/hooks/index.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.d.ts +91 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js +269 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-bridge.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-tools.d.ts +58 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-tools.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-tools.js +416 -0
- package/dist/mcp/fastmcp/tools/hooks/intelligence-tools.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/metrics.d.ts +6 -0
- package/dist/mcp/fastmcp/tools/hooks/metrics.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/metrics.js +137 -0
- package/dist/mcp/fastmcp/tools/hooks/metrics.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/post-command.d.ts +7 -0
- package/dist/mcp/fastmcp/tools/hooks/post-command.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/post-command.js +91 -0
- package/dist/mcp/fastmcp/tools/hooks/post-command.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/post-edit.d.ts +12 -0
- package/dist/mcp/fastmcp/tools/hooks/post-edit.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/post-edit.js +146 -0
- package/dist/mcp/fastmcp/tools/hooks/post-edit.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-command.d.ts +7 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-command.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-command.js +70 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-command.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-edit.d.ts +14 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-edit.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-edit.js +121 -0
- package/dist/mcp/fastmcp/tools/hooks/pre-edit.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/pretrain.d.ts +7 -0
- package/dist/mcp/fastmcp/tools/hooks/pretrain.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/pretrain.js +171 -0
- package/dist/mcp/fastmcp/tools/hooks/pretrain.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/route.d.ts +12 -0
- package/dist/mcp/fastmcp/tools/hooks/route.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/route.js +267 -0
- package/dist/mcp/fastmcp/tools/hooks/route.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/shared.d.ts +46 -0
- package/dist/mcp/fastmcp/tools/hooks/shared.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/shared.js +159 -0
- package/dist/mcp/fastmcp/tools/hooks/shared.js.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/transfer.d.ts +7 -0
- package/dist/mcp/fastmcp/tools/hooks/transfer.d.ts.map +1 -0
- package/dist/mcp/fastmcp/tools/hooks/transfer.js +151 -0
- package/dist/mcp/fastmcp/tools/hooks/transfer.js.map +1 -0
- package/dist/mcp/tools/agent-booster-tools.d.ts +10 -1
- package/dist/mcp/tools/agent-booster-tools.d.ts.map +1 -1
- package/dist/mcp/tools/agent-booster-tools.js.map +1 -1
- package/dist/mcp/tools/sona-tools.d.ts.map +1 -1
- package/dist/mcp/tools/sona-tools.js +15 -3
- package/dist/mcp/tools/sona-tools.js.map +1 -1
- package/dist/memory/SharedMemoryPool.d.ts +16 -3
- package/dist/memory/SharedMemoryPool.d.ts.map +1 -1
- package/dist/memory/SharedMemoryPool.js +33 -1
- package/dist/memory/SharedMemoryPool.js.map +1 -1
- package/dist/middleware/auth.middleware.d.ts +114 -0
- package/dist/middleware/auth.middleware.d.ts.map +1 -0
- package/dist/middleware/auth.middleware.js +222 -0
- package/dist/middleware/auth.middleware.js.map +1 -0
- package/dist/optimizations/agent-booster-migration.d.ts.map +1 -1
- package/dist/optimizations/agent-booster-migration.js.map +1 -1
- package/dist/proxy/anthropic-to-gemini.d.ts.map +1 -1
- package/dist/proxy/anthropic-to-gemini.js.map +1 -1
- package/dist/proxy/anthropic-to-openrouter.d.ts.map +1 -1
- package/dist/proxy/anthropic-to-openrouter.js.map +1 -1
- package/dist/proxy/anthropic-to-requesty.d.ts.map +1 -1
- package/dist/proxy/anthropic-to-requesty.js.map +1 -1
- package/dist/proxy/quic-proxy.d.ts +0 -1
- package/dist/proxy/quic-proxy.d.ts.map +1 -1
- package/dist/proxy/quic-proxy.js +2 -1
- package/dist/proxy/quic-proxy.js.map +1 -1
- package/dist/reasoningbank/AdvancedMemory.d.ts.map +1 -1
- package/dist/reasoningbank/AdvancedMemory.js +12 -1
- package/dist/reasoningbank/AdvancedMemory.js.map +1 -1
- package/dist/reasoningbank/HybridBackend.d.ts +9 -0
- package/dist/reasoningbank/HybridBackend.d.ts.map +1 -1
- package/dist/reasoningbank/HybridBackend.js +48 -4
- package/dist/reasoningbank/HybridBackend.js.map +1 -1
- package/dist/reasoningbank/backend-selector.d.ts +1 -1
- package/dist/reasoningbank/backend-selector.d.ts.map +1 -1
- package/dist/reasoningbank/backend-selector.js.map +1 -1
- package/dist/reasoningbank/index-new.d.ts +0 -6
- package/dist/reasoningbank/index-new.d.ts.map +1 -1
- package/dist/reasoningbank/index-new.js +9 -7
- package/dist/reasoningbank/index-new.js.map +1 -1
- package/dist/reasoningbank/index.d.ts +1 -6
- package/dist/reasoningbank/index.d.ts.map +1 -1
- package/dist/reasoningbank/index.js +10 -7
- package/dist/reasoningbank/index.js.map +1 -1
- package/dist/router/providers/onnx-local.d.ts.map +1 -1
- package/dist/router/providers/onnx-local.js +3 -1
- package/dist/router/providers/onnx-local.js.map +1 -1
- package/dist/routing/CircuitBreakerRouter.d.ts +187 -0
- package/dist/routing/CircuitBreakerRouter.d.ts.map +1 -0
- package/dist/routing/CircuitBreakerRouter.js +460 -0
- package/dist/routing/CircuitBreakerRouter.js.map +1 -0
- package/dist/routing/SemanticRouter.d.ts +164 -0
- package/dist/routing/SemanticRouter.d.ts.map +1 -0
- package/dist/routing/SemanticRouter.js +291 -0
- package/dist/routing/SemanticRouter.js.map +1 -0
- package/dist/routing/index.d.ts +12 -0
- package/dist/routing/index.d.ts.map +1 -0
- package/dist/routing/index.js +10 -0
- package/dist/routing/index.js.map +1 -0
- package/dist/services/embedding-service.d.ts.map +1 -1
- package/dist/services/embedding-service.js +5 -2
- package/dist/services/embedding-service.js.map +1 -1
- package/dist/services/sona-agent-training.js +1 -1
- package/dist/services/sona-agent-training.js.map +1 -1
- package/dist/services/sona-agentdb-integration.d.ts.map +1 -1
- package/dist/services/sona-agentdb-integration.js +10 -5
- package/dist/services/sona-agentdb-integration.js.map +1 -1
- package/dist/services/sona-service.d.ts +6 -6
- package/dist/services/sona-service.d.ts.map +1 -1
- package/dist/services/sona-service.js +3 -1
- package/dist/services/sona-service.js.map +1 -1
- package/dist/utils/audit-logger.d.ts +115 -0
- package/dist/utils/audit-logger.d.ts.map +1 -0
- package/dist/utils/audit-logger.js +228 -0
- package/dist/utils/audit-logger.js.map +1 -0
- package/dist/utils/cli.d.ts +1 -1
- package/dist/utils/cli.d.ts.map +1 -1
- package/dist/utils/cli.js +5 -0
- package/dist/utils/cli.js.map +1 -1
- package/dist/utils/input-validator.d.ts +116 -0
- package/dist/utils/input-validator.d.ts.map +1 -0
- package/dist/utils/input-validator.js +299 -0
- package/dist/utils/input-validator.js.map +1 -0
- package/dist/utils/rate-limiter.js +2 -2
- package/dist/utils/rate-limiter.js.map +1 -1
- package/package.json +5 -2
- package/wasm/reasoningbank/reasoningbank_wasm_bg.js +2 -2
- package/wasm/reasoningbank/reasoningbank_wasm_bg.wasm +0 -0
|
@@ -8,14 +8,17 @@
|
|
|
8
8
|
import '../utils/agentdb-runtime-patch.js';
|
|
9
9
|
// New hybrid backend (recommended for new code)
|
|
10
10
|
export { HybridReasoningBank } from './HybridBackend.js';
|
|
11
|
+
export { HybridReasoningBank as ReasoningBank } from './HybridBackend.js'; // Backwards compatibility alias
|
|
11
12
|
export { AdvancedMemorySystem } from './AdvancedMemory.js';
|
|
12
|
-
//
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
export {
|
|
16
|
-
export {
|
|
17
|
-
export {
|
|
18
|
-
export {
|
|
13
|
+
// AgentDB controllers - these are available in agentdb v3.x
|
|
14
|
+
// For now, use HybridReasoningBank which provides equivalent functionality
|
|
15
|
+
// TODO: Re-enable when agentdb v3.x controllers are available
|
|
16
|
+
// export { ReflexionMemory } from 'agentdb/controllers/ReflexionMemory';
|
|
17
|
+
// export { SkillLibrary } from 'agentdb/controllers/SkillLibrary';
|
|
18
|
+
// export { CausalMemoryGraph } from 'agentdb/controllers/CausalMemoryGraph';
|
|
19
|
+
// export { CausalRecall } from 'agentdb/controllers/CausalRecall';
|
|
20
|
+
// export { NightlyLearner } from 'agentdb/controllers/NightlyLearner';
|
|
21
|
+
// export { EmbeddingService } from 'agentdb/controllers/EmbeddingService';
|
|
19
22
|
// Original ReasoningBank implementations (backwards compatibility)
|
|
20
23
|
export { retrieveMemories, formatMemoriesForPrompt } from './core/retrieve.js';
|
|
21
24
|
export { judgeTrajectory } from './core/judge.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/reasoningbank/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,yDAAyD;AACzD,OAAO,mCAAmC,CAAC;AAE3C,gDAAgD;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/reasoningbank/index.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,yDAAyD;AACzD,OAAO,mCAAmC,CAAC;AAE3C,gDAAgD;AAChD,OAAO,EAAE,mBAAmB,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,EAAE,mBAAmB,IAAI,aAAa,EAAE,MAAM,oBAAoB,CAAC,CAAC,gCAAgC;AAC3G,OAAO,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAI3D,4DAA4D;AAC5D,2EAA2E;AAC3E,8DAA8D;AAC9D,yEAAyE;AACzE,mEAAmE;AACnE,6EAA6E;AAC7E,mEAAmE;AACnE,uEAAuE;AACvE,2EAA2E;AAE3E,mEAAmE;AACnE,OAAO,EAAE,gBAAgB,EAAE,uBAAuB,EAAE,MAAM,oBAAoB,CAAC;AAG/E,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAGlD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAGpD,OAAO,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,uBAAuB,CAAC;AAGvE,OAAO,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAGjE,OAAO,EAAE,gBAAgB,EAAE,mBAAmB,EAAE,MAAM,uBAAuB,CAAC;AAC9E,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAE/C,+BAA+B;AAC/B,OAAO,KAAK,EAAE,MAAM,iBAAiB,CAAC;AACtC,OAAO,EAAE,EAAE,EAAE,CAAC;AAYd,+CAA+C;AAC/C,OAAO,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACtD,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AACpD,OAAO,EAAE,iBAAiB,IAAI,UAAU,EAAE,WAAW,IAAI,IAAI,EAAE,MAAM,uBAAuB,CAAC;AAE7F,MAAM,CAAC,KAAK,UAAU,UAAU;IAC9B,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAC5B,OAAO,CAAC,GAAG,CAAC,iCAAiC,CAAC,CAAC;IAC/C,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,6BAA6B,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,kBAAkB,EAAE,CAAC,CAAC;IAClG,OAAO,CAAC,GAAG,CAAC,+BAA+B,MAAM,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;IACzE,OAAO,CAAC,GAAG,CAAC,gCAAgC,MAAM,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAC;IAEjE,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,aAAa,EAAE,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;IAChE,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;QACzD,MAAM,IAAI,KAAK,CAAC,+DAA+D,CAAC,CAAC;IACnF,CAAC;IAED,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,EAAE,CAAC,KAAK,EAAE,CAAC;QAC1B,MAAM,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,4EAA4E,CAAC,CAAC,GAAG,EAAE,CAAC;QAClH,OAAO,CAAC,GAAG,CAAC,gCAAgC,MAAM,CAAC,MAAM,eAAe,CAAC,CAAC;IAC5E,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,KAAK,CAAC,CAAC;QACxD,MAAM,IAAI,KAAK,CAAC,8DAA8D,CAAC,CAAC;IAClF,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,yCAAyC,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAAC,OAM7B;IAMC,OAAO,CAAC,GAAG,CAAC,iCAAiC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IAC/D,MAAM,QAAQ,GAAG,MAAM,gBAAgB,CAAC,OAAO,CAAC,KAAK,EAAE;QACrD,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,KAAK,EAAE,OAAO,CAAC,OAAO;KACvB,CAAC,CAAC;IACH,OAAO,CAAC,GAAG,CAAC,6BAA6B,QAAQ,CAAC,MAAM,WAAW,CAAC,CAAC;IACrE,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;IACrD,MAAM,OAAO,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC;IACjE,OAAO,CAAC,GAAG,CAAC,4BAA4B,OAAO,CAAC,KAAK,KAAK,OAAO,CAAC,UAAU,GAAG,CAAC,CAAC;IACjF,MAAM,WAAW,GAAG,MAAM,eAAe,CAAC,UAAU,EAAE,OAAO,EAAE,OAAO,CAAC,KAAK,EAAE;QAC5E,MAAM,EAAE,OAAO,CAAC,MAAM;QACtB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,MAAM,EAAE,OAAO,CAAC,MAAM;KACvB,CAAC,CAAC;IACH,OAAO,CAAC,GAAG,CAAC,6BAA6B,WAAW,CAAC,MAAM,eAAe,CAAC,CAAC;IAC5E,IAAI,YAAY,GAAG,KAAK,CAAC;IACzB,IAAI,UAAU,EAAE,EAAE,CAAC;QACjB,OAAO,CAAC,GAAG,CAAC,0CAA0C,CAAC,CAAC;QACxD,MAAM,IAAI,EAAE,CAAC;QACb,YAAY,GAAG,IAAI,CAAC;IACtB,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC;AACxE,CAAC;AAED,MAAM,CAAC,MAAM,OAAO,GAAG,OAAO,CAAC;AAC/B,MAAM,CAAC,MAAM,SAAS,GAAG,qCAAqC,CAAC","sourcesContent":["/**\n * ReasoningBank - Closed-loop memory system for AI agents\n * Based on arXiv:2509.25140 (Google DeepMind)\n *\n * @since v1.7.0 - Integrated AgentDB for optimal performance\n */\n\n// Apply AgentDB runtime patch before any AgentDB imports\nimport '../utils/agentdb-runtime-patch.js';\n\n// New hybrid backend (recommended for new code)\nexport { HybridReasoningBank } from './HybridBackend.js';\nexport { HybridReasoningBank as ReasoningBank } from './HybridBackend.js'; // Backwards compatibility alias\nexport { AdvancedMemorySystem } from './AdvancedMemory.js';\nexport type { PatternData, RetrievalOptions, CausalInsight } from './HybridBackend.js';\nexport type { FailureAnalysis, SkillComposition } from './AdvancedMemory.js';\n\n// AgentDB controllers - these are available in agentdb v3.x\n// For now, use HybridReasoningBank which provides equivalent functionality\n// TODO: Re-enable when agentdb v3.x controllers are available\n// export { ReflexionMemory } from 'agentdb/controllers/ReflexionMemory';\n// export { SkillLibrary } from 'agentdb/controllers/SkillLibrary';\n// export { CausalMemoryGraph } from 'agentdb/controllers/CausalMemoryGraph';\n// export { CausalRecall } from 'agentdb/controllers/CausalRecall';\n// export { NightlyLearner } from 'agentdb/controllers/NightlyLearner';\n// export { EmbeddingService } from 'agentdb/controllers/EmbeddingService';\n\n// Original ReasoningBank implementations (backwards compatibility)\nexport { retrieveMemories, formatMemoriesForPrompt } from './core/retrieve.js';\nexport type { RetrievedMemory } from './core/retrieve.js';\n\nexport { judgeTrajectory } from './core/judge.js';\nexport type { Verdict } from './core/judge.js';\n\nexport { distillMemories } from './core/distill.js';\nexport type { DistilledMemory } from './core/distill.js';\n\nexport { consolidate, shouldConsolidate } from './core/consolidate.js';\nexport type { ConsolidationResult } from './core/consolidate.js';\n\nexport { mattsParallel, mattsSequential } from './core/matts.js';\nexport type { MattsResult } from './core/matts.js';\n\nexport { computeEmbedding, clearEmbeddingCache } from './utils/embeddings.js';\nexport { mmrSelection, cosineSimilarity } from './utils/mmr.js';\nexport { scrubPII, containsPII, scrubMemory } from './utils/pii-scrubber.js';\nexport { loadConfig } from './utils/config.js';\n\n// Re-export database utilities\nimport * as db from './db/queries.js';\nexport { db };\nexport type {\n ReasoningMemory,\n PatternEmbedding,\n PatternLink,\n TaskTrajectory,\n MattsRun,\n ConsolidationRun,\n Trajectory,\n TrajectoryStep\n} from './db/schema.js';\n\n// Original functions (backwards compatibility)\nimport { loadConfig } from './utils/config.js';\nimport { retrieveMemories } from './core/retrieve.js';\nimport { judgeTrajectory } from './core/judge.js';\nimport { distillMemories } from './core/distill.js';\nimport { shouldConsolidate as shouldCons, consolidate as cons } from './core/consolidate.js';\n\nexport async function initialize(): Promise<void> {\n const config = loadConfig();\n console.log('[ReasoningBank] Initializing...');\n console.log('[ReasoningBank] Enabled: true (initializing...)');\n console.log(`[ReasoningBank] Database: ${process.env.CLAUDE_FLOW_DB_PATH || '.swarm/memory.db'}`);\n console.log(`[ReasoningBank] Embeddings: ${config.embeddings.provider}`);\n console.log(`[ReasoningBank] Retrieval k: ${config.retrieve.k}`);\n\n try {\n await db.runMigrations();\n console.log(`[ReasoningBank] Database migrated successfully`);\n } catch (error) {\n console.error('[ReasoningBank] Migration error:', error);\n throw new Error('ReasoningBank initialization failed: could not run migrations');\n }\n\n try {\n const dbConn = db.getDb();\n const tables = dbConn.prepare(\"SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'pattern%'\").all();\n console.log(`[ReasoningBank] Database OK: ${tables.length} tables found`);\n } catch (error) {\n console.error('[ReasoningBank] Database error:', error);\n throw new Error('ReasoningBank initialization failed: database not accessible');\n }\n\n console.log('[ReasoningBank] Initialization complete');\n}\n\nexport async function runTask(options: {\n taskId: string;\n agentId: string;\n query: string;\n domain?: string;\n executeFn: (memories: any[]) => Promise<any>;\n}): Promise<{\n verdict: any;\n usedMemories: any[];\n newMemories: string[];\n consolidated: boolean;\n}> {\n console.log(`[ReasoningBank] Running task: ${options.taskId}`);\n const memories = await retrieveMemories(options.query, {\n domain: options.domain,\n agent: options.agentId\n });\n console.log(`[ReasoningBank] Retrieved ${memories.length} memories`);\n const trajectory = await options.executeFn(memories);\n const verdict = await judgeTrajectory(trajectory, options.query);\n console.log(`[ReasoningBank] Verdict: ${verdict.label} (${verdict.confidence})`);\n const newMemories = await distillMemories(trajectory, verdict, options.query, {\n taskId: options.taskId,\n agentId: options.agentId,\n domain: options.domain\n });\n console.log(`[ReasoningBank] Distilled ${newMemories.length} new memories`);\n let consolidated = false;\n if (shouldCons()) {\n console.log('[ReasoningBank] Running consolidation...');\n await cons();\n consolidated = true;\n }\n return { verdict, usedMemories: memories, newMemories, consolidated };\n}\n\nexport const VERSION = '1.7.1';\nexport const PAPER_URL = 'https://arxiv.org/html/2509.25140v1';\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAqC;IACpD,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IA6C/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;
|
|
1
|
+
{"version":3,"file":"onnx-local.d.ts","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAOH,OAAO,KAAK,EACV,WAAW,EACX,UAAU,EACV,YAAY,EACZ,WAAW,EAIZ,MAAM,aAAa,CAAC;AAErB,MAAM,WAAW,eAAe;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,kBAAkB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC9B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,iBAAkB,YAAW,WAAW;IACnD,IAAI,SAAgB;IACpB,IAAI,EAAG,QAAQ,CAAU;IACzB,iBAAiB,UAAS;IAC1B,aAAa,UAAS;IACtB,WAAW,UAAS;IAEpB,OAAO,CAAC,OAAO,CAAqC;IACpD,OAAO,CAAC,MAAM,CAA4B;IAC1C,OAAO,CAAC,SAAS,CAAa;IAC9B,OAAO,CAAC,QAAQ,CAAa;gBAEjB,MAAM,GAAE,eAAoB;IASxC;;OAEG;YACW,aAAa;IAc3B;;OAEG;IACH,OAAO,CAAC,MAAM;IAId;;OAEG;IACH,OAAO,CAAC,MAAM;IAgBd;;OAEG;YACW,iBAAiB;IA6C/B;;OAEG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IA6BzB;;OAEG;IACG,IAAI,CAAC,MAAM,EAAE,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC;IA2IrD;;OAEG;IACI,MAAM,CAAC,MAAM,EAAE,UAAU,GAAG,cAAc,CAAC,WAAW,CAAC;IAI9D;;OAEG;IACH,oBAAoB,CAAC,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO;IAKjD;;OAEG;IACH,YAAY;;;;;;IASZ;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAU/B"}
|
|
@@ -134,11 +134,13 @@ export class ONNXLocalProvider {
|
|
|
134
134
|
const numLayers = 32;
|
|
135
135
|
const numKVHeads = 8;
|
|
136
136
|
const headDim = 128; // 3072 / 24 = 128
|
|
137
|
+
// Use any type for KV cache since ort.Tensor is a value, not a type
|
|
137
138
|
const kvCache = {};
|
|
138
139
|
// Initialize empty cache for each layer (key and value)
|
|
139
140
|
for (let i = 0; i < numLayers; i++) {
|
|
140
141
|
// Empty cache: [batch_size, num_kv_heads, 0, head_dim]
|
|
141
142
|
const emptyCache = new Float32Array(0);
|
|
143
|
+
// Cast to any to avoid TS issues with onnxruntime-node Tensor constructor
|
|
142
144
|
kvCache[`past_key_values.${i}.key`] = new ort.Tensor('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
|
|
143
145
|
kvCache[`past_key_values.${i}.value`] = new ort.Tensor('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
|
|
144
146
|
}
|
|
@@ -168,7 +170,7 @@ export class ONNXLocalProvider {
|
|
|
168
170
|
// For first step, use all input tokens; for subsequent steps, use only last token
|
|
169
171
|
const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];
|
|
170
172
|
const currentSeqLen = currentInputIds.length;
|
|
171
|
-
// Create input tensor for current step
|
|
173
|
+
// Create input tensor for current step (cast to any for TS compatibility)
|
|
172
174
|
const inputTensor = new ort.Tensor('int64', BigInt64Array.from(currentInputIds.map(BigInt)), [1, currentSeqLen]);
|
|
173
175
|
// Create attention mask for current step
|
|
174
176
|
const totalSeqLen = allTokenIds.length;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,GAAG,MAAM,kBAAkB,CAAC;AAGxC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAgC,IAAI,CAAC;IAC5C,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,MAAM,OAAO,GAA+B,EAAE,CAAC;QAE/C,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CAClD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAI,GAAG,CAAC,MAAM,CACpD,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,uCAAuC;gBACvC,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,MAAM,CAChC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,MAAM,CAClC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAA+B;oBACxC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n */\n\nimport * as ort from 'onnxruntime-node';\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: ort.InferenceSession | null = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n const kvCache: Record<string, ort.Tensor> = {};\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n kvCache[`past_key_values.${i}.key`] = new ort.Tensor(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new ort.Tensor(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Create input tensor for current step\n const inputTensor = new ort.Tensor(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new ort.Tensor(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, ort.Tensor> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
|
|
1
|
+
{"version":3,"file":"onnx-local.js","sourceRoot":"","sources":["../../../src/router/providers/onnx-local.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,GAAG,MAAM,kBAAkB,CAAC;AAGxC,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AACxC,OAAO,EAAE,eAAe,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAkBnF,MAAM,OAAO,iBAAiB;IAC5B,IAAI,GAAG,YAAY,CAAC;IACpB,IAAI,GAAG,QAAiB,CAAC;IACzB,iBAAiB,GAAG,KAAK,CAAC,CAAC,mDAAmD;IAC9E,aAAa,GAAG,KAAK,CAAC;IACtB,WAAW,GAAG,KAAK,CAAC;IAEZ,OAAO,GAAgC,IAAI,CAAC;IAC5C,MAAM,CAA4B;IAClC,SAAS,GAAQ,IAAI,CAAC;IACtB,QAAQ,GAAQ,IAAI,CAAC;IAE7B,YAAY,SAA0B,EAAE;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,4EAA4E;YAC3G,kBAAkB,EAAE,MAAM,CAAC,kBAAkB,IAAI,CAAC,KAAK,CAAC;YACxD,SAAS,EAAE,MAAM,CAAC,SAAS,IAAI,GAAG;YAClC,WAAW,EAAE,MAAM,CAAC,WAAW,IAAI,GAAG;SACvC,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa;QACzB,IAAI,IAAI,CAAC,QAAQ;YAAE,OAAO;QAE1B,IAAI,CAAC;YACH,qDAAqD;YACrD,IAAI,CAAC,QAAQ,GAAG,YAAY,CAAC,aAAa,CAAC,CAAC;YAE5C,OAAO,CAAC,GAAG,CAAC,2CAA2C,CAAC,CAAC;QAC3D,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,4BAA4B,EAAE,KAAK,CAAC,CAAC;YACnD,MAAM,IAAI,KAAK,CAAC,6BAA6B,KAAK,EAAE,CAAC,CAAC;QACxD,CAAC;IACH,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,IAAY;QACzB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IAChD,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,GAAa;QAC1B,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC;YAC3D,6CAA6C;YAC7C,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE,CAAC;gBAChC,OAAO,OAAO,CAAC;YACjB,CAAC;iBAAM,IAAI,OAAO,YAAY,UAAU,IAAI,OAAO,YAAY,MAAM,EAAE,CAAC;gBACtE,OAAO,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YAC3C,CAAC;YACD,OAAO,MAAM,CAAC,OAAO,CAAC,CAAC;QACzB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;YACxD,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,iBAAiB;QAC7B,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO;QAEzB,IAAI,CAAC;YACH,6BAA6B;YAC7B,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;YAEnD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,CAAC,QAAQ,EAAE,EAAE;gBACnD,IAAI,QAAQ,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,iBAAiB;oBACnD,OAAO,CAAC,GAAG,CAAC,sBAAsB,eAAe,CAAC,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;gBAChF,CAAC;YACH,CAAC,CAAC,CAAC;YAEH,uCAAuC;YACvC,IAAI,CAAC,MAAM,CAAC,SAAS,GAAG,SAAS,CAAC;YAElC,OAAO,CAAC,GAAG,CAAC,0BAA0B,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE/D,IAAI,CAAC,OAAO,GAAG,MAAM,GAAG,CAAC,gBAAgB,CAAC,MAAM,CAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,EACrB;gBACE,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAyB;gBACzD,sBAAsB,EAAE,KAAK;gBAC7B,iBAAiB,EAAE,IAAI;gBACvB,gBAAgB,EAAE,IAAI;aACvB,CACF,CAAC;YAEF,OAAO,CAAC,GAAG,CAAC,qBAAqB,CAAC,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,2BAA2B,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEpF,iBAAiB;YACjB,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC;QAE7B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,oCAAoC,KAAK,EAAE;gBACpD,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,KAAK;aACjB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAmB;QACxC,IAAI,MAAM,GAAG,EAAE,CAAC;QAEhB,KAAK,MAAM,GAAG,IAAI,QAAQ,EAAE,CAAC;YAC3B,MAAM,OAAO,GAAG,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ;gBAC7C,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAEnE,IAAI,GAAG,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBAC1B,MAAM,IAAI,eAAe,OAAO,WAAW,CAAC;YAC9C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;gBAC/B,MAAM,IAAI,aAAa,OAAO,WAAW,CAAC;YAC5C,CAAC;iBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBACpC,MAAM,IAAI,kBAAkB,OAAO,WAAW,CAAC;YACjD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,iBAAiB,CAAC;QAC5B,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,SAAiB,EAAE,cAAsB;QACjE,MAAM,SAAS,GAAG,EAAE,CAAC;QACrB,MAAM,UAAU,GAAG,CAAC,CAAC;QACrB,MAAM,OAAO,GAAG,GAAG,CAAC,CAAC,kBAAkB;QACvC,oEAAoE;QACpE,MAAM,OAAO,GAAwB,EAAE,CAAC;QAExC,wDAAwD;QACxD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,uDAAuD;YACvD,MAAM,UAAU,GAAG,IAAI,YAAY,CAAC,CAAC,CAAC,CAAC;YAEvC,0EAA0E;YAC1E,OAAO,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,IAAK,GAAG,CAAC,MAAc,CAC3D,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;YAEF,OAAO,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,IAAK,GAAG,CAAC,MAAc,CAC7D,SAAS,EACT,UAAU,EACV,CAAC,SAAS,EAAE,UAAU,EAAE,CAAC,EAAE,OAAO,CAAC,CACpC,CAAC;QACJ,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAI,CAAC,MAAkB;QAC3B,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAC;QAE/B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAEpD,IAAI,CAAC;YACH,0CAA0C;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;YAEnD,2CAA2C;YAC3C,IAAI,WAAW,GAAG,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAE/C,6BAA6B;YAC7B,MAAM,WAAW,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC;YAClC,MAAM,SAAS,GAAa,EAAE,CAAC;YAE/B,8CAA8C;YAC9C,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAEhF,iCAAiC;YACjC,MAAM,YAAY,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;YAE/D,KAAK,IAAI,IAAI,GAAG,CAAC,EAAE,IAAI,GAAG,YAAY,EAAE,IAAI,EAAE,EAAE,CAAC;gBAC/C,kFAAkF;gBAClF,MAAM,eAAe,GAAG,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;gBAClF,MAAM,aAAa,GAAG,eAAe,CAAC,MAAM,CAAC;gBAE7C,0EAA0E;gBAC1E,MAAM,WAAW,GAAG,IAAK,GAAG,CAAC,MAAc,CACzC,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,aAAa,CAAC,CACnB,CAAC;gBAEF,yCAAyC;gBACzC,MAAM,WAAW,GAAG,WAAW,CAAC,MAAM,CAAC;gBACvC,MAAM,aAAa,GAAG,IAAK,GAAG,CAAC,MAAc,CAC3C,OAAO,EACP,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,EAC/C,CAAC,CAAC,EAAE,WAAW,CAAC,CACjB,CAAC;gBAEF,uDAAuD;gBACvD,MAAM,KAAK,GAAwB;oBACjC,SAAS,EAAE,WAAW;oBACtB,cAAc,EAAE,aAAa;oBAC7B,GAAG,WAAW;iBACf,CAAC;gBAEF,gBAAgB;gBAChB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAQ,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBAE/C,4CAA4C;gBAC5C,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,IAAoB,CAAC;gBACnD,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAEtE,gCAAgC;gBAChC,MAAM,qBAAqB,GAAG,CAAC,aAAa,GAAG,CAAC,CAAC,GAAG,SAAS,CAAC;gBAE9D,uCAAuC;gBACvC,IAAI,SAAS,GAAG,CAAC,CAAC;gBAClB,IAAI,MAAM,GAAG,CAAC,QAAQ,CAAC;gBAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,qBAAqB,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,WAAW,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC;oBAClG,IAAI,KAAK,GAAG,MAAM,EAAE,CAAC;wBACnB,MAAM,GAAG,KAAK,CAAC;wBACf,SAAS,GAAG,CAAC,CAAC;oBAChB,CAAC;gBACH,CAAC;gBAED,gBAAgB;gBAChB,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAC1B,WAAW,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;gBAE5B,wDAAwD;gBACxD,IAAI,SAAS,KAAK,CAAC,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;oBACvC,OAAO,CAAC,GAAG,CAAC,2BAA2B,SAAS,EAAE,CAAC,CAAC;oBACpD,MAAM;gBACR,CAAC;gBAED,kDAAkD;gBAClD,WAAW,GAAG,EAAE,CAAC;gBACjB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,WAAW,CAAC,mBAAmB,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;oBACtE,WAAW,CAAC,mBAAmB,CAAC,QAAQ,CAAC,GAAG,OAAO,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC5E,CAAC;gBAED,qBAAqB;gBACrB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,CAAC;oBAC1B,OAAO,CAAC,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,YAAY,CAAC,CAAC;gBACpD,CAAC;YACH,CAAC;YAED,yCAAyC;YACzC,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC7C,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,eAAe,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAEzE,OAAO,CAAC,GAAG,CAAC,gBAAgB,aAAa,EAAE,CAAC,CAAC;YAC7C,OAAO,CAAC,GAAG,CAAC,gBAAgB,OAAO,OAAO,eAAe,cAAc,CAAC,CAAC;YAEzE,MAAM,OAAO,GAAmB,CAAC;oBAC/B,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,aAAa,CAAC,IAAI,EAAE;iBAC3B,CAAC,CAAC;YAEH,OAAO;gBACL,EAAE,EAAE,cAAc,IAAI,CAAC,GAAG,EAAE,EAAE;gBAC9B,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;gBAC5B,OAAO;gBACP,UAAU,EAAE,UAAU;gBACtB,KAAK,EAAE;oBACL,WAAW,EAAE,QAAQ,CAAC,MAAM;oBAC5B,YAAY,EAAE,SAAS,CAAC,MAAM;iBAC/B;gBACD,QAAQ,EAAE;oBACR,QAAQ,EAAE,YAAY;oBACtB,KAAK,EAAE,0BAA0B;oBACjC,OAAO;oBACP,IAAI,EAAE,CAAC,EAAE,0BAA0B;oBACnC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;oBAClD,eAAe,EAAE,UAAU,CAAC,eAAe,CAAC;iBAC7C;aACF,CAAC;QAEJ,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,aAAa,GAAkB;gBACnC,IAAI,EAAE,oBAAoB;gBAC1B,OAAO,EAAE,0BAA0B,KAAK,EAAE;gBAC1C,QAAQ,EAAE,YAAY;gBACtB,SAAS,EAAE,IAAI;aAChB,CAAC;YACF,MAAM,aAAa,CAAC;QACtB,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,CAAC,MAAM,CAAC,MAAkB;QAC9B,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED;;OAEG;IACH,oBAAoB,CAAC,QAAkB;QACrC,MAAM,SAAS,GAAG,CAAC,MAAM,CAAC,CAAC;QAC3B,OAAO,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,kBAAkB,EAAE,IAAI,CAAC,MAAM,CAAC,kBAAkB;YAClD,WAAW,EAAE,IAAI,CAAC,OAAO,KAAK,IAAI;YAClC,eAAe,EAAE,IAAI,CAAC,QAAQ,KAAK,IAAI;SACxC,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,gEAAgE;YAChE,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;QACD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YAClB,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrB,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC;QACvB,CAAC;IACH,CAAC;CACF","sourcesContent":["/**\n * ONNX Runtime Local Inference Provider for Phi-4\n *\n * Uses onnxruntime-node for true local CPU/GPU inference\n */\n\nimport * as ort from 'onnxruntime-node';\nimport * as fs from 'fs';\nimport * as path from 'path';\nimport { get_encoding } from 'tiktoken';\nimport { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';\nimport type {\n LLMProvider,\n ChatParams,\n ChatResponse,\n StreamChunk,\n ProviderError,\n Message,\n ContentBlock\n} from '../types.js';\n\nexport interface ONNXLocalConfig {\n modelPath?: string;\n executionProviders?: string[];\n maxTokens?: number;\n temperature?: number;\n}\n\nexport class ONNXLocalProvider implements LLMProvider {\n name = 'onnx-local';\n type = 'custom' as const;\n supportsStreaming = false; // Streaming requires complex token generation loop\n supportsTools = false;\n supportsMCP = false;\n\n private session: ort.InferenceSession | null = null;\n private config: Required<ONNXLocalConfig>;\n private tokenizer: any = null;\n private tiktoken: any = null;\n\n constructor(config: ONNXLocalConfig = {}) {\n this.config = {\n modelPath: config.modelPath || './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',\n executionProviders: config.executionProviders || ['cpu'],\n maxTokens: config.maxTokens || 100,\n temperature: config.temperature || 0.7\n };\n }\n\n /**\n * Load optimized tiktoken tokenizer (cl100k_base for Phi-4)\n */\n private async loadTokenizer(): Promise<void> {\n if (this.tiktoken) return;\n\n try {\n // Use cl100k_base encoding (GPT-4, similar to Phi-4)\n this.tiktoken = get_encoding('cl100k_base');\n\n console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');\n } catch (error) {\n console.error('❌ Failed to load tiktoken:', error);\n throw new Error(`Tokenizer loading failed: ${error}`);\n }\n }\n\n /**\n * Encode text using tiktoken (fast BPE)\n */\n private encode(text: string): number[] {\n return Array.from(this.tiktoken.encode(text));\n }\n\n /**\n * Decode tokens using tiktoken\n */\n private decode(ids: number[]): string {\n try {\n const decoded = this.tiktoken.decode(new Uint32Array(ids));\n // tiktoken returns buffer, convert to string\n if (typeof decoded === 'string') {\n return decoded;\n } else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {\n return new TextDecoder().decode(decoded);\n }\n return String(decoded);\n } catch (error) {\n console.warn('Decode error, returning raw IDs:', error);\n return ids.join(',');\n }\n }\n\n /**\n * Initialize ONNX session (with automatic model download)\n */\n private async initializeSession(): Promise<void> {\n if (this.session) return;\n\n try {\n // Ensure model is downloaded\n console.log(`🔍 Checking for Phi-4 ONNX model...`);\n\n const modelPath = await ensurePhi4Model((progress) => {\n if (progress.percentage % 10 < 1) { // Log every ~10%\n console.log(` 📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);\n }\n });\n\n // Update config with actual model path\n this.config.modelPath = modelPath;\n\n console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);\n\n this.session = await ort.InferenceSession.create(\n this.config.modelPath,\n {\n executionProviders: this.config.executionProviders as any,\n graphOptimizationLevel: 'all',\n enableCpuMemArena: true,\n enableMemPattern: true\n }\n );\n\n console.log(`✅ ONNX model loaded`);\n console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);\n\n // Load tokenizer\n await this.loadTokenizer();\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInitError',\n message: `Failed to initialize ONNX model: ${error}`,\n provider: 'onnx-local',\n retryable: false\n };\n throw providerError;\n }\n }\n\n /**\n * Format messages for Phi-4 chat template\n */\n private formatMessages(messages: Message[]): string {\n let prompt = '';\n\n for (const msg of messages) {\n const content = typeof msg.content === 'string'\n ? msg.content\n : msg.content.map(c => c.type === 'text' ? c.text : '').join('');\n\n if (msg.role === 'system') {\n prompt += `<|system|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'user') {\n prompt += `<|user|>\\n${content}<|end|>\\n`;\n } else if (msg.role === 'assistant') {\n prompt += `<|assistant|>\\n${content}<|end|>\\n`;\n }\n }\n\n prompt += '<|assistant|>\\n';\n return prompt;\n }\n\n /**\n * Initialize KV cache tensors for all 32 layers\n * Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim\n */\n private initializeKVCache(batchSize: number, sequenceLength: number) {\n const numLayers = 32;\n const numKVHeads = 8;\n const headDim = 128; // 3072 / 24 = 128\n // Use any type for KV cache since ort.Tensor is a value, not a type\n const kvCache: Record<string, any> = {};\n\n // Initialize empty cache for each layer (key and value)\n for (let i = 0; i < numLayers; i++) {\n // Empty cache: [batch_size, num_kv_heads, 0, head_dim]\n const emptyCache = new Float32Array(0);\n\n // Cast to any to avoid TS issues with onnxruntime-node Tensor constructor\n kvCache[`past_key_values.${i}.key`] = new (ort.Tensor as any)(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n\n kvCache[`past_key_values.${i}.value`] = new (ort.Tensor as any)(\n 'float32',\n emptyCache,\n [batchSize, numKVHeads, 0, headDim]\n );\n }\n\n return kvCache;\n }\n\n /**\n * Chat completion using ONNX with KV cache\n */\n async chat(params: ChatParams): Promise<ChatResponse> {\n await this.initializeSession();\n\n const startTime = Date.now();\n const prompt = this.formatMessages(params.messages);\n\n try {\n // Tokenize input using optimized tiktoken\n const inputIds = this.encode(prompt);\n console.log(`📝 Input tokens: ${inputIds.length}`);\n\n // Initialize KV cache (reusable for batch)\n let pastKVCache = this.initializeKVCache(1, 0);\n\n // Track all generated tokens\n const allTokenIds = [...inputIds];\n const outputIds: number[] = [];\n\n // Pre-allocate tensor buffers for performance\n const maxSeqLen = inputIds.length + (params.maxTokens || this.config.maxTokens);\n\n // Autoregressive generation loop\n const maxNewTokens = params.maxTokens || this.config.maxTokens;\n\n for (let step = 0; step < maxNewTokens; step++) {\n // For first step, use all input tokens; for subsequent steps, use only last token\n const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];\n const currentSeqLen = currentInputIds.length;\n\n // Create input tensor for current step (cast to any for TS compatibility)\n const inputTensor = new (ort.Tensor as any)(\n 'int64',\n BigInt64Array.from(currentInputIds.map(BigInt)),\n [1, currentSeqLen]\n );\n\n // Create attention mask for current step\n const totalSeqLen = allTokenIds.length;\n const attentionMask = new (ort.Tensor as any)(\n 'int64',\n BigInt64Array.from(Array(totalSeqLen).fill(1n)),\n [1, totalSeqLen]\n );\n\n // Build feeds with input, attention mask, and KV cache\n const feeds: Record<string, any> = {\n input_ids: inputTensor,\n attention_mask: attentionMask,\n ...pastKVCache\n };\n\n // Run inference\n const results = await this.session!.run(feeds);\n\n // Get logits for next token (last position)\n const logits = results.logits.data as Float32Array;\n const vocabSize = results.logits.dims[results.logits.dims.length - 1];\n\n // Extract logits for last token\n const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;\n\n // Apply temperature and get next token\n let nextToken = 0;\n let maxVal = -Infinity;\n\n for (let i = 0; i < vocabSize; i++) {\n const logit = logits[lastTokenLogitsOffset + i] / (params.temperature || this.config.temperature);\n if (logit > maxVal) {\n maxVal = logit;\n nextToken = i;\n }\n }\n\n // Add to output\n outputIds.push(nextToken);\n allTokenIds.push(nextToken);\n\n // Check for end token (2 is typical EOS for Phi models)\n if (nextToken === 2 || nextToken === 0) {\n console.log(`🛑 Stop token detected: ${nextToken}`);\n break;\n }\n\n // Update KV cache from outputs for next iteration\n pastKVCache = {};\n for (let i = 0; i < 32; i++) {\n pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];\n pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];\n }\n\n // Progress indicator\n if ((step + 1) % 10 === 0) {\n console.log(`🔄 Generated ${step + 1} tokens...`);\n }\n }\n\n // Decode output using optimized tiktoken\n const generatedText = this.decode(outputIds);\n const latency = Date.now() - startTime;\n const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);\n\n console.log(`✅ Generated: ${generatedText}`);\n console.log(`⏱️ Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);\n\n const content: ContentBlock[] = [{\n type: 'text',\n text: generatedText.trim()\n }];\n\n return {\n id: `onnx-local-${Date.now()}`,\n model: this.config.modelPath,\n content,\n stopReason: 'end_turn',\n usage: {\n inputTokens: inputIds.length,\n outputTokens: outputIds.length\n },\n metadata: {\n provider: 'onnx-local',\n model: 'Phi-4-mini-instruct-onnx',\n latency,\n cost: 0, // Local inference is free\n executionProviders: this.config.executionProviders,\n tokensPerSecond: parseFloat(tokensPerSecond)\n }\n };\n\n } catch (error) {\n const providerError: ProviderError = {\n name: 'ONNXInferenceError',\n message: `ONNX inference failed: ${error}`,\n provider: 'onnx-local',\n retryable: true\n };\n throw providerError;\n }\n }\n\n /**\n * Streaming not implemented (requires complex generation loop)\n */\n async *stream(params: ChatParams): AsyncGenerator<StreamChunk> {\n throw new Error('Streaming not yet implemented for ONNX local inference');\n }\n\n /**\n * Validate capabilities\n */\n validateCapabilities(features: string[]): boolean {\n const supported = ['chat'];\n return features.every(f => supported.includes(f));\n }\n\n /**\n * Get model info\n */\n getModelInfo() {\n return {\n modelPath: this.config.modelPath,\n executionProviders: this.config.executionProviders,\n initialized: this.session !== null,\n tokenizerLoaded: this.tiktoken !== null\n };\n }\n\n /**\n * Cleanup resources\n */\n async dispose(): Promise<void> {\n if (this.session) {\n // ONNX Runtime sessions don't have explicit disposal in Node.js\n this.session = null;\n }\n if (this.tiktoken) {\n this.tiktoken.free();\n this.tiktoken = null;\n }\n }\n}\n"]}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circuit Breaker Router - Fault-Tolerant Agent Routing
|
|
3
|
+
*
|
|
4
|
+
* Integrates @ruvector/tiny-dancer circuit breaker pattern for 99.9% uptime.
|
|
5
|
+
*
|
|
6
|
+
* Features:
|
|
7
|
+
* - Circuit breaker states: CLOSED, OPEN, HALF_OPEN
|
|
8
|
+
* - Automatic failure detection and recovery
|
|
9
|
+
* - Fallback chains for degraded service
|
|
10
|
+
* - Hot-reload capability for configuration updates
|
|
11
|
+
* - Uncertainty estimation for routing decisions
|
|
12
|
+
* - Rate limiting for request protection
|
|
13
|
+
*
|
|
14
|
+
* Performance:
|
|
15
|
+
* - <5ms routing overhead
|
|
16
|
+
* - 99.9% uptime guarantee
|
|
17
|
+
* - Automatic failover in <100ms
|
|
18
|
+
*/
|
|
19
|
+
export interface CircuitBreakerConfig {
|
|
20
|
+
/** Failure threshold before opening circuit (default: 5) */
|
|
21
|
+
failureThreshold?: number;
|
|
22
|
+
/** Success threshold to close circuit from half-open (default: 3) */
|
|
23
|
+
successThreshold?: number;
|
|
24
|
+
/** Timeout before attempting recovery (ms, default: 30000) */
|
|
25
|
+
resetTimeout?: number;
|
|
26
|
+
/** Request timeout (ms, default: 5000) */
|
|
27
|
+
requestTimeout?: number;
|
|
28
|
+
/** Enable uncertainty estimation (default: true) */
|
|
29
|
+
enableUncertaintyEstimation?: boolean;
|
|
30
|
+
}
|
|
31
|
+
export declare enum CircuitState {
|
|
32
|
+
CLOSED = "CLOSED",// Normal operation
|
|
33
|
+
OPEN = "OPEN",// Failures detected, routing blocked
|
|
34
|
+
HALF_OPEN = "HALF_OPEN"
|
|
35
|
+
}
|
|
36
|
+
export interface RouteRequest {
|
|
37
|
+
taskDescription: string;
|
|
38
|
+
preferredAgent?: string;
|
|
39
|
+
fallbackAgents?: string[];
|
|
40
|
+
timeout?: number;
|
|
41
|
+
}
|
|
42
|
+
export interface RouteResult {
|
|
43
|
+
selectedAgent: string;
|
|
44
|
+
confidence: number;
|
|
45
|
+
circuitState: CircuitState;
|
|
46
|
+
fallbackUsed: boolean;
|
|
47
|
+
uncertainty?: number;
|
|
48
|
+
metrics: {
|
|
49
|
+
routingTimeMs: number;
|
|
50
|
+
failureCount: number;
|
|
51
|
+
successCount: number;
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
export interface AgentHealth {
|
|
55
|
+
agent: string;
|
|
56
|
+
state: CircuitState;
|
|
57
|
+
failureCount: number;
|
|
58
|
+
successCount: number;
|
|
59
|
+
lastFailureTime?: number;
|
|
60
|
+
lastSuccessTime?: number;
|
|
61
|
+
availability: number;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Circuit Breaker Router
|
|
65
|
+
*
|
|
66
|
+
* Provides fault-tolerant routing with:
|
|
67
|
+
* 1. Circuit breaker pattern for each agent
|
|
68
|
+
* 2. Automatic fallback chains
|
|
69
|
+
* 3. Health monitoring and recovery
|
|
70
|
+
* 4. Uncertainty-aware decision making
|
|
71
|
+
*/
|
|
72
|
+
export declare class CircuitBreakerRouter {
|
|
73
|
+
private config;
|
|
74
|
+
private circuitStates;
|
|
75
|
+
private failureCounts;
|
|
76
|
+
private successCounts;
|
|
77
|
+
private lastFailureTimes;
|
|
78
|
+
private lastSuccessTimes;
|
|
79
|
+
private resetTimers;
|
|
80
|
+
private rateLimiter;
|
|
81
|
+
private routingMetrics;
|
|
82
|
+
constructor(config?: CircuitBreakerConfig);
|
|
83
|
+
/**
|
|
84
|
+
* Route request to agent with circuit breaker protection
|
|
85
|
+
*
|
|
86
|
+
* Process:
|
|
87
|
+
* 1. Check circuit state for preferred agent
|
|
88
|
+
* 2. If circuit OPEN, try fallback chain
|
|
89
|
+
* 3. If circuit HALF_OPEN, allow test request
|
|
90
|
+
* 4. Estimate uncertainty if enabled
|
|
91
|
+
*
|
|
92
|
+
* @param request - Route request with task and agent preferences
|
|
93
|
+
* @returns Route result with selected agent and metrics
|
|
94
|
+
*/
|
|
95
|
+
route(request: RouteRequest): Promise<RouteResult>;
|
|
96
|
+
/**
|
|
97
|
+
* Record successful agent execution
|
|
98
|
+
*
|
|
99
|
+
* Updates circuit breaker state:
|
|
100
|
+
* - Increment success count
|
|
101
|
+
* - Reset failure count if threshold reached
|
|
102
|
+
* - Transition HALF_OPEN -> CLOSED if successful
|
|
103
|
+
*
|
|
104
|
+
* @param agent - Agent that succeeded
|
|
105
|
+
*/
|
|
106
|
+
recordSuccess(agent: string): void;
|
|
107
|
+
/**
|
|
108
|
+
* Record failed agent execution
|
|
109
|
+
*
|
|
110
|
+
* Updates circuit breaker state:
|
|
111
|
+
* - Increment failure count
|
|
112
|
+
* - Transition CLOSED -> OPEN if threshold exceeded
|
|
113
|
+
* - Transition HALF_OPEN -> OPEN on failure
|
|
114
|
+
* - Schedule automatic reset
|
|
115
|
+
*
|
|
116
|
+
* @param agent - Agent that failed
|
|
117
|
+
*/
|
|
118
|
+
recordFailure(agent: string): void;
|
|
119
|
+
/**
|
|
120
|
+
* Get circuit state for agent
|
|
121
|
+
*
|
|
122
|
+
* @param agent - Agent name
|
|
123
|
+
* @returns Current circuit state
|
|
124
|
+
*/
|
|
125
|
+
getCircuitState(agent: string): CircuitState;
|
|
126
|
+
/**
|
|
127
|
+
* Get health status for all agents
|
|
128
|
+
*
|
|
129
|
+
* @returns Array of agent health metrics
|
|
130
|
+
*/
|
|
131
|
+
getAgentHealth(): AgentHealth[];
|
|
132
|
+
/**
|
|
133
|
+
* Get routing metrics
|
|
134
|
+
*
|
|
135
|
+
* @returns Cumulative routing statistics
|
|
136
|
+
*/
|
|
137
|
+
getMetrics(): typeof this.routingMetrics;
|
|
138
|
+
/**
|
|
139
|
+
* Manually reset circuit for agent
|
|
140
|
+
*
|
|
141
|
+
* @param agent - Agent to reset
|
|
142
|
+
*/
|
|
143
|
+
resetCircuit(agent: string): void;
|
|
144
|
+
/**
|
|
145
|
+
* Hot-reload configuration
|
|
146
|
+
*
|
|
147
|
+
* Allows updating circuit breaker parameters without restart.
|
|
148
|
+
*
|
|
149
|
+
* @param config - New configuration
|
|
150
|
+
*/
|
|
151
|
+
updateConfig(config: Partial<CircuitBreakerConfig>): void;
|
|
152
|
+
/**
|
|
153
|
+
* Open circuit and schedule automatic reset
|
|
154
|
+
*/
|
|
155
|
+
private openCircuit;
|
|
156
|
+
/**
|
|
157
|
+
* Calculate confidence score
|
|
158
|
+
*/
|
|
159
|
+
private calculateConfidence;
|
|
160
|
+
/**
|
|
161
|
+
* Estimate uncertainty for routing decision
|
|
162
|
+
*
|
|
163
|
+
* Uses failure history and circuit state to estimate decision uncertainty.
|
|
164
|
+
* Higher uncertainty indicates less reliable routing.
|
|
165
|
+
*
|
|
166
|
+
* @param agent - Agent to estimate uncertainty for
|
|
167
|
+
* @returns Uncertainty score (0-1, lower is better)
|
|
168
|
+
*/
|
|
169
|
+
private estimateUncertainty;
|
|
170
|
+
/**
|
|
171
|
+
* Calculate recency factor based on time since last failure
|
|
172
|
+
*/
|
|
173
|
+
private getRecencyFactor;
|
|
174
|
+
/**
|
|
175
|
+
* Update average routing time (exponential moving average)
|
|
176
|
+
*/
|
|
177
|
+
private updateAvgRoutingTime;
|
|
178
|
+
/**
|
|
179
|
+
* Simple string hash function for rate limiting
|
|
180
|
+
*/
|
|
181
|
+
private hashString;
|
|
182
|
+
/**
|
|
183
|
+
* Cleanup resources on shutdown
|
|
184
|
+
*/
|
|
185
|
+
destroy(): void;
|
|
186
|
+
}
|
|
187
|
+
//# sourceMappingURL=CircuitBreakerRouter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"CircuitBreakerRouter.d.ts","sourceRoot":"","sources":["../../src/routing/CircuitBreakerRouter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;GAiBG;AAMH,MAAM,WAAW,oBAAoB;IACnC,4DAA4D;IAC5D,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,qEAAqE;IACrE,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,8DAA8D;IAC9D,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,0CAA0C;IAC1C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,oDAAoD;IACpD,2BAA2B,CAAC,EAAE,OAAO,CAAC;CACvC;AAED,oBAAY,YAAY;IACtB,MAAM,WAAW,CAAM,mBAAmB;IAC1C,IAAI,SAAS,CAAU,qCAAqC;IAC5D,SAAS,cAAc;CACxB;AAED,MAAM,WAAW,YAAY;IAC3B,eAAe,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAC1B,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED,MAAM,WAAW,WAAW;IAC1B,aAAa,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,YAAY,EAAE,YAAY,CAAC;IAC3B,YAAY,EAAE,OAAO,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE;QACP,aAAa,EAAE,MAAM,CAAC;QACtB,YAAY,EAAE,MAAM,CAAC;QACrB,YAAY,EAAE,MAAM,CAAC;KACtB,CAAC;CACH;AAED,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,YAAY,CAAC;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED;;;;;;;;GAQG;AACH,qBAAa,oBAAoB;IAC/B,OAAO,CAAC,MAAM,CAAiC;IAG/C,OAAO,CAAC,aAAa,CAA4B;IACjD,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,aAAa,CAAsB;IAC3C,OAAO,CAAC,gBAAgB,CAAsB;IAC9C,OAAO,CAAC,gBAAgB,CAAsB;IAC9C,OAAO,CAAC,WAAW,CAA8B;IAGjD,OAAO,CAAC,WAAW,CAAc;IAGjC,OAAO,CAAC,cAAc,CAMpB;gBAEU,MAAM,CAAC,EAAE,oBAAoB;IAmEzC;;;;;;;;;;;OAWG;IACG,KAAK,CAAC,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,WAAW,CAAC;IAiHxD;;;;;;;;;OASG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAmBlC;;;;;;;;;;OAUG;IACH,aAAa,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAoBlC;;;;;OAKG;IACH,eAAe,CAAC,KAAK,EAAE,MAAM,GAAG,YAAY;IAI5C;;;;OAIG;IACH,cAAc,IAAI,WAAW,EAAE;IA0B/B;;;;OAIG;IACH,UAAU,IAAI,OAAO,IAAI,CAAC,cAAc;IAIxC;;;;OAIG;IACH,YAAY,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI;IAajC;;;;;;OAMG;IACH,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,oBAAoB,CAAC,GAAG,IAAI;IAQzD;;OAEG;IACH,OAAO,CAAC,WAAW;IAmBnB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IA4B3B;;;;;;;;OAQG;IACH,OAAO,CAAC,mBAAmB;IAqB3B;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAaxB;;OAEG;IACH,OAAO,CAAC,oBAAoB;IAM5B;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;OAEG;IACH,OAAO,IAAI,IAAI;CAUhB"}
|