agentic-flow 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/MIGRATION_SUMMARY.md +222 -0
- package/.claude/agents/README.md +89 -0
- package/.claude/agents/analysis/code-analyzer.md +209 -0
- package/.claude/agents/analysis/code-review/analyze-code-quality.md +180 -0
- package/.claude/agents/architecture/system-design/arch-system-design.md +156 -0
- package/.claude/agents/base-template-generator.md +42 -0
- package/.claude/agents/consensus/README.md +253 -0
- package/.claude/agents/consensus/byzantine-coordinator.md +63 -0
- package/.claude/agents/consensus/crdt-synchronizer.md +997 -0
- package/.claude/agents/consensus/gossip-coordinator.md +63 -0
- package/.claude/agents/consensus/performance-benchmarker.md +851 -0
- package/.claude/agents/consensus/quorum-manager.md +823 -0
- package/.claude/agents/consensus/raft-manager.md +63 -0
- package/.claude/agents/consensus/security-manager.md +622 -0
- package/.claude/agents/core/coder.md +211 -0
- package/.claude/agents/core/planner.md +116 -0
- package/.claude/agents/core/researcher.md +136 -0
- package/.claude/agents/core/reviewer.md +272 -0
- package/.claude/agents/core/tester.md +266 -0
- package/.claude/agents/data/ml/data-ml-model.md +193 -0
- package/.claude/agents/development/backend/dev-backend-api.md +142 -0
- package/.claude/agents/devops/ci-cd/ops-cicd-github.md +164 -0
- package/.claude/agents/documentation/api-docs/docs-api-openapi.md +174 -0
- package/.claude/agents/flow-nexus/app-store.md +88 -0
- package/.claude/agents/flow-nexus/authentication.md +69 -0
- package/.claude/agents/flow-nexus/challenges.md +81 -0
- package/.claude/agents/flow-nexus/neural-network.md +88 -0
- package/.claude/agents/flow-nexus/payments.md +83 -0
- package/.claude/agents/flow-nexus/sandbox.md +76 -0
- package/.claude/agents/flow-nexus/swarm.md +76 -0
- package/.claude/agents/flow-nexus/user-tools.md +96 -0
- package/.claude/agents/flow-nexus/workflow.md +84 -0
- package/.claude/agents/github/code-review-swarm.md +538 -0
- package/.claude/agents/github/github-modes.md +173 -0
- package/.claude/agents/github/issue-tracker.md +319 -0
- package/.claude/agents/github/multi-repo-swarm.md +553 -0
- package/.claude/agents/github/pr-manager.md +191 -0
- package/.claude/agents/github/project-board-sync.md +509 -0
- package/.claude/agents/github/release-manager.md +367 -0
- package/.claude/agents/github/release-swarm.md +583 -0
- package/.claude/agents/github/repo-architect.md +398 -0
- package/.claude/agents/github/swarm-issue.md +573 -0
- package/.claude/agents/github/swarm-pr.md +428 -0
- package/.claude/agents/github/sync-coordinator.md +452 -0
- package/.claude/agents/github/workflow-automation.md +635 -0
- package/.claude/agents/goal/agent.md +816 -0
- package/.claude/agents/goal/goal-planner.md +73 -0
- package/.claude/agents/optimization/README.md +250 -0
- package/.claude/agents/optimization/benchmark-suite.md +665 -0
- package/.claude/agents/optimization/load-balancer.md +431 -0
- package/.claude/agents/optimization/performance-monitor.md +672 -0
- package/.claude/agents/optimization/resource-allocator.md +674 -0
- package/.claude/agents/optimization/topology-optimizer.md +808 -0
- package/.claude/agents/payments/agentic-payments.md +126 -0
- package/.claude/agents/sparc/architecture.md +472 -0
- package/.claude/agents/sparc/pseudocode.md +318 -0
- package/.claude/agents/sparc/refinement.md +525 -0
- package/.claude/agents/sparc/specification.md +276 -0
- package/.claude/agents/specialized/mobile/spec-mobile-react-native.md +226 -0
- package/.claude/agents/sublinear/consensus-coordinator.md +338 -0
- package/.claude/agents/sublinear/matrix-optimizer.md +185 -0
- package/.claude/agents/sublinear/pagerank-analyzer.md +299 -0
- package/.claude/agents/sublinear/performance-optimizer.md +368 -0
- package/.claude/agents/sublinear/trading-predictor.md +246 -0
- package/.claude/agents/swarm/README.md +190 -0
- package/.claude/agents/swarm/adaptive-coordinator.md +396 -0
- package/.claude/agents/swarm/hierarchical-coordinator.md +256 -0
- package/.claude/agents/swarm/mesh-coordinator.md +392 -0
- package/.claude/agents/templates/automation-smart-agent.md +205 -0
- package/.claude/agents/templates/coordinator-swarm-init.md +90 -0
- package/.claude/agents/templates/github-pr-manager.md +177 -0
- package/.claude/agents/templates/implementer-sparc-coder.md +259 -0
- package/.claude/agents/templates/memory-coordinator.md +187 -0
- package/.claude/agents/templates/migration-plan.md +746 -0
- package/.claude/agents/templates/orchestrator-task.md +139 -0
- package/.claude/agents/templates/performance-analyzer.md +199 -0
- package/.claude/agents/templates/sparc-coordinator.md +183 -0
- package/.claude/agents/test-neural.md +14 -0
- package/.claude/agents/testing/unit/tdd-london-swarm.md +244 -0
- package/.claude/agents/testing/validation/production-validator.md +395 -0
- package/.claude/commands/agents/README.md +10 -0
- package/.claude/commands/agents/agent-capabilities.md +21 -0
- package/.claude/commands/agents/agent-coordination.md +28 -0
- package/.claude/commands/agents/agent-spawning.md +28 -0
- package/.claude/commands/agents/agent-types.md +26 -0
- package/.claude/commands/analysis/COMMAND_COMPLIANCE_REPORT.md +54 -0
- package/.claude/commands/analysis/README.md +9 -0
- package/.claude/commands/analysis/bottleneck-detect.md +162 -0
- package/.claude/commands/analysis/performance-bottlenecks.md +59 -0
- package/.claude/commands/analysis/performance-report.md +25 -0
- package/.claude/commands/analysis/token-efficiency.md +45 -0
- package/.claude/commands/analysis/token-usage.md +25 -0
- package/.claude/commands/automation/README.md +9 -0
- package/.claude/commands/automation/auto-agent.md +122 -0
- package/.claude/commands/automation/self-healing.md +106 -0
- package/.claude/commands/automation/session-memory.md +90 -0
- package/.claude/commands/automation/smart-agents.md +73 -0
- package/.claude/commands/automation/smart-spawn.md +25 -0
- package/.claude/commands/automation/workflow-select.md +25 -0
- package/.claude/commands/claude-flow-help.md +103 -0
- package/.claude/commands/claude-flow-memory.md +107 -0
- package/.claude/commands/claude-flow-swarm.md +205 -0
- package/.claude/commands/coordination/README.md +9 -0
- package/.claude/commands/coordination/agent-spawn.md +25 -0
- package/.claude/commands/coordination/init.md +44 -0
- package/.claude/commands/coordination/orchestrate.md +43 -0
- package/.claude/commands/coordination/spawn.md +45 -0
- package/.claude/commands/coordination/swarm-init.md +85 -0
- package/.claude/commands/coordination/task-orchestrate.md +25 -0
- package/.claude/commands/flow-nexus/app-store.md +124 -0
- package/.claude/commands/flow-nexus/challenges.md +120 -0
- package/.claude/commands/flow-nexus/login-registration.md +65 -0
- package/.claude/commands/flow-nexus/neural-network.md +134 -0
- package/.claude/commands/flow-nexus/payments.md +116 -0
- package/.claude/commands/flow-nexus/sandbox.md +83 -0
- package/.claude/commands/flow-nexus/swarm.md +87 -0
- package/.claude/commands/flow-nexus/user-tools.md +152 -0
- package/.claude/commands/flow-nexus/workflow.md +115 -0
- package/.claude/commands/github/README.md +11 -0
- package/.claude/commands/github/code-review-swarm.md +514 -0
- package/.claude/commands/github/code-review.md +25 -0
- package/.claude/commands/github/github-modes.md +147 -0
- package/.claude/commands/github/github-swarm.md +121 -0
- package/.claude/commands/github/issue-tracker.md +292 -0
- package/.claude/commands/github/issue-triage.md +25 -0
- package/.claude/commands/github/multi-repo-swarm.md +519 -0
- package/.claude/commands/github/pr-enhance.md +26 -0
- package/.claude/commands/github/pr-manager.md +170 -0
- package/.claude/commands/github/project-board-sync.md +471 -0
- package/.claude/commands/github/release-manager.md +338 -0
- package/.claude/commands/github/release-swarm.md +544 -0
- package/.claude/commands/github/repo-analyze.md +25 -0
- package/.claude/commands/github/repo-architect.md +367 -0
- package/.claude/commands/github/swarm-issue.md +482 -0
- package/.claude/commands/github/swarm-pr.md +285 -0
- package/.claude/commands/github/sync-coordinator.md +301 -0
- package/.claude/commands/github/workflow-automation.md +442 -0
- package/.claude/commands/hive-mind/README.md +17 -0
- package/.claude/commands/hive-mind/hive-mind-consensus.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-init.md +18 -0
- package/.claude/commands/hive-mind/hive-mind-memory.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-metrics.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-resume.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-sessions.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-spawn.md +21 -0
- package/.claude/commands/hive-mind/hive-mind-status.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-stop.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-wizard.md +8 -0
- package/.claude/commands/hive-mind/hive-mind.md +27 -0
- package/.claude/commands/hooks/README.md +11 -0
- package/.claude/commands/hooks/overview.md +58 -0
- package/.claude/commands/hooks/post-edit.md +117 -0
- package/.claude/commands/hooks/post-task.md +112 -0
- package/.claude/commands/hooks/pre-edit.md +113 -0
- package/.claude/commands/hooks/pre-task.md +111 -0
- package/.claude/commands/hooks/session-end.md +118 -0
- package/.claude/commands/hooks/setup.md +103 -0
- package/.claude/commands/memory/README.md +9 -0
- package/.claude/commands/memory/memory-persist.md +25 -0
- package/.claude/commands/memory/memory-search.md +25 -0
- package/.claude/commands/memory/memory-usage.md +25 -0
- package/.claude/commands/memory/neural.md +47 -0
- package/.claude/commands/memory/usage.md +46 -0
- package/.claude/commands/monitoring/README.md +9 -0
- package/.claude/commands/monitoring/agent-metrics.md +25 -0
- package/.claude/commands/monitoring/agents.md +44 -0
- package/.claude/commands/monitoring/real-time-view.md +25 -0
- package/.claude/commands/monitoring/status.md +46 -0
- package/.claude/commands/monitoring/swarm-monitor.md +25 -0
- package/.claude/commands/optimization/README.md +9 -0
- package/.claude/commands/optimization/auto-topology.md +62 -0
- package/.claude/commands/optimization/cache-manage.md +25 -0
- package/.claude/commands/optimization/parallel-execute.md +25 -0
- package/.claude/commands/optimization/parallel-execution.md +50 -0
- package/.claude/commands/optimization/topology-optimize.md +25 -0
- package/.claude/commands/pair/README.md +261 -0
- package/.claude/commands/pair/commands.md +546 -0
- package/.claude/commands/pair/config.md +510 -0
- package/.claude/commands/pair/examples.md +512 -0
- package/.claude/commands/pair/modes.md +348 -0
- package/.claude/commands/pair/session.md +407 -0
- package/.claude/commands/pair/start.md +209 -0
- package/.claude/commands/sparc/analyzer.md +52 -0
- package/.claude/commands/sparc/architect.md +53 -0
- package/.claude/commands/sparc/ask.md +97 -0
- package/.claude/commands/sparc/batch-executor.md +54 -0
- package/.claude/commands/sparc/code.md +89 -0
- package/.claude/commands/sparc/coder.md +54 -0
- package/.claude/commands/sparc/debug.md +83 -0
- package/.claude/commands/sparc/debugger.md +54 -0
- package/.claude/commands/sparc/designer.md +53 -0
- package/.claude/commands/sparc/devops.md +109 -0
- package/.claude/commands/sparc/docs-writer.md +80 -0
- package/.claude/commands/sparc/documenter.md +54 -0
- package/.claude/commands/sparc/innovator.md +54 -0
- package/.claude/commands/sparc/integration.md +83 -0
- package/.claude/commands/sparc/mcp.md +117 -0
- package/.claude/commands/sparc/memory-manager.md +54 -0
- package/.claude/commands/sparc/optimizer.md +54 -0
- package/.claude/commands/sparc/orchestrator.md +132 -0
- package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -0
- package/.claude/commands/sparc/refinement-optimization-mode.md +83 -0
- package/.claude/commands/sparc/researcher.md +54 -0
- package/.claude/commands/sparc/reviewer.md +54 -0
- package/.claude/commands/sparc/security-review.md +80 -0
- package/.claude/commands/sparc/sparc-modes.md +174 -0
- package/.claude/commands/sparc/sparc.md +111 -0
- package/.claude/commands/sparc/spec-pseudocode.md +80 -0
- package/.claude/commands/sparc/supabase-admin.md +348 -0
- package/.claude/commands/sparc/swarm-coordinator.md +54 -0
- package/.claude/commands/sparc/tdd.md +54 -0
- package/.claude/commands/sparc/tester.md +54 -0
- package/.claude/commands/sparc/tutorial.md +79 -0
- package/.claude/commands/sparc/workflow-manager.md +54 -0
- package/.claude/commands/sparc.md +166 -0
- package/.claude/commands/stream-chain/pipeline.md +121 -0
- package/.claude/commands/stream-chain/run.md +70 -0
- package/.claude/commands/swarm/README.md +15 -0
- package/.claude/commands/swarm/analysis.md +95 -0
- package/.claude/commands/swarm/development.md +96 -0
- package/.claude/commands/swarm/examples.md +168 -0
- package/.claude/commands/swarm/maintenance.md +102 -0
- package/.claude/commands/swarm/optimization.md +117 -0
- package/.claude/commands/swarm/research.md +136 -0
- package/.claude/commands/swarm/swarm-analysis.md +8 -0
- package/.claude/commands/swarm/swarm-background.md +8 -0
- package/.claude/commands/swarm/swarm-init.md +19 -0
- package/.claude/commands/swarm/swarm-modes.md +8 -0
- package/.claude/commands/swarm/swarm-monitor.md +8 -0
- package/.claude/commands/swarm/swarm-spawn.md +19 -0
- package/.claude/commands/swarm/swarm-status.md +8 -0
- package/.claude/commands/swarm/swarm-strategies.md +8 -0
- package/.claude/commands/swarm/swarm.md +27 -0
- package/.claude/commands/swarm/testing.md +131 -0
- package/.claude/commands/training/README.md +9 -0
- package/.claude/commands/training/model-update.md +25 -0
- package/.claude/commands/training/neural-patterns.md +74 -0
- package/.claude/commands/training/neural-train.md +25 -0
- package/.claude/commands/training/pattern-learn.md +25 -0
- package/.claude/commands/training/specialization.md +63 -0
- package/.claude/commands/truth/start.md +143 -0
- package/.claude/commands/verify/check.md +50 -0
- package/.claude/commands/verify/start.md +128 -0
- package/.claude/commands/workflows/README.md +9 -0
- package/.claude/commands/workflows/development.md +78 -0
- package/.claude/commands/workflows/research.md +63 -0
- package/.claude/commands/workflows/workflow-create.md +25 -0
- package/.claude/commands/workflows/workflow-execute.md +25 -0
- package/.claude/commands/workflows/workflow-export.md +25 -0
- package/.claude/helpers/checkpoint-manager.sh +251 -0
- package/.claude/helpers/github-safe.js +106 -0
- package/.claude/helpers/github-setup.sh +28 -0
- package/.claude/helpers/quick-start.sh +19 -0
- package/.claude/helpers/setup-mcp.sh +18 -0
- package/.claude/helpers/standard-checkpoint-hooks.sh +179 -0
- package/.claude/mcp.json +13 -0
- package/.claude/settings-backup.json +130 -0
- package/.claude/settings-optimized.json +116 -0
- package/.claude/settings-simple.json +78 -0
- package/.claude/settings.json +114 -0
- package/.claude/settings.local.json +14 -0
- package/README.md +1280 -0
- package/dist/agents/claudeAgent.js +73 -0
- package/dist/agents/claudeFlowAgent.js +115 -0
- package/dist/agents/codeReviewAgent.js +34 -0
- package/dist/agents/dataAgent.js +34 -0
- package/dist/agents/directApiAgent.js +260 -0
- package/dist/agents/webResearchAgent.js +35 -0
- package/dist/cli/mcp.js +135 -0
- package/dist/cli-proxy.js +246 -0
- package/dist/cli.js +158 -0
- package/dist/config/claudeFlow.js +67 -0
- package/dist/config/tools.js +33 -0
- package/dist/coordination/parallelSwarm.js +226 -0
- package/dist/examples/multi-agent-orchestration.js +45 -0
- package/dist/examples/parallel-swarm-deployment.js +171 -0
- package/dist/examples/use-goal-planner.js +52 -0
- package/dist/health.js +46 -0
- package/dist/index-with-proxy.js +101 -0
- package/dist/index.js +167 -0
- package/dist/mcp/claudeFlowSdkServer.js +202 -0
- package/dist/mcp/fastmcp/servers/claude-flow-sdk.js +198 -0
- package/dist/mcp/fastmcp/servers/http-streaming-updated.js +421 -0
- package/dist/mcp/fastmcp/servers/poc-stdio.js +82 -0
- package/dist/mcp/fastmcp/servers/stdio-full.js +421 -0
- package/dist/mcp/fastmcp/tools/agent/add-agent.js +107 -0
- package/dist/mcp/fastmcp/tools/agent/add-command.js +117 -0
- package/dist/mcp/fastmcp/tools/agent/execute.js +56 -0
- package/dist/mcp/fastmcp/tools/agent/list.js +82 -0
- package/dist/mcp/fastmcp/tools/agent/parallel.js +63 -0
- package/dist/mcp/fastmcp/tools/memory/retrieve.js +38 -0
- package/dist/mcp/fastmcp/tools/memory/search.js +41 -0
- package/dist/mcp/fastmcp/tools/memory/store.js +56 -0
- package/dist/mcp/fastmcp/tools/swarm/init.js +41 -0
- package/dist/mcp/fastmcp/tools/swarm/orchestrate.js +47 -0
- package/dist/mcp/fastmcp/tools/swarm/spawn.js +40 -0
- package/dist/mcp/fastmcp/types/index.js +2 -0
- package/dist/proxy/anthropic-to-openrouter.js +246 -0
- package/dist/router/providers/anthropic.js +89 -0
- package/dist/router/providers/onnx-local-optimized.js +167 -0
- package/dist/router/providers/onnx-local.js +294 -0
- package/dist/router/providers/onnx-phi4.js +190 -0
- package/dist/router/providers/onnx.js +242 -0
- package/dist/router/providers/openrouter.js +242 -0
- package/dist/router/router.js +283 -0
- package/dist/router/test-integration.js +140 -0
- package/dist/router/test-onnx-benchmark.js +145 -0
- package/dist/router/test-onnx-integration.js +128 -0
- package/dist/router/test-onnx-local.js +37 -0
- package/dist/router/test-onnx.js +148 -0
- package/dist/router/test-openrouter.js +121 -0
- package/dist/router/test-phi4.js +137 -0
- package/dist/router/types.js +2 -0
- package/dist/utils/agentLoader.js +106 -0
- package/dist/utils/cli.js +128 -0
- package/dist/utils/logger.js +41 -0
- package/dist/utils/mcpCommands.js +214 -0
- package/dist/utils/model-downloader.js +182 -0
- package/dist/utils/retry.js +54 -0
- package/docs/.claude-flow/metrics/agent-metrics.json +1 -0
- package/docs/.claude-flow/metrics/performance.json +9 -0
- package/docs/.claude-flow/metrics/task-metrics.json +10 -0
- package/docs/CHANGELOG.md +155 -0
- package/docs/CLAUDE.md +352 -0
- package/docs/COMPLETE_VALIDATION_SUMMARY.md +405 -0
- package/docs/INDEX.md +183 -0
- package/docs/LICENSE +21 -0
- package/docs/ONNX_CLI_USAGE.md +344 -0
- package/docs/ONNX_ENV_VARS.md +564 -0
- package/docs/ONNX_INTEGRATION.md +422 -0
- package/docs/ONNX_OPTIMIZATION_GUIDE.md +665 -0
- package/docs/ONNX_OPTIMIZATION_SUMMARY.md +374 -0
- package/docs/ONNX_VS_CLAUDE_QUALITY.md +442 -0
- package/docs/OPENROUTER_DEPLOYMENT.md +495 -0
- package/docs/architecture/EXECUTIVE_SUMMARY.md +310 -0
- package/docs/architecture/IMPROVEMENT_PLAN.md +11 -0
- package/docs/architecture/INTEGRATION-STATUS.md +290 -0
- package/docs/architecture/MULTI_MODEL_ROUTER_PLAN.md +620 -0
- package/docs/architecture/QUICK_WINS.md +333 -0
- package/docs/architecture/README.md +15 -0
- package/docs/architecture/RESEARCH_SUMMARY.md +652 -0
- package/docs/archived/FASTMCP_COMPLETE.md +428 -0
- package/docs/archived/FASTMCP_INTEGRATION_STATUS.md +288 -0
- package/docs/archived/FLOW-NEXUS-COMPLETE.md +269 -0
- package/docs/archived/INTEGRATION_CONFIRMED.md +351 -0
- package/docs/archived/ONNX_FINAL_REPORT.md +312 -0
- package/docs/archived/ONNX_IMPLEMENTATION_COMPLETE.md +215 -0
- package/docs/archived/ONNX_IMPLEMENTATION_SUMMARY.md +197 -0
- package/docs/archived/ONNX_SUCCESS_REPORT.md +271 -0
- package/docs/archived/OPENROUTER_PROXY_COMPLETE.md +494 -0
- package/docs/archived/PACKAGE-COMPLETE.md +138 -0
- package/docs/archived/README.md +27 -0
- package/docs/archived/RESEARCH_COMPLETE.txt +335 -0
- package/docs/archived/SDK-SETUP-COMPLETE.md +252 -0
- package/docs/guides/ALTERNATIVE_LLM_MODELS.md +524 -0
- package/docs/guides/DOCKER_AGENT_USAGE.md +352 -0
- package/docs/guides/IMPLEMENTATION_EXAMPLES.md +960 -0
- package/docs/guides/NPM-PUBLISH.md +218 -0
- package/docs/guides/README.md +17 -0
- package/docs/guides/agent-sdk.md +234 -0
- package/docs/integrations/CLAUDE_AGENTS_INTEGRATION.md +356 -0
- package/docs/integrations/CLAUDE_FLOW_INTEGRATION.md +535 -0
- package/docs/integrations/FASTMCP_CLI_INTEGRATION.md +503 -0
- package/docs/integrations/FLOW-NEXUS-INTEGRATION.md +319 -0
- package/docs/integrations/README.md +18 -0
- package/docs/integrations/fastmcp-implementation-plan.md +2516 -0
- package/docs/integrations/fastmcp-poc-integration.md +198 -0
- package/docs/router/ONNX_PHI4_RESEARCH.md +220 -0
- package/docs/router/ONNX_RUNTIME_INTEGRATION_PLAN.md +866 -0
- package/docs/router/PHI4_HYPEROPTIMIZATION_PLAN.md +2488 -0
- package/docs/router/README.md +552 -0
- package/docs/router/ROUTER_CONFIG_REFERENCE.md +577 -0
- package/docs/router/ROUTER_USER_GUIDE.md +865 -0
- package/docs/validation/DOCKER_MCP_VALIDATION.md +358 -0
- package/docs/validation/DOCKER_OPENROUTER_VALIDATION.md +443 -0
- package/docs/validation/FINAL_SYSTEM_VALIDATION.md +458 -0
- package/docs/validation/FINAL_VALIDATION_SUMMARY.md +409 -0
- package/docs/validation/MCP_CLI_TOOLS_VALIDATION.md +266 -0
- package/docs/validation/MODEL_VALIDATION_REPORT.md +386 -0
- package/docs/validation/OPENROUTER_VALIDATION_COMPLETE.md +382 -0
- package/docs/validation/README.md +20 -0
- package/docs/validation/ROUTER_VALIDATION.md +311 -0
- package/package.json +140 -0
|
@@ -0,0 +1,564 @@
|
|
|
1
|
+
# ONNX Environment Variables Reference
|
|
2
|
+
|
|
3
|
+
Complete guide to configuring ONNX local inference via environment variables.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Enable ONNX with all optimizations
|
|
9
|
+
export PROVIDER=onnx
|
|
10
|
+
export ONNX_OPTIMIZED=true
|
|
11
|
+
|
|
12
|
+
# Run your agent
|
|
13
|
+
npx agentic-flow --agent coder --task "Build feature"
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Provider Selection
|
|
19
|
+
|
|
20
|
+
### `PROVIDER`
|
|
21
|
+
**Values:** `anthropic` | `openrouter` | `onnx`
|
|
22
|
+
**Default:** `anthropic`
|
|
23
|
+
**Description:** Set the AI provider for all CLI commands
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Use ONNX for all commands
|
|
27
|
+
export PROVIDER=onnx
|
|
28
|
+
npx agentic-flow --agent coder --task "test"
|
|
29
|
+
|
|
30
|
+
# Use OpenRouter
|
|
31
|
+
export PROVIDER=openrouter
|
|
32
|
+
npx agentic-flow --agent coder --task "test"
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### `USE_ONNX`
|
|
36
|
+
**Values:** `true` | `false`
|
|
37
|
+
**Default:** `false`
|
|
38
|
+
**Description:** Force the ONNX provider (legacy; prefer `PROVIDER=onnx`)
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
export USE_ONNX=true
|
|
42
|
+
npx agentic-flow --agent coder --task "test"
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Model Configuration
|
|
48
|
+
|
|
49
|
+
### `ONNX_MODEL_PATH`
|
|
50
|
+
**Values:** File path
|
|
51
|
+
**Default:** `./models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx`
|
|
52
|
+
**Description:** Custom path to ONNX model file
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
# Use custom model location
|
|
56
|
+
export ONNX_MODEL_PATH=/mnt/models/custom-model.onnx
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### `ONNX_EXECUTION_PROVIDERS`
|
|
60
|
+
**Values:** Comma-separated list: `cpu`, `cuda`, `dml`, `coreml`
|
|
61
|
+
**Default:** `cpu`
|
|
62
|
+
**Description:** Execution providers for inference (affects speed dramatically)
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# CPU only (default, slowest)
|
|
66
|
+
export ONNX_EXECUTION_PROVIDERS=cpu
|
|
67
|
+
|
|
68
|
+
# NVIDIA GPU acceleration (10-50x faster)
|
|
69
|
+
export ONNX_EXECUTION_PROVIDERS=cuda,cpu
|
|
70
|
+
|
|
71
|
+
# Windows DirectML GPU (5-15x faster)
|
|
72
|
+
export ONNX_EXECUTION_PROVIDERS=dml,cpu
|
|
73
|
+
|
|
74
|
+
# macOS Apple Silicon (7-20x faster)
|
|
75
|
+
export ONNX_EXECUTION_PROVIDERS=coreml,cpu
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Performance Impact:**
|
|
79
|
+
- `cpu`: ~6 tokens/sec
|
|
80
|
+
- `cuda`: ~60-300 tokens/sec (10-50x faster)
|
|
81
|
+
- `dml`: ~30-100 tokens/sec (5-15x faster)
|
|
82
|
+
- `coreml`: ~40-120 tokens/sec (7-20x faster)
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Generation Parameters
|
|
87
|
+
|
|
88
|
+
### `ONNX_MAX_TOKENS`
|
|
89
|
+
**Values:** Integer (1-4096)
|
|
90
|
+
**Default:** `200`
|
|
91
|
+
**Description:** Maximum tokens to generate in response
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
# Short responses (faster)
|
|
95
|
+
export ONNX_MAX_TOKENS=100
|
|
96
|
+
|
|
97
|
+
# Long responses
|
|
98
|
+
export ONNX_MAX_TOKENS=500
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Tip:** Keep this under 300 for best speed. Context plus output must stay under 4096 tokens in total.
|
|
102
|
+
|
|
103
|
+
### `ONNX_TEMPERATURE`
|
|
104
|
+
**Values:** Float (0.0-2.0)
|
|
105
|
+
**Default:** `0.7` (base provider), `0.3` (optimized provider, i.e. `ONNX_OPTIMIZED=true`)
|
|
106
|
+
**Description:** Controls output randomness/creativity
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
# Deterministic code (recommended for code generation)
|
|
110
|
+
export ONNX_TEMPERATURE=0.2
|
|
111
|
+
|
|
112
|
+
# Balanced
|
|
113
|
+
export ONNX_TEMPERATURE=0.7
|
|
114
|
+
|
|
115
|
+
# Creative writing
|
|
116
|
+
export ONNX_TEMPERATURE=0.9
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
**Recommended Settings:**
|
|
120
|
+
| Task Type | Temperature | Why |
|
|
121
|
+
|-----------|-------------|-----|
|
|
122
|
+
| Code generation | 0.2-0.4 | Consistent syntax |
|
|
123
|
+
| Refactoring | 0.3-0.5 | Some creativity, but safe |
|
|
124
|
+
| Documentation | 0.5-0.7 | Clear but varied |
|
|
125
|
+
| Brainstorming | 0.7-0.9 | Diverse ideas |
|
|
126
|
+
| Math/Logic | 0.1-0.2 | Precise |
|
|
127
|
+
|
|
128
|
+
### `ONNX_TOP_K`
|
|
129
|
+
**Values:** Integer (1-100)
|
|
130
|
+
**Default:** `50`
|
|
131
|
+
**Description:** Number of highest-probability tokens (top K) considered when sampling
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
# More focused (deterministic)
|
|
135
|
+
export ONNX_TOP_K=20
|
|
136
|
+
|
|
137
|
+
# More diverse
|
|
138
|
+
export ONNX_TOP_K=80
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### `ONNX_TOP_P`
|
|
142
|
+
**Values:** Float (0.0-1.0)
|
|
143
|
+
**Default:** `0.9`
|
|
144
|
+
**Description:** Nucleus sampling threshold (probability mass)
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# Very focused
|
|
148
|
+
export ONNX_TOP_P=0.7
|
|
149
|
+
|
|
150
|
+
# Balanced
|
|
151
|
+
export ONNX_TOP_P=0.9
|
|
152
|
+
|
|
153
|
+
# Diverse
|
|
154
|
+
export ONNX_TOP_P=0.95
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### `ONNX_REPETITION_PENALTY`
|
|
158
|
+
**Values:** Float (1.0-2.0)
|
|
159
|
+
**Default:** `1.1`
|
|
160
|
+
**Description:** Penalty for token repetition
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
# No penalty (may repeat)
|
|
164
|
+
export ONNX_REPETITION_PENALTY=1.0
|
|
165
|
+
|
|
166
|
+
# Mild penalty (recommended)
|
|
167
|
+
export ONNX_REPETITION_PENALTY=1.1
|
|
168
|
+
|
|
169
|
+
# Strong penalty (more diverse but may lose coherence)
|
|
170
|
+
export ONNX_REPETITION_PENALTY=1.5
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Optimization Features
|
|
176
|
+
|
|
177
|
+
### `ONNX_OPTIMIZED`
|
|
178
|
+
**Values:** `true` | `false`
|
|
179
|
+
**Default:** `false`
|
|
180
|
+
**Description:** Enable optimized ONNX provider with context pruning and prompt enhancement
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
# Enable all optimizations (recommended)
|
|
184
|
+
export ONNX_OPTIMIZED=true
|
|
185
|
+
|
|
186
|
+
# Use base provider
|
|
187
|
+
export ONNX_OPTIMIZED=false
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
**Benefits when enabled:**
|
|
191
|
+
- 30-50% quality improvement via prompt optimization
|
|
192
|
+
- 2-4x speed improvement via context pruning
|
|
193
|
+
- Automatic sliding window context management
|
|
194
|
+
|
|
195
|
+
### `ONNX_MAX_CONTEXT_TOKENS`
|
|
196
|
+
**Values:** Integer (500-4000)
|
|
197
|
+
**Default:** `2048`
|
|
198
|
+
**Description:** Maximum context tokens (used when `ONNX_OPTIMIZED=true`)
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
# Smaller context (faster, less history)
|
|
202
|
+
export ONNX_MAX_CONTEXT_TOKENS=1000
|
|
203
|
+
|
|
204
|
+
# Larger context (slower, more history)
|
|
205
|
+
export ONNX_MAX_CONTEXT_TOKENS=3000
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
**Warning:** Total (context + output) must stay under 4096 tokens (Phi-4 limit)
|
|
209
|
+
|
|
210
|
+
### `ONNX_SLIDING_WINDOW`
|
|
211
|
+
**Values:** `true` | `false`
|
|
212
|
+
**Default:** `true` (when `ONNX_OPTIMIZED=true`)
|
|
213
|
+
**Description:** Enable sliding window context pruning
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
# Enable context pruning (recommended for speed)
|
|
217
|
+
export ONNX_SLIDING_WINDOW=true
|
|
218
|
+
|
|
219
|
+
# Disable (keep all context)
|
|
220
|
+
export ONNX_SLIDING_WINDOW=false
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
**Performance:** 2-4x faster inference by keeping only recent messages
|
|
224
|
+
|
|
225
|
+
### `ONNX_PROMPT_OPTIMIZATION`
|
|
226
|
+
**Values:** `true` | `false`
|
|
227
|
+
**Default:** `true` (when `ONNX_OPTIMIZED=true`)
|
|
228
|
+
**Description:** Auto-enhance prompts for better quality
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
# Enable prompt optimization (recommended for quality)
|
|
232
|
+
export ONNX_PROMPT_OPTIMIZATION=true
|
|
233
|
+
|
|
234
|
+
# Disable
|
|
235
|
+
export ONNX_PROMPT_OPTIMIZATION=false
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
**Quality:** 30-50% improvement by adding quality guidelines to code tasks
|
|
239
|
+
|
|
240
|
+
### `ONNX_CACHE_SYSTEM_PROMPTS`
|
|
241
|
+
**Values:** `true` | `false`
|
|
242
|
+
**Default:** `true` (when `ONNX_OPTIMIZED=true`)
|
|
243
|
+
**Description:** Cache processed system prompts for reuse
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
# Enable caching (faster repeated tasks)
|
|
247
|
+
export ONNX_CACHE_SYSTEM_PROMPTS=true
|
|
248
|
+
|
|
249
|
+
# Disable
|
|
250
|
+
export ONNX_CACHE_SYSTEM_PROMPTS=false
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
**Speed:** 30-40% faster on repeated prompts
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Preset Configurations
|
|
258
|
+
|
|
259
|
+
### Maximum Speed
|
|
260
|
+
```bash
|
|
261
|
+
export PROVIDER=onnx
|
|
262
|
+
export ONNX_EXECUTION_PROVIDERS=cuda,cpu  # or dml,cpu / coreml,cpu
|
|
263
|
+
export ONNX_OPTIMIZED=true
|
|
264
|
+
export ONNX_MAX_CONTEXT_TOKENS=1000
|
|
265
|
+
export ONNX_MAX_TOKENS=100
|
|
266
|
+
export ONNX_SLIDING_WINDOW=true
|
|
267
|
+
export ONNX_CACHE_SYSTEM_PROMPTS=true
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
**Result:** 180+ tokens/sec (with GPU), minimal latency
|
|
271
|
+
|
|
272
|
+
### Maximum Quality
|
|
273
|
+
```bash
|
|
274
|
+
export PROVIDER=onnx
|
|
275
|
+
export ONNX_OPTIMIZED=true
|
|
276
|
+
export ONNX_TEMPERATURE=0.3
|
|
277
|
+
export ONNX_TOP_P=0.9
|
|
278
|
+
export ONNX_TOP_K=50
|
|
279
|
+
export ONNX_REPETITION_PENALTY=1.1
|
|
280
|
+
export ONNX_PROMPT_OPTIMIZATION=true
|
|
281
|
+
export ONNX_MAX_TOKENS=300
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
**Result:** 8.5/10 quality for code tasks
|
|
285
|
+
|
|
286
|
+
### Balanced
|
|
287
|
+
```bash
|
|
288
|
+
export PROVIDER=onnx
|
|
289
|
+
export ONNX_OPTIMIZED=true
|
|
290
|
+
export ONNX_EXECUTION_PROVIDERS=cpu  # or cuda,cpu / dml,cpu / coreml,cpu for GPU
|
|
291
|
+
export ONNX_TEMPERATURE=0.3
|
|
292
|
+
export ONNX_MAX_TOKENS=200
|
|
293
|
+
export ONNX_MAX_CONTEXT_TOKENS=1500
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
**Result:** Good quality + speed tradeoff
|
|
297
|
+
|
|
298
|
+
### CPU Only (No GPU)
|
|
299
|
+
```bash
|
|
300
|
+
export PROVIDER=onnx
|
|
301
|
+
export ONNX_OPTIMIZED=true
|
|
302
|
+
export ONNX_EXECUTION_PROVIDERS=cpu
|
|
303
|
+
export ONNX_MAX_CONTEXT_TOKENS=1000
|
|
304
|
+
export ONNX_MAX_TOKENS=150
|
|
305
|
+
export ONNX_TEMPERATURE=0.3
|
|
306
|
+
export ONNX_SLIDING_WINDOW=true
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
**Result:** Best CPU-only performance (though still only ~12 tokens/sec)
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## Use Case Configurations
|
|
314
|
+
|
|
315
|
+
### Code Generation
|
|
316
|
+
```bash
|
|
317
|
+
export PROVIDER=onnx
|
|
318
|
+
export ONNX_OPTIMIZED=true
|
|
319
|
+
export ONNX_TEMPERATURE=0.3 # Low temperature: more deterministic output
|
|
320
|
+
export ONNX_TOP_P=0.9
|
|
321
|
+
export ONNX_PROMPT_OPTIMIZATION=true
|
|
322
|
+
export ONNX_MAX_TOKENS=250
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### Code Review
|
|
326
|
+
```bash
|
|
327
|
+
export PROVIDER=onnx
|
|
328
|
+
export ONNX_OPTIMIZED=true
|
|
329
|
+
export ONNX_TEMPERATURE=0.4
|
|
330
|
+
export ONNX_MAX_TOKENS=300
|
|
331
|
+
export ONNX_MAX_CONTEXT_TOKENS=2000 # Need more context
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
### Documentation
|
|
335
|
+
```bash
|
|
336
|
+
export PROVIDER=onnx
|
|
337
|
+
export ONNX_OPTIMIZED=true
|
|
338
|
+
export ONNX_TEMPERATURE=0.6 # More creative
|
|
339
|
+
export ONNX_TOP_P=0.95
|
|
340
|
+
export ONNX_MAX_TOKENS=400
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
### Refactoring
|
|
344
|
+
```bash
|
|
345
|
+
export PROVIDER=onnx
|
|
346
|
+
export ONNX_OPTIMIZED=true
|
|
347
|
+
export ONNX_TEMPERATURE=0.35
|
|
348
|
+
export ONNX_MAX_TOKENS=200
|
|
349
|
+
export ONNX_SLIDING_WINDOW=true
|
|
350
|
+
```
|
|
351
|
+
|
|
352
|
+
---
|
|
353
|
+
|
|
354
|
+
## Performance Tuning
|
|
355
|
+
|
|
356
|
+
### Scenario 1: Too Slow (6 tokens/sec)
|
|
357
|
+
|
|
358
|
+
**Problem:** CPU-only inference is slow
|
|
359
|
+
**Solutions:**
|
|
360
|
+
1. Enable GPU acceleration (biggest impact)
|
|
361
|
+
2. Reduce context size
|
|
362
|
+
3. Enable sliding window
|
|
363
|
+
4. Reduce max tokens
|
|
364
|
+
|
|
365
|
+
```bash
|
|
366
|
+
# Quick wins (no hardware change)
|
|
367
|
+
export ONNX_MAX_CONTEXT_TOKENS=1000 # 2x faster
|
|
368
|
+
export ONNX_SLIDING_WINDOW=true
|
|
369
|
+
export ONNX_MAX_TOKENS=100
|
|
370
|
+
|
|
371
|
+
# Best solution (requires GPU)
|
|
372
|
+
export ONNX_EXECUTION_PROVIDERS=cuda,cpu # 30x faster!
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
### Scenario 2: Low Quality Output
|
|
376
|
+
|
|
377
|
+
**Problem:** Generated code has bugs/missing features
|
|
378
|
+
**Solutions:**
|
|
379
|
+
1. Enable optimizations
|
|
380
|
+
2. Lower temperature
|
|
381
|
+
3. Use specific prompts
|
|
382
|
+
4. Enable prompt optimization
|
|
383
|
+
|
|
384
|
+
```bash
|
|
385
|
+
export ONNX_OPTIMIZED=true
|
|
386
|
+
export ONNX_TEMPERATURE=0.3
|
|
387
|
+
export ONNX_PROMPT_OPTIMIZATION=true
|
|
388
|
+
export ONNX_TOP_K=50
|
|
389
|
+
export ONNX_TOP_P=0.9
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
### Scenario 3: Out of Memory
|
|
393
|
+
|
|
394
|
+
**Problem:** System runs out of RAM
|
|
395
|
+
**Solutions:**
|
|
396
|
+
1. Reduce context size
|
|
397
|
+
2. Reduce max tokens
|
|
398
|
+
3. Close other applications
|
|
399
|
+
|
|
400
|
+
```bash
|
|
401
|
+
export ONNX_MAX_CONTEXT_TOKENS=800
|
|
402
|
+
export ONNX_MAX_TOKENS=100
|
|
403
|
+
export ONNX_SLIDING_WINDOW=true
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
### Scenario 4: Repetitive Output
|
|
407
|
+
|
|
408
|
+
**Problem:** Model repeats same phrases
|
|
409
|
+
**Solutions:**
|
|
410
|
+
1. Increase repetition penalty
|
|
411
|
+
2. Adjust temperature
|
|
412
|
+
3. Change top_p/top_k
|
|
413
|
+
|
|
414
|
+
```bash
|
|
415
|
+
export ONNX_REPETITION_PENALTY=1.2
|
|
416
|
+
export ONNX_TEMPERATURE=0.4
|
|
417
|
+
export ONNX_TOP_P=0.85
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
---
|
|
421
|
+
|
|
422
|
+
## Debug and Logging
|
|
423
|
+
|
|
424
|
+
### `DEBUG`
|
|
425
|
+
**Values:** `true` | `false`
|
|
426
|
+
**Default:** `false`
|
|
427
|
+
**Description:** Enable detailed logging
|
|
428
|
+
|
|
429
|
+
```bash
|
|
430
|
+
export DEBUG=true
|
|
431
|
+
npx agentic-flow --agent coder --task "test"
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
### `ONNX_LOG_PERFORMANCE`
|
|
435
|
+
**Values:** `true` | `false`
|
|
436
|
+
**Default:** `false`
|
|
437
|
+
**Description:** Log performance metrics
|
|
438
|
+
|
|
439
|
+
```bash
|
|
440
|
+
export ONNX_LOG_PERFORMANCE=true
|
|
441
|
+
# Outputs: tokens/sec, latency, context size, etc.
|
|
442
|
+
```
|
|
443
|
+
|
|
444
|
+
---
|
|
445
|
+
|
|
446
|
+
## Example Workflows
|
|
447
|
+
|
|
448
|
+
### Daily Development (Local, Free)
|
|
449
|
+
|
|
450
|
+
```bash
|
|
451
|
+
# .env file
|
|
452
|
+
PROVIDER=onnx
|
|
453
|
+
ONNX_OPTIMIZED=true
|
|
454
|
+
ONNX_TEMPERATURE=0.3
|
|
455
|
+
ONNX_MAX_TOKENS=200
|
|
456
|
+
ONNX_EXECUTION_PROVIDERS=cpu
|
|
457
|
+
|
|
458
|
+
# Usage
|
|
459
|
+
npx agentic-flow --agent coder --task "Build feature"
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
### CI/CD Pipeline (Fast, Local)
|
|
463
|
+
|
|
464
|
+
```bash
|
|
465
|
+
# CI environment variables
|
|
466
|
+
PROVIDER=onnx
|
|
467
|
+
ONNX_OPTIMIZED=true
|
|
468
|
+
ONNX_MAX_CONTEXT_TOKENS=800
|
|
469
|
+
ONNX_MAX_TOKENS=100
|
|
470
|
+
ONNX_TEMPERATURE=0.2
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
### Hybrid: ONNX + Cloud Fallback
|
|
474
|
+
|
|
475
|
+
```bash
|
|
476
|
+
# Try ONNX first (80% of tasks)
|
|
477
|
+
export PROVIDER=onnx
|
|
478
|
+
export ONNX_OPTIMIZED=true
|
|
479
|
+
|
|
480
|
+
# For complex tasks, switch to cloud
|
|
481
|
+
unset PROVIDER
|
|
482
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
483
|
+
# or
|
|
484
|
+
export OPENROUTER_API_KEY=sk-or-v1-...
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
---
|
|
488
|
+
|
|
489
|
+
## Best Practices
|
|
490
|
+
|
|
491
|
+
1. **Always enable optimizations**
|
|
492
|
+
```bash
|
|
493
|
+
export ONNX_OPTIMIZED=true
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
2. **Lower temperature for code**
|
|
497
|
+
```bash
|
|
498
|
+
export ONNX_TEMPERATURE=0.3
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
3. **Enable GPU if available** (30x faster!)
|
|
502
|
+
```bash
|
|
503
|
+
export ONNX_EXECUTION_PROVIDERS=cuda,cpu
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
4. **Keep context under 2K tokens** (2-4x faster)
|
|
507
|
+
```bash
|
|
508
|
+
export ONNX_MAX_CONTEXT_TOKENS=1500
|
|
509
|
+
```
|
|
510
|
+
|
|
511
|
+
5. **Use `.env` file** for consistency
|
|
512
|
+
```bash
|
|
513
|
+
# Create .env file in project root
|
|
514
|
+
echo "PROVIDER=onnx" >> .env
|
|
515
|
+
echo "ONNX_OPTIMIZED=true" >> .env
|
|
516
|
+
echo "ONNX_TEMPERATURE=0.3" >> .env
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
---
|
|
520
|
+
|
|
521
|
+
## Troubleshooting
|
|
522
|
+
|
|
523
|
+
### Error: Model not found
|
|
524
|
+
```bash
|
|
525
|
+
# Check model path
|
|
526
|
+
ls -lh ./models/phi-4-mini/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/
|
|
527
|
+
|
|
528
|
+
# Re-download if missing
|
|
529
|
+
rm -rf ./models/phi-4-mini
|
|
530
|
+
npx agentic-flow --agent coder --task "test" --provider onnx
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
### Error: CUDA not available
|
|
534
|
+
```bash
|
|
535
|
+
# Check CUDA installation
|
|
536
|
+
nvidia-smi
|
|
537
|
+
|
|
538
|
+
# Fall back to CPU
|
|
539
|
+
export ONNX_EXECUTION_PROVIDERS=cpu
|
|
540
|
+
```
|
|
541
|
+
|
|
542
|
+
### Slow inference (< 10 tokens/sec)
|
|
543
|
+
```bash
|
|
544
|
+
# Enable optimizations
|
|
545
|
+
export ONNX_OPTIMIZED=true
|
|
546
|
+
export ONNX_MAX_CONTEXT_TOKENS=1000
|
|
547
|
+
export ONNX_SLIDING_WINDOW=true
|
|
548
|
+
|
|
549
|
+
# Best: Enable GPU
|
|
550
|
+
export ONNX_EXECUTION_PROVIDERS=cuda,cpu
|
|
551
|
+
```
|
|
552
|
+
|
|
553
|
+
---
|
|
554
|
+
|
|
555
|
+
## See Also
|
|
556
|
+
|
|
557
|
+
- [ONNX CLI Usage Guide](./ONNX_CLI_USAGE.md)
|
|
558
|
+
- [ONNX Optimization Guide](./ONNX_OPTIMIZATION_GUIDE.md)
|
|
559
|
+
- [ONNX vs Claude Quality](./ONNX_VS_CLAUDE_QUALITY.md)
|
|
560
|
+
- [Full ONNX Integration](./ONNX_INTEGRATION.md)
|
|
561
|
+
|
|
562
|
+
---
|
|
563
|
+
|
|
564
|
+
**Remember:** ONNX is free and runs locally. Optimize first, then decide if you need cloud providers for complex tasks.
|