agentic-flow 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/agents/MIGRATION_SUMMARY.md +222 -0
- package/.claude/agents/README.md +89 -0
- package/.claude/agents/analysis/code-analyzer.md +209 -0
- package/.claude/agents/analysis/code-review/analyze-code-quality.md +180 -0
- package/.claude/agents/architecture/system-design/arch-system-design.md +156 -0
- package/.claude/agents/base-template-generator.md +42 -0
- package/.claude/agents/consensus/README.md +253 -0
- package/.claude/agents/consensus/byzantine-coordinator.md +63 -0
- package/.claude/agents/consensus/crdt-synchronizer.md +997 -0
- package/.claude/agents/consensus/gossip-coordinator.md +63 -0
- package/.claude/agents/consensus/performance-benchmarker.md +851 -0
- package/.claude/agents/consensus/quorum-manager.md +823 -0
- package/.claude/agents/consensus/raft-manager.md +63 -0
- package/.claude/agents/consensus/security-manager.md +622 -0
- package/.claude/agents/core/coder.md +211 -0
- package/.claude/agents/core/planner.md +116 -0
- package/.claude/agents/core/researcher.md +136 -0
- package/.claude/agents/core/reviewer.md +272 -0
- package/.claude/agents/core/tester.md +266 -0
- package/.claude/agents/data/ml/data-ml-model.md +193 -0
- package/.claude/agents/development/backend/dev-backend-api.md +142 -0
- package/.claude/agents/devops/ci-cd/ops-cicd-github.md +164 -0
- package/.claude/agents/documentation/api-docs/docs-api-openapi.md +174 -0
- package/.claude/agents/flow-nexus/app-store.md +88 -0
- package/.claude/agents/flow-nexus/authentication.md +69 -0
- package/.claude/agents/flow-nexus/challenges.md +81 -0
- package/.claude/agents/flow-nexus/neural-network.md +88 -0
- package/.claude/agents/flow-nexus/payments.md +83 -0
- package/.claude/agents/flow-nexus/sandbox.md +76 -0
- package/.claude/agents/flow-nexus/swarm.md +76 -0
- package/.claude/agents/flow-nexus/user-tools.md +96 -0
- package/.claude/agents/flow-nexus/workflow.md +84 -0
- package/.claude/agents/github/code-review-swarm.md +538 -0
- package/.claude/agents/github/github-modes.md +173 -0
- package/.claude/agents/github/issue-tracker.md +319 -0
- package/.claude/agents/github/multi-repo-swarm.md +553 -0
- package/.claude/agents/github/pr-manager.md +191 -0
- package/.claude/agents/github/project-board-sync.md +509 -0
- package/.claude/agents/github/release-manager.md +367 -0
- package/.claude/agents/github/release-swarm.md +583 -0
- package/.claude/agents/github/repo-architect.md +398 -0
- package/.claude/agents/github/swarm-issue.md +573 -0
- package/.claude/agents/github/swarm-pr.md +428 -0
- package/.claude/agents/github/sync-coordinator.md +452 -0
- package/.claude/agents/github/workflow-automation.md +635 -0
- package/.claude/agents/goal/agent.md +816 -0
- package/.claude/agents/goal/goal-planner.md +73 -0
- package/.claude/agents/optimization/README.md +250 -0
- package/.claude/agents/optimization/benchmark-suite.md +665 -0
- package/.claude/agents/optimization/load-balancer.md +431 -0
- package/.claude/agents/optimization/performance-monitor.md +672 -0
- package/.claude/agents/optimization/resource-allocator.md +674 -0
- package/.claude/agents/optimization/topology-optimizer.md +808 -0
- package/.claude/agents/payments/agentic-payments.md +126 -0
- package/.claude/agents/sparc/architecture.md +472 -0
- package/.claude/agents/sparc/pseudocode.md +318 -0
- package/.claude/agents/sparc/refinement.md +525 -0
- package/.claude/agents/sparc/specification.md +276 -0
- package/.claude/agents/specialized/mobile/spec-mobile-react-native.md +226 -0
- package/.claude/agents/sublinear/consensus-coordinator.md +338 -0
- package/.claude/agents/sublinear/matrix-optimizer.md +185 -0
- package/.claude/agents/sublinear/pagerank-analyzer.md +299 -0
- package/.claude/agents/sublinear/performance-optimizer.md +368 -0
- package/.claude/agents/sublinear/trading-predictor.md +246 -0
- package/.claude/agents/swarm/README.md +190 -0
- package/.claude/agents/swarm/adaptive-coordinator.md +396 -0
- package/.claude/agents/swarm/hierarchical-coordinator.md +256 -0
- package/.claude/agents/swarm/mesh-coordinator.md +392 -0
- package/.claude/agents/templates/automation-smart-agent.md +205 -0
- package/.claude/agents/templates/coordinator-swarm-init.md +90 -0
- package/.claude/agents/templates/github-pr-manager.md +177 -0
- package/.claude/agents/templates/implementer-sparc-coder.md +259 -0
- package/.claude/agents/templates/memory-coordinator.md +187 -0
- package/.claude/agents/templates/migration-plan.md +746 -0
- package/.claude/agents/templates/orchestrator-task.md +139 -0
- package/.claude/agents/templates/performance-analyzer.md +199 -0
- package/.claude/agents/templates/sparc-coordinator.md +183 -0
- package/.claude/agents/test-neural.md +14 -0
- package/.claude/agents/testing/unit/tdd-london-swarm.md +244 -0
- package/.claude/agents/testing/validation/production-validator.md +395 -0
- package/.claude/commands/agents/README.md +10 -0
- package/.claude/commands/agents/agent-capabilities.md +21 -0
- package/.claude/commands/agents/agent-coordination.md +28 -0
- package/.claude/commands/agents/agent-spawning.md +28 -0
- package/.claude/commands/agents/agent-types.md +26 -0
- package/.claude/commands/analysis/COMMAND_COMPLIANCE_REPORT.md +54 -0
- package/.claude/commands/analysis/README.md +9 -0
- package/.claude/commands/analysis/bottleneck-detect.md +162 -0
- package/.claude/commands/analysis/performance-bottlenecks.md +59 -0
- package/.claude/commands/analysis/performance-report.md +25 -0
- package/.claude/commands/analysis/token-efficiency.md +45 -0
- package/.claude/commands/analysis/token-usage.md +25 -0
- package/.claude/commands/automation/README.md +9 -0
- package/.claude/commands/automation/auto-agent.md +122 -0
- package/.claude/commands/automation/self-healing.md +106 -0
- package/.claude/commands/automation/session-memory.md +90 -0
- package/.claude/commands/automation/smart-agents.md +73 -0
- package/.claude/commands/automation/smart-spawn.md +25 -0
- package/.claude/commands/automation/workflow-select.md +25 -0
- package/.claude/commands/claude-flow-help.md +103 -0
- package/.claude/commands/claude-flow-memory.md +107 -0
- package/.claude/commands/claude-flow-swarm.md +205 -0
- package/.claude/commands/coordination/README.md +9 -0
- package/.claude/commands/coordination/agent-spawn.md +25 -0
- package/.claude/commands/coordination/init.md +44 -0
- package/.claude/commands/coordination/orchestrate.md +43 -0
- package/.claude/commands/coordination/spawn.md +45 -0
- package/.claude/commands/coordination/swarm-init.md +85 -0
- package/.claude/commands/coordination/task-orchestrate.md +25 -0
- package/.claude/commands/flow-nexus/app-store.md +124 -0
- package/.claude/commands/flow-nexus/challenges.md +120 -0
- package/.claude/commands/flow-nexus/login-registration.md +65 -0
- package/.claude/commands/flow-nexus/neural-network.md +134 -0
- package/.claude/commands/flow-nexus/payments.md +116 -0
- package/.claude/commands/flow-nexus/sandbox.md +83 -0
- package/.claude/commands/flow-nexus/swarm.md +87 -0
- package/.claude/commands/flow-nexus/user-tools.md +152 -0
- package/.claude/commands/flow-nexus/workflow.md +115 -0
- package/.claude/commands/github/README.md +11 -0
- package/.claude/commands/github/code-review-swarm.md +514 -0
- package/.claude/commands/github/code-review.md +25 -0
- package/.claude/commands/github/github-modes.md +147 -0
- package/.claude/commands/github/github-swarm.md +121 -0
- package/.claude/commands/github/issue-tracker.md +292 -0
- package/.claude/commands/github/issue-triage.md +25 -0
- package/.claude/commands/github/multi-repo-swarm.md +519 -0
- package/.claude/commands/github/pr-enhance.md +26 -0
- package/.claude/commands/github/pr-manager.md +170 -0
- package/.claude/commands/github/project-board-sync.md +471 -0
- package/.claude/commands/github/release-manager.md +338 -0
- package/.claude/commands/github/release-swarm.md +544 -0
- package/.claude/commands/github/repo-analyze.md +25 -0
- package/.claude/commands/github/repo-architect.md +367 -0
- package/.claude/commands/github/swarm-issue.md +482 -0
- package/.claude/commands/github/swarm-pr.md +285 -0
- package/.claude/commands/github/sync-coordinator.md +301 -0
- package/.claude/commands/github/workflow-automation.md +442 -0
- package/.claude/commands/hive-mind/README.md +17 -0
- package/.claude/commands/hive-mind/hive-mind-consensus.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-init.md +18 -0
- package/.claude/commands/hive-mind/hive-mind-memory.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-metrics.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-resume.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-sessions.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-spawn.md +21 -0
- package/.claude/commands/hive-mind/hive-mind-status.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-stop.md +8 -0
- package/.claude/commands/hive-mind/hive-mind-wizard.md +8 -0
- package/.claude/commands/hive-mind/hive-mind.md +27 -0
- package/.claude/commands/hooks/README.md +11 -0
- package/.claude/commands/hooks/overview.md +58 -0
- package/.claude/commands/hooks/post-edit.md +117 -0
- package/.claude/commands/hooks/post-task.md +112 -0
- package/.claude/commands/hooks/pre-edit.md +113 -0
- package/.claude/commands/hooks/pre-task.md +111 -0
- package/.claude/commands/hooks/session-end.md +118 -0
- package/.claude/commands/hooks/setup.md +103 -0
- package/.claude/commands/memory/README.md +9 -0
- package/.claude/commands/memory/memory-persist.md +25 -0
- package/.claude/commands/memory/memory-search.md +25 -0
- package/.claude/commands/memory/memory-usage.md +25 -0
- package/.claude/commands/memory/neural.md +47 -0
- package/.claude/commands/memory/usage.md +46 -0
- package/.claude/commands/monitoring/README.md +9 -0
- package/.claude/commands/monitoring/agent-metrics.md +25 -0
- package/.claude/commands/monitoring/agents.md +44 -0
- package/.claude/commands/monitoring/real-time-view.md +25 -0
- package/.claude/commands/monitoring/status.md +46 -0
- package/.claude/commands/monitoring/swarm-monitor.md +25 -0
- package/.claude/commands/optimization/README.md +9 -0
- package/.claude/commands/optimization/auto-topology.md +62 -0
- package/.claude/commands/optimization/cache-manage.md +25 -0
- package/.claude/commands/optimization/parallel-execute.md +25 -0
- package/.claude/commands/optimization/parallel-execution.md +50 -0
- package/.claude/commands/optimization/topology-optimize.md +25 -0
- package/.claude/commands/pair/README.md +261 -0
- package/.claude/commands/pair/commands.md +546 -0
- package/.claude/commands/pair/config.md +510 -0
- package/.claude/commands/pair/examples.md +512 -0
- package/.claude/commands/pair/modes.md +348 -0
- package/.claude/commands/pair/session.md +407 -0
- package/.claude/commands/pair/start.md +209 -0
- package/.claude/commands/sparc/analyzer.md +52 -0
- package/.claude/commands/sparc/architect.md +53 -0
- package/.claude/commands/sparc/ask.md +97 -0
- package/.claude/commands/sparc/batch-executor.md +54 -0
- package/.claude/commands/sparc/code.md +89 -0
- package/.claude/commands/sparc/coder.md +54 -0
- package/.claude/commands/sparc/debug.md +83 -0
- package/.claude/commands/sparc/debugger.md +54 -0
- package/.claude/commands/sparc/designer.md +53 -0
- package/.claude/commands/sparc/devops.md +109 -0
- package/.claude/commands/sparc/docs-writer.md +80 -0
- package/.claude/commands/sparc/documenter.md +54 -0
- package/.claude/commands/sparc/innovator.md +54 -0
- package/.claude/commands/sparc/integration.md +83 -0
- package/.claude/commands/sparc/mcp.md +117 -0
- package/.claude/commands/sparc/memory-manager.md +54 -0
- package/.claude/commands/sparc/optimizer.md +54 -0
- package/.claude/commands/sparc/orchestrator.md +132 -0
- package/.claude/commands/sparc/post-deployment-monitoring-mode.md +83 -0
- package/.claude/commands/sparc/refinement-optimization-mode.md +83 -0
- package/.claude/commands/sparc/researcher.md +54 -0
- package/.claude/commands/sparc/reviewer.md +54 -0
- package/.claude/commands/sparc/security-review.md +80 -0
- package/.claude/commands/sparc/sparc-modes.md +174 -0
- package/.claude/commands/sparc/sparc.md +111 -0
- package/.claude/commands/sparc/spec-pseudocode.md +80 -0
- package/.claude/commands/sparc/supabase-admin.md +348 -0
- package/.claude/commands/sparc/swarm-coordinator.md +54 -0
- package/.claude/commands/sparc/tdd.md +54 -0
- package/.claude/commands/sparc/tester.md +54 -0
- package/.claude/commands/sparc/tutorial.md +79 -0
- package/.claude/commands/sparc/workflow-manager.md +54 -0
- package/.claude/commands/sparc.md +166 -0
- package/.claude/commands/stream-chain/pipeline.md +121 -0
- package/.claude/commands/stream-chain/run.md +70 -0
- package/.claude/commands/swarm/README.md +15 -0
- package/.claude/commands/swarm/analysis.md +95 -0
- package/.claude/commands/swarm/development.md +96 -0
- package/.claude/commands/swarm/examples.md +168 -0
- package/.claude/commands/swarm/maintenance.md +102 -0
- package/.claude/commands/swarm/optimization.md +117 -0
- package/.claude/commands/swarm/research.md +136 -0
- package/.claude/commands/swarm/swarm-analysis.md +8 -0
- package/.claude/commands/swarm/swarm-background.md +8 -0
- package/.claude/commands/swarm/swarm-init.md +19 -0
- package/.claude/commands/swarm/swarm-modes.md +8 -0
- package/.claude/commands/swarm/swarm-monitor.md +8 -0
- package/.claude/commands/swarm/swarm-spawn.md +19 -0
- package/.claude/commands/swarm/swarm-status.md +8 -0
- package/.claude/commands/swarm/swarm-strategies.md +8 -0
- package/.claude/commands/swarm/swarm.md +27 -0
- package/.claude/commands/swarm/testing.md +131 -0
- package/.claude/commands/training/README.md +9 -0
- package/.claude/commands/training/model-update.md +25 -0
- package/.claude/commands/training/neural-patterns.md +74 -0
- package/.claude/commands/training/neural-train.md +25 -0
- package/.claude/commands/training/pattern-learn.md +25 -0
- package/.claude/commands/training/specialization.md +63 -0
- package/.claude/commands/truth/start.md +143 -0
- package/.claude/commands/verify/check.md +50 -0
- package/.claude/commands/verify/start.md +128 -0
- package/.claude/commands/workflows/README.md +9 -0
- package/.claude/commands/workflows/development.md +78 -0
- package/.claude/commands/workflows/research.md +63 -0
- package/.claude/commands/workflows/workflow-create.md +25 -0
- package/.claude/commands/workflows/workflow-execute.md +25 -0
- package/.claude/commands/workflows/workflow-export.md +25 -0
- package/.claude/helpers/checkpoint-manager.sh +251 -0
- package/.claude/helpers/github-safe.js +106 -0
- package/.claude/helpers/github-setup.sh +28 -0
- package/.claude/helpers/quick-start.sh +19 -0
- package/.claude/helpers/setup-mcp.sh +18 -0
- package/.claude/helpers/standard-checkpoint-hooks.sh +179 -0
- package/.claude/mcp.json +13 -0
- package/.claude/settings-backup.json +130 -0
- package/.claude/settings-optimized.json +116 -0
- package/.claude/settings-simple.json +78 -0
- package/.claude/settings.json +114 -0
- package/.claude/settings.local.json +14 -0
- package/README.md +1280 -0
- package/dist/agents/claudeAgent.js +73 -0
- package/dist/agents/claudeFlowAgent.js +115 -0
- package/dist/agents/codeReviewAgent.js +34 -0
- package/dist/agents/dataAgent.js +34 -0
- package/dist/agents/directApiAgent.js +260 -0
- package/dist/agents/webResearchAgent.js +35 -0
- package/dist/cli/mcp.js +135 -0
- package/dist/cli-proxy.js +246 -0
- package/dist/cli.js +158 -0
- package/dist/config/claudeFlow.js +67 -0
- package/dist/config/tools.js +33 -0
- package/dist/coordination/parallelSwarm.js +226 -0
- package/dist/examples/multi-agent-orchestration.js +45 -0
- package/dist/examples/parallel-swarm-deployment.js +171 -0
- package/dist/examples/use-goal-planner.js +52 -0
- package/dist/health.js +46 -0
- package/dist/index-with-proxy.js +101 -0
- package/dist/index.js +167 -0
- package/dist/mcp/claudeFlowSdkServer.js +202 -0
- package/dist/mcp/fastmcp/servers/claude-flow-sdk.js +198 -0
- package/dist/mcp/fastmcp/servers/http-streaming-updated.js +421 -0
- package/dist/mcp/fastmcp/servers/poc-stdio.js +82 -0
- package/dist/mcp/fastmcp/servers/stdio-full.js +421 -0
- package/dist/mcp/fastmcp/tools/agent/add-agent.js +107 -0
- package/dist/mcp/fastmcp/tools/agent/add-command.js +117 -0
- package/dist/mcp/fastmcp/tools/agent/execute.js +56 -0
- package/dist/mcp/fastmcp/tools/agent/list.js +82 -0
- package/dist/mcp/fastmcp/tools/agent/parallel.js +63 -0
- package/dist/mcp/fastmcp/tools/memory/retrieve.js +38 -0
- package/dist/mcp/fastmcp/tools/memory/search.js +41 -0
- package/dist/mcp/fastmcp/tools/memory/store.js +56 -0
- package/dist/mcp/fastmcp/tools/swarm/init.js +41 -0
- package/dist/mcp/fastmcp/tools/swarm/orchestrate.js +47 -0
- package/dist/mcp/fastmcp/tools/swarm/spawn.js +40 -0
- package/dist/mcp/fastmcp/types/index.js +2 -0
- package/dist/proxy/anthropic-to-openrouter.js +246 -0
- package/dist/router/providers/anthropic.js +89 -0
- package/dist/router/providers/onnx-local-optimized.js +167 -0
- package/dist/router/providers/onnx-local.js +294 -0
- package/dist/router/providers/onnx-phi4.js +190 -0
- package/dist/router/providers/onnx.js +242 -0
- package/dist/router/providers/openrouter.js +242 -0
- package/dist/router/router.js +283 -0
- package/dist/router/test-integration.js +140 -0
- package/dist/router/test-onnx-benchmark.js +145 -0
- package/dist/router/test-onnx-integration.js +128 -0
- package/dist/router/test-onnx-local.js +37 -0
- package/dist/router/test-onnx.js +148 -0
- package/dist/router/test-openrouter.js +121 -0
- package/dist/router/test-phi4.js +137 -0
- package/dist/router/types.js +2 -0
- package/dist/utils/agentLoader.js +106 -0
- package/dist/utils/cli.js +128 -0
- package/dist/utils/logger.js +41 -0
- package/dist/utils/mcpCommands.js +214 -0
- package/dist/utils/model-downloader.js +182 -0
- package/dist/utils/retry.js +54 -0
- package/docs/.claude-flow/metrics/agent-metrics.json +1 -0
- package/docs/.claude-flow/metrics/performance.json +9 -0
- package/docs/.claude-flow/metrics/task-metrics.json +10 -0
- package/docs/CHANGELOG.md +155 -0
- package/docs/CLAUDE.md +352 -0
- package/docs/COMPLETE_VALIDATION_SUMMARY.md +405 -0
- package/docs/INDEX.md +183 -0
- package/docs/LICENSE +21 -0
- package/docs/ONNX_CLI_USAGE.md +344 -0
- package/docs/ONNX_ENV_VARS.md +564 -0
- package/docs/ONNX_INTEGRATION.md +422 -0
- package/docs/ONNX_OPTIMIZATION_GUIDE.md +665 -0
- package/docs/ONNX_OPTIMIZATION_SUMMARY.md +374 -0
- package/docs/ONNX_VS_CLAUDE_QUALITY.md +442 -0
- package/docs/OPENROUTER_DEPLOYMENT.md +495 -0
- package/docs/architecture/EXECUTIVE_SUMMARY.md +310 -0
- package/docs/architecture/IMPROVEMENT_PLAN.md +11 -0
- package/docs/architecture/INTEGRATION-STATUS.md +290 -0
- package/docs/architecture/MULTI_MODEL_ROUTER_PLAN.md +620 -0
- package/docs/architecture/QUICK_WINS.md +333 -0
- package/docs/architecture/README.md +15 -0
- package/docs/architecture/RESEARCH_SUMMARY.md +652 -0
- package/docs/archived/FASTMCP_COMPLETE.md +428 -0
- package/docs/archived/FASTMCP_INTEGRATION_STATUS.md +288 -0
- package/docs/archived/FLOW-NEXUS-COMPLETE.md +269 -0
- package/docs/archived/INTEGRATION_CONFIRMED.md +351 -0
- package/docs/archived/ONNX_FINAL_REPORT.md +312 -0
- package/docs/archived/ONNX_IMPLEMENTATION_COMPLETE.md +215 -0
- package/docs/archived/ONNX_IMPLEMENTATION_SUMMARY.md +197 -0
- package/docs/archived/ONNX_SUCCESS_REPORT.md +271 -0
- package/docs/archived/OPENROUTER_PROXY_COMPLETE.md +494 -0
- package/docs/archived/PACKAGE-COMPLETE.md +138 -0
- package/docs/archived/README.md +27 -0
- package/docs/archived/RESEARCH_COMPLETE.txt +335 -0
- package/docs/archived/SDK-SETUP-COMPLETE.md +252 -0
- package/docs/guides/ALTERNATIVE_LLM_MODELS.md +524 -0
- package/docs/guides/DOCKER_AGENT_USAGE.md +352 -0
- package/docs/guides/IMPLEMENTATION_EXAMPLES.md +960 -0
- package/docs/guides/NPM-PUBLISH.md +218 -0
- package/docs/guides/README.md +17 -0
- package/docs/guides/agent-sdk.md +234 -0
- package/docs/integrations/CLAUDE_AGENTS_INTEGRATION.md +356 -0
- package/docs/integrations/CLAUDE_FLOW_INTEGRATION.md +535 -0
- package/docs/integrations/FASTMCP_CLI_INTEGRATION.md +503 -0
- package/docs/integrations/FLOW-NEXUS-INTEGRATION.md +319 -0
- package/docs/integrations/README.md +18 -0
- package/docs/integrations/fastmcp-implementation-plan.md +2516 -0
- package/docs/integrations/fastmcp-poc-integration.md +198 -0
- package/docs/router/ONNX_PHI4_RESEARCH.md +220 -0
- package/docs/router/ONNX_RUNTIME_INTEGRATION_PLAN.md +866 -0
- package/docs/router/PHI4_HYPEROPTIMIZATION_PLAN.md +2488 -0
- package/docs/router/README.md +552 -0
- package/docs/router/ROUTER_CONFIG_REFERENCE.md +577 -0
- package/docs/router/ROUTER_USER_GUIDE.md +865 -0
- package/docs/validation/DOCKER_MCP_VALIDATION.md +358 -0
- package/docs/validation/DOCKER_OPENROUTER_VALIDATION.md +443 -0
- package/docs/validation/FINAL_SYSTEM_VALIDATION.md +458 -0
- package/docs/validation/FINAL_VALIDATION_SUMMARY.md +409 -0
- package/docs/validation/MCP_CLI_TOOLS_VALIDATION.md +266 -0
- package/docs/validation/MODEL_VALIDATION_REPORT.md +386 -0
- package/docs/validation/OPENROUTER_VALIDATION_COMPLETE.md +382 -0
- package/docs/validation/README.md +20 -0
- package/docs/validation/ROUTER_VALIDATION.md +311 -0
- package/package.json +140 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ONNX Runtime Local Inference Provider for Phi-4
|
|
3
|
+
*
|
|
4
|
+
* Uses onnxruntime-node for true local CPU/GPU inference
|
|
5
|
+
*/
|
|
6
|
+
import * as ort from 'onnxruntime-node';
|
|
7
|
+
import { get_encoding } from 'tiktoken';
|
|
8
|
+
import { ensurePhi4Model, ModelDownloader } from '../../utils/model-downloader.js';
|
|
9
|
+
/**
 * ONNX Runtime local inference provider for Phi-4.
 *
 * Runs local CPU/GPU inference via onnxruntime-node using a manual
 * autoregressive generation loop with per-layer KV caching. Tokenization
 * uses tiktoken's cl100k_base BPE encoding (GPT-4 encoding, close to Phi-4).
 */
export class ONNXLocalProvider {
    name = 'onnx-local';
    type = 'custom';
    supportsStreaming = false; // Streaming requires complex token generation loop
    supportsTools = false;
    supportsMCP = false;
    session = null; // ort.InferenceSession, created lazily by initializeSession()
    config; // resolved provider configuration (see constructor)
    tokenizer = null; // unused placeholder; tiktoken is the active tokenizer
    tiktoken = null; // tiktoken encoding handle, created lazily by loadTokenizer()
    /**
     * @param {object} [config] - Optional overrides.
     * @param {string} [config.modelPath] - Path to the ONNX model file.
     * @param {string[]} [config.executionProviders] - ORT execution providers.
     * @param {number} [config.maxTokens] - Max new tokens to generate (default 100).
     * @param {number} [config.temperature] - Sampling temperature (default 0.7).
     */
    constructor(config = {}) {
        // Use ?? (not ||) so explicit falsy values such as maxTokens: 0 or
        // temperature: 0 are honored instead of being silently replaced.
        this.config = {
            modelPath: config.modelPath ?? './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx',
            executionProviders: config.executionProviders ?? ['cpu'],
            maxTokens: config.maxTokens ?? 100,
            temperature: config.temperature ?? 0.7
        };
    }
    /**
     * Load optimized tiktoken tokenizer (cl100k_base for Phi-4).
     * Idempotent: returns immediately if already loaded.
     * @throws {Error} if the tiktoken encoding cannot be created.
     */
    async loadTokenizer() {
        if (this.tiktoken)
            return;
        try {
            // Use cl100k_base encoding (GPT-4, similar to Phi-4)
            this.tiktoken = get_encoding('cl100k_base');
            console.log('✅ Tokenizer loaded (tiktoken cl100k_base)');
        }
        catch (error) {
            console.error('❌ Failed to load tiktoken:', error);
            throw new Error(`Tokenizer loading failed: ${error}`);
        }
    }
    /**
     * Encode text using tiktoken (fast BPE).
     * @param {string} text
     * @returns {number[]} token ids
     */
    encode(text) {
        return Array.from(this.tiktoken.encode(text));
    }
    /**
     * Decode token ids back to text using tiktoken.
     * tiktoken may return a string or a byte buffer depending on version,
     * so both cases are normalized to a string here.
     * @param {number[]} ids
     * @returns {string} decoded text, or comma-joined ids on decode failure
     */
    decode(ids) {
        try {
            const decoded = this.tiktoken.decode(new Uint32Array(ids));
            if (typeof decoded === 'string') {
                return decoded;
            }
            else if (decoded instanceof Uint8Array || decoded instanceof Buffer) {
                return new TextDecoder().decode(decoded);
            }
            return String(decoded);
        }
        catch (error) {
            // Best-effort fallback: surface the raw ids rather than crashing.
            console.warn('Decode error, returning raw IDs:', error);
            return ids.join(',');
        }
    }
    /**
     * Initialize the ONNX session (with automatic model download).
     * Idempotent: returns immediately if a session already exists.
     * @throws {{name: string, message: string, provider: string, retryable: boolean}}
     *         a non-retryable provider error if model download or session
     *         creation fails.
     */
    async initializeSession() {
        if (this.session)
            return;
        try {
            // Ensure model is downloaded before attempting to load it.
            console.log(`🔍 Checking for Phi-4 ONNX model...`);
            const modelPath = await ensurePhi4Model((progress) => {
                if (progress.percentage % 10 < 1) { // Log every ~10%
                    console.log(`  📥 Downloading: ${ModelDownloader.formatProgress(progress)}`);
                }
            });
            // Update config with the actual (possibly downloaded) model path.
            this.config.modelPath = modelPath;
            console.log(`📦 Loading ONNX model: ${this.config.modelPath}`);
            this.session = await ort.InferenceSession.create(this.config.modelPath, {
                executionProviders: this.config.executionProviders,
                graphOptimizationLevel: 'all',
                enableCpuMemArena: true,
                enableMemPattern: true
            });
            console.log(`✅ ONNX model loaded`);
            console.log(`🔧 Execution providers: ${this.config.executionProviders.join(', ')}`);
            await this.loadTokenizer();
        }
        catch (error) {
            const providerError = {
                name: 'ONNXInitError',
                message: `Failed to initialize ONNX model: ${error}`,
                provider: 'onnx-local',
                retryable: false
            };
            throw providerError;
        }
    }
    /**
     * Format chat messages using the Phi-4 chat template
     * (<|system|> / <|user|> / <|assistant|> turns terminated by <|end|>),
     * ending with an open assistant turn for generation.
     * Non-text content parts are dropped; unknown roles are skipped.
     * @param {{role: string, content: string|Array<{type: string, text?: string}>}[]} messages
     * @returns {string} prompt string
     */
    formatMessages(messages) {
        let prompt = '';
        for (const msg of messages) {
            const content = typeof msg.content === 'string'
                ? msg.content
                : msg.content.map(c => c.type === 'text' ? c.text : '').join('');
            if (msg.role === 'system') {
                prompt += `<|system|>\n${content}<|end|>\n`;
            }
            else if (msg.role === 'user') {
                prompt += `<|user|>\n${content}<|end|>\n`;
            }
            else if (msg.role === 'assistant') {
                prompt += `<|assistant|>\n${content}<|end|>\n`;
            }
        }
        prompt += '<|assistant|>\n';
        return prompt;
    }
    /**
     * Initialize empty KV cache tensors for all 32 layers.
     * Assumes Phi-4 architecture: 32 layers, 8 KV heads, 128 head_dim
     * — TODO confirm against the exported model's input signature.
     * @param {number} batchSize
     * @param {number} sequenceLength - unused; cache always starts empty (seq dim 0)
     * @returns {Record<string, import('onnxruntime-node').Tensor>} feeds keyed as
     *          past_key_values.{layer}.{key|value}
     */
    initializeKVCache(batchSize, sequenceLength) {
        const numLayers = 32;
        const numKVHeads = 8;
        const headDim = 128; // 3072 / 24 = 128
        const kvCache = {};
        for (let i = 0; i < numLayers; i++) {
            // Empty cache: [batch_size, num_kv_heads, 0, head_dim]
            const emptyCache = new Float32Array(0);
            kvCache[`past_key_values.${i}.key`] = new ort.Tensor('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
            kvCache[`past_key_values.${i}.value`] = new ort.Tensor('float32', emptyCache, [batchSize, numKVHeads, 0, headDim]);
        }
        return kvCache;
    }
    /**
     * Chat completion using ONNX with KV cache.
     *
     * Greedy (argmax) decoding: on the first step the full prompt is fed;
     * on subsequent steps only the last generated token is fed, with the
     * model's `present.*` outputs recycled as the next step's KV cache.
     * @param {{messages: object[], maxTokens?: number, temperature?: number}} params
     * @returns {Promise<object>} Anthropic-style completion result with usage
     *          and provider metadata.
     * @throws a retryable provider error on inference failure.
     */
    async chat(params) {
        await this.initializeSession();
        const startTime = Date.now();
        const prompt = this.formatMessages(params.messages);
        try {
            // Tokenize input using optimized tiktoken
            const inputIds = this.encode(prompt);
            console.log(`📝 Input tokens: ${inputIds.length}`);
            // Initialize KV cache (empty; populated from model outputs each step)
            let pastKVCache = this.initializeKVCache(1, 0);
            // Track all tokens (prompt + generated) for the attention mask.
            const allTokenIds = [...inputIds];
            const outputIds = [];
            // ?? (not ||) so an explicit maxTokens: 0 request is honored.
            const maxNewTokens = params.maxTokens ?? this.config.maxTokens;
            // Autoregressive generation loop
            for (let step = 0; step < maxNewTokens; step++) {
                // First step: full prompt. Later steps: only the last new token
                // (earlier positions are covered by the KV cache).
                const currentInputIds = step === 0 ? inputIds : [outputIds[outputIds.length - 1]];
                const currentSeqLen = currentInputIds.length;
                const inputTensor = new ort.Tensor('int64', BigInt64Array.from(currentInputIds.map(BigInt)), [1, currentSeqLen]);
                // Attention mask covers cached positions + current input.
                const totalSeqLen = allTokenIds.length;
                const attentionMask = new ort.Tensor('int64', BigInt64Array.from(Array(totalSeqLen).fill(1n)), [1, totalSeqLen]);
                const feeds = {
                    input_ids: inputTensor,
                    attention_mask: attentionMask,
                    ...pastKVCache
                };
                // Run inference
                const results = await this.session.run(feeds);
                // Logits shape is [1, currentSeqLen, vocab]; take the last position.
                const logits = results.logits.data;
                const vocabSize = results.logits.dims[results.logits.dims.length - 1];
                const lastTokenLogitsOffset = (currentSeqLen - 1) * vocabSize;
                // Greedy argmax over the vocabulary. Note: temperature scaling is
                // deliberately NOT applied — dividing every logit by the same
                // positive constant cannot change the argmax, and a temperature
                // of 0 would cause a division by zero.
                let nextToken = 0;
                let maxVal = -Infinity;
                for (let i = 0; i < vocabSize; i++) {
                    const logit = logits[lastTokenLogitsOffset + i];
                    if (logit > maxVal) {
                        maxVal = logit;
                        nextToken = i;
                    }
                }
                outputIds.push(nextToken);
                allTokenIds.push(nextToken);
                // Check for end token (2 is typical EOS for Phi models; 0 treated
                // as padding/stop) — assumed ids, TODO confirm against tokenizer.
                if (nextToken === 2 || nextToken === 0) {
                    console.log(`🛑 Stop token detected: ${nextToken}`);
                    break;
                }
                // Recycle the model's present.* outputs as the next step's cache.
                pastKVCache = {};
                for (let i = 0; i < 32; i++) {
                    pastKVCache[`past_key_values.${i}.key`] = results[`present.${i}.key`];
                    pastKVCache[`past_key_values.${i}.value`] = results[`present.${i}.value`];
                }
                // Progress indicator
                if ((step + 1) % 10 === 0) {
                    console.log(`🔄 Generated ${step + 1} tokens...`);
                }
            }
            // Decode output using optimized tiktoken
            const generatedText = this.decode(outputIds);
            const latency = Date.now() - startTime;
            const tokensPerSecond = (outputIds.length / (latency / 1000)).toFixed(1);
            console.log(`✅ Generated: ${generatedText}`);
            console.log(`⏱️  Latency: ${latency}ms (${tokensPerSecond} tokens/sec)`);
            const content = [{
                    type: 'text',
                    text: generatedText.trim()
                }];
            return {
                id: `onnx-local-${Date.now()}`,
                model: this.config.modelPath,
                content,
                stopReason: 'end_turn',
                usage: {
                    inputTokens: inputIds.length,
                    outputTokens: outputIds.length
                },
                metadata: {
                    provider: 'onnx-local',
                    model: 'Phi-4-mini-instruct-onnx',
                    latency,
                    cost: 0, // Local inference is free
                    executionProviders: this.config.executionProviders,
                    tokensPerSecond: parseFloat(tokensPerSecond)
                }
            };
        }
        catch (error) {
            const providerError = {
                name: 'ONNXInferenceError',
                message: `ONNX inference failed: ${error}`,
                provider: 'onnx-local',
                retryable: true
            };
            throw providerError;
        }
    }
    /**
     * Streaming not implemented (requires complex generation loop).
     * @throws {Error} always
     */
    async *stream(params) {
        throw new Error('Streaming not yet implemented for ONNX local inference');
    }
    /**
     * Validate that every requested feature is supported by this provider.
     * Only 'chat' is supported.
     * @param {string[]} features
     * @returns {boolean}
     */
    validateCapabilities(features) {
        const supported = ['chat'];
        return features.every(f => supported.includes(f));
    }
    /**
     * Get model/provider status info.
     * @returns {{modelPath: string, executionProviders: string[], initialized: boolean, tokenizerLoaded: boolean}}
     */
    getModelInfo() {
        return {
            modelPath: this.config.modelPath,
            executionProviders: this.config.executionProviders,
            initialized: this.session !== null,
            tokenizerLoaded: this.tiktoken !== null
        };
    }
    /**
     * Cleanup resources: drop the session reference and free the tiktoken
     * encoding (tiktoken requires an explicit free()).
     */
    async dispose() {
        if (this.session) {
            // NOTE(review): newer onnxruntime-node versions expose
            // session.release() — confirm and call it here if available.
            this.session = null;
        }
        if (this.tiktoken) {
            this.tiktoken.free();
            this.tiktoken = null;
        }
    }
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ONNX Runtime Provider for Phi-4 Model
|
|
3
|
+
*
|
|
4
|
+
* Hybrid implementation with fallback to HuggingFace Inference API
|
|
5
|
+
* when local ONNX model is not available
|
|
6
|
+
*/
|
|
7
|
+
import { HfInference } from '@huggingface/inference';
|
|
8
|
+
export class ONNXPhi4Provider {
|
|
9
|
+
name = 'onnx-phi4';
|
|
10
|
+
type = 'custom';
|
|
11
|
+
supportsStreaming = true;
|
|
12
|
+
supportsTools = false;
|
|
13
|
+
supportsMCP = false;
|
|
14
|
+
config;
|
|
15
|
+
hf;
|
|
16
|
+
modelPath = './models/phi-4/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/model.onnx';
|
|
17
|
+
constructor(config = {}) {
|
|
18
|
+
this.config = {
|
|
19
|
+
modelId: config.modelId || 'microsoft/Phi-3-mini-4k-instruct',
|
|
20
|
+
useLocalONNX: config.useLocalONNX ?? false, // Default to API until local model downloaded
|
|
21
|
+
huggingfaceApiKey: config.huggingfaceApiKey || process.env.HUGGINGFACE_API_KEY || '',
|
|
22
|
+
maxTokens: config.maxTokens || 512,
|
|
23
|
+
temperature: config.temperature || 0.7
|
|
24
|
+
};
|
|
25
|
+
this.hf = new HfInference(this.config.huggingfaceApiKey);
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Format messages for Phi-4 chat template
|
|
29
|
+
*/
|
|
30
|
+
formatMessages(messages) {
|
|
31
|
+
let prompt = '';
|
|
32
|
+
for (const msg of messages) {
|
|
33
|
+
const content = typeof msg.content === 'string'
|
|
34
|
+
? msg.content
|
|
35
|
+
: msg.content.map(c => c.type === 'text' ? c.text : '').join('');
|
|
36
|
+
if (msg.role === 'system') {
|
|
37
|
+
prompt += `<|system|>\n${content}<|end|>\n`;
|
|
38
|
+
}
|
|
39
|
+
else if (msg.role === 'user') {
|
|
40
|
+
prompt += `<|user|>\n${content}<|end|>\n`;
|
|
41
|
+
}
|
|
42
|
+
else if (msg.role === 'assistant') {
|
|
43
|
+
prompt += `<|assistant|>\n${content}<|end|>\n`;
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
prompt += '<|assistant|>\n';
|
|
47
|
+
return prompt;
|
|
48
|
+
}
|
|
49
|
+
/**
|
|
50
|
+
* Chat completion via HuggingFace Inference API
|
|
51
|
+
*/
|
|
52
|
+
async chatViaAPI(params) {
|
|
53
|
+
const startTime = Date.now();
|
|
54
|
+
const prompt = this.formatMessages(params.messages);
|
|
55
|
+
try {
|
|
56
|
+
const result = await this.hf.textGeneration({
|
|
57
|
+
model: this.config.modelId,
|
|
58
|
+
inputs: prompt,
|
|
59
|
+
parameters: {
|
|
60
|
+
max_new_tokens: params.maxTokens || this.config.maxTokens,
|
|
61
|
+
temperature: params.temperature || this.config.temperature,
|
|
62
|
+
return_full_text: false,
|
|
63
|
+
do_sample: true,
|
|
64
|
+
top_p: 0.9
|
|
65
|
+
}
|
|
66
|
+
});
|
|
67
|
+
const latency = Date.now() - startTime;
|
|
68
|
+
const generatedText = result.generated_text;
|
|
69
|
+
// Clean up the response
|
|
70
|
+
const assistantResponse = generatedText
|
|
71
|
+
.split('<|end|>')[0]
|
|
72
|
+
.trim();
|
|
73
|
+
// Estimate token counts
|
|
74
|
+
const inputTokens = Math.ceil(prompt.length / 4);
|
|
75
|
+
const outputTokens = Math.ceil(assistantResponse.length / 4);
|
|
76
|
+
const content = [{
|
|
77
|
+
type: 'text',
|
|
78
|
+
text: assistantResponse
|
|
79
|
+
}];
|
|
80
|
+
return {
|
|
81
|
+
id: `onnx-phi4-${Date.now()}`,
|
|
82
|
+
model: this.config.modelId,
|
|
83
|
+
content,
|
|
84
|
+
stopReason: 'end_turn',
|
|
85
|
+
usage: {
|
|
86
|
+
inputTokens,
|
|
87
|
+
outputTokens
|
|
88
|
+
},
|
|
89
|
+
metadata: {
|
|
90
|
+
provider: 'onnx-phi4',
|
|
91
|
+
model: this.config.modelId,
|
|
92
|
+
latency,
|
|
93
|
+
cost: outputTokens * 0.000002, // Rough estimate
|
|
94
|
+
mode: 'api'
|
|
95
|
+
}
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
catch (error) {
|
|
99
|
+
const providerError = {
|
|
100
|
+
name: 'ONNXPhi4APIError',
|
|
101
|
+
message: `HuggingFace API inference failed: ${error}`,
|
|
102
|
+
provider: 'onnx-phi4',
|
|
103
|
+
retryable: true
|
|
104
|
+
};
|
|
105
|
+
throw providerError;
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
/**
|
|
109
|
+
* Chat completion via local ONNX (not yet implemented)
|
|
110
|
+
*/
|
|
111
|
+
async chatViaONNX(params) {
|
|
112
|
+
throw new Error('Local ONNX inference not yet implemented. Download model.onnx.data first.');
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Chat completion (uses API or local ONNX based on config)
|
|
116
|
+
*/
|
|
117
|
+
async chat(params) {
|
|
118
|
+
if (this.config.useLocalONNX) {
|
|
119
|
+
return this.chatViaONNX(params);
|
|
120
|
+
}
|
|
121
|
+
else {
|
|
122
|
+
return this.chatViaAPI(params);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
/**
|
|
126
|
+
* Streaming generation via HuggingFace API
|
|
127
|
+
*/
|
|
128
|
+
async *stream(params) {
|
|
129
|
+
const prompt = this.formatMessages(params.messages);
|
|
130
|
+
try {
|
|
131
|
+
const stream = this.hf.textGenerationStream({
|
|
132
|
+
model: this.config.modelId,
|
|
133
|
+
inputs: prompt,
|
|
134
|
+
parameters: {
|
|
135
|
+
max_new_tokens: params.maxTokens || this.config.maxTokens,
|
|
136
|
+
temperature: params.temperature || this.config.temperature,
|
|
137
|
+
return_full_text: false
|
|
138
|
+
}
|
|
139
|
+
});
|
|
140
|
+
for await (const chunk of stream) {
|
|
141
|
+
if (chunk.token.text) {
|
|
142
|
+
yield {
|
|
143
|
+
type: 'content_block_delta',
|
|
144
|
+
delta: {
|
|
145
|
+
type: 'text_delta',
|
|
146
|
+
text: chunk.token.text
|
|
147
|
+
}
|
|
148
|
+
};
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
yield {
|
|
152
|
+
type: 'message_stop'
|
|
153
|
+
};
|
|
154
|
+
}
|
|
155
|
+
catch (error) {
|
|
156
|
+
const providerError = {
|
|
157
|
+
name: 'ONNXPhi4StreamError',
|
|
158
|
+
message: `Streaming failed: ${error}`,
|
|
159
|
+
provider: 'onnx-phi4',
|
|
160
|
+
retryable: true
|
|
161
|
+
};
|
|
162
|
+
throw providerError;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
/**
|
|
166
|
+
* Validate capabilities
|
|
167
|
+
*/
|
|
168
|
+
validateCapabilities(features) {
|
|
169
|
+
const supported = ['chat', 'stream'];
|
|
170
|
+
return features.every(f => supported.includes(f));
|
|
171
|
+
}
|
|
172
|
+
/**
|
|
173
|
+
* Get model info
|
|
174
|
+
*/
|
|
175
|
+
getModelInfo() {
|
|
176
|
+
return {
|
|
177
|
+
modelId: this.config.modelId,
|
|
178
|
+
mode: this.config.useLocalONNX ? 'local-onnx' : 'api',
|
|
179
|
+
supportsLocalInference: false, // Will be true when model.onnx.data downloaded
|
|
180
|
+
modelPath: this.modelPath,
|
|
181
|
+
apiKey: this.config.huggingfaceApiKey ? '***' : undefined
|
|
182
|
+
};
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Switch between API and local ONNX
|
|
186
|
+
*/
|
|
187
|
+
setMode(useLocalONNX) {
|
|
188
|
+
this.config.useLocalONNX = useLocalONNX;
|
|
189
|
+
}
|
|
190
|
+
}
|