npm - agentic-flow - Versions diffs - 1.1.14 → 1.2.1 - Mend

agentic-flow 1.1.14 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/.claude/agents/custom/test-long-runner.md +44 -0
package/README.md +50 -1
package/dist/agents/claudeAgent.js +31 -0
package/dist/cli/mcp-manager.js +474 -0
package/dist/cli-proxy.js +22 -1
package/dist/utils/.claude-flow/metrics/agent-metrics.json +1 -0
package/dist/utils/.claude-flow/metrics/performance.json +9 -0
package/dist/utils/.claude-flow/metrics/task-metrics.json +10 -0
package/dist/utils/cli.js +9 -1
package/dist/utils/modelOptimizer.js +18 -2
package/docs/.claude-flow/metrics/performance.json +1 -1
package/docs/.claude-flow/metrics/task-metrics.json +3 -3
package/docs/INDEX.md +44 -7
package/docs/archived/RELEASE-SUMMARY-v1.1.14-beta.1.md +336 -0
package/docs/archived/V1.1.14-BETA-READY.md +418 -0
package/docs/guides/ADDING-MCP-SERVERS-CLI.md +515 -0
package/docs/guides/ADDING-MCP-SERVERS.md +642 -0
package/docs/mcp-validation/IMPLEMENTATION-SUMMARY.md +493 -0
package/docs/mcp-validation/MCP-CLI-VALIDATION-REPORT.md +322 -0
package/docs/mcp-validation/README.md +43 -0
package/docs/mcp-validation/strange-loops-test.md +63 -0
package/docs/releases/HOTFIX-v1.2.1.md +315 -0
package/docs/releases/NPM-PUBLISH-GUIDE-v1.2.0.md +440 -0
package/docs/releases/PUBLISH-COMPLETE-v1.2.0.md +308 -0
package/docs/releases/README.md +18 -0
package/docs/releases/RELEASE-v1.2.0.md +339 -0
package/docs/testing/AGENT-SYSTEM-VALIDATION.md +517 -0
package/docs/testing/FINAL-TESTING-SUMMARY.md +362 -0
package/docs/testing/README.md +46 -0
package/docs/testing/REGRESSION-TEST-RESULTS.md +269 -0
package/docs/testing/STREAMING-AND-MCP-VALIDATION.md +517 -0
package/package.json +2 -2

package/dist/utils/cli.js CHANGED Viewed

@@ -16,8 +16,16 @@ export function parseArgs() {
     }
     // Check for MCP command
     if (args[0] === 'mcp') {
+        const mcpSubcommand = args[1];
+        // MCP Manager commands (CLI configuration)
+        const managerCommands = ['add', 'list', 'remove', 'enable', 'disable', 'update', 'test', 'info', 'export', 'import'];
+        if (managerCommands.includes(mcpSubcommand)) {
+            options.mode = 'mcp-manager';
+            return options;
+        }
+        // MCP Server commands (start/stop server)
         options.mode = 'mcp';
-        options.mcpCommand = args[1] || 'start'; // default to start
+        options.mcpCommand = mcpSubcommand || 'start'; // default to start
         options.mcpServer = args[2] || 'all'; // default to all servers
         return options;
     }

package/dist/utils/modelOptimizer.js CHANGED Viewed

@@ -16,6 +16,7 @@ const MODEL_DATABASE = {
         speed_score: 85,
         cost_score: 20,
         tier: 'flagship',
+        supports_tools: true,
         strengths: ['reasoning', 'coding', 'analysis', 'production'],
         weaknesses: ['cost'],
         bestFor: ['coder', 'reviewer', 'architecture', 'planner', 'production-validator']
@@ -30,6 +31,7 @@ const MODEL_DATABASE = {
         speed_score: 90,
         cost_score: 30,
         tier: 'flagship',
+        supports_tools: true,
         strengths: ['multimodal', 'speed', 'general-purpose', 'vision'],
         weaknesses: ['cost'],
         bestFor: ['researcher', 'analyst', 'multimodal-tasks']
@@ -44,6 +46,7 @@ const MODEL_DATABASE = {
         speed_score: 75,
         cost_score: 50,
         tier: 'flagship',
+        supports_tools: true,
         strengths: ['reasoning', 'large-context', 'math', 'analysis'],
         weaknesses: ['speed'],
         bestFor: ['planner', 'architecture', 'researcher', 'code-analyzer']
@@ -59,8 +62,9 @@ const MODEL_DATABASE = {
         speed_score: 80,
         cost_score: 100,
         tier: 'cost-effective',
+        supports_tools: false, // DeepSeek R1 does NOT support tool/function calling
         strengths: ['reasoning', 'coding', 'math', 'value', 'free'],
-        weaknesses: ['newer-model'],
+        weaknesses: ['newer-model', 'no-tool-use'],
         bestFor: ['coder', 'pseudocode', 'specification', 'refinement', 'tester']
     },
     'deepseek-chat-v3': {
@@ -73,6 +77,7 @@ const MODEL_DATABASE = {
         speed_score: 90,
         cost_score: 100,
         tier: 'cost-effective',
+        supports_tools: true,
         strengths: ['cost', 'speed', 'coding', 'development', 'free'],
         weaknesses: ['complex-reasoning'],
         bestFor: ['coder', 'reviewer', 'tester', 'backend-dev', 'cicd-engineer']
@@ -88,6 +93,7 @@ const MODEL_DATABASE = {
         speed_score: 98,
         cost_score: 98,
         tier: 'balanced',
+        supports_tools: true,
         strengths: ['speed', 'cost', 'interactive'],
         weaknesses: ['quality'],
         bestFor: ['researcher', 'planner', 'smart-agent']
@@ -102,6 +108,7 @@ const MODEL_DATABASE = {
         speed_score: 95,
         cost_score: 100,
         tier: 'balanced',
+        supports_tools: true,
         strengths: ['open-source', 'versatile', 'coding', 'free', 'fast'],
         weaknesses: ['smaller-model'],
         bestFor: ['coder', 'reviewer', 'base-template-generator', 'tester']
@@ -116,6 +123,7 @@ const MODEL_DATABASE = {
         speed_score: 85,
         cost_score: 90,
         tier: 'balanced',
+        supports_tools: true,
         strengths: ['multilingual', 'coding', 'reasoning'],
         weaknesses: ['english-optimized'],
         bestFor: ['researcher', 'coder', 'multilingual-tasks']
@@ -131,6 +139,7 @@ const MODEL_DATABASE = {
         speed_score: 95,
         cost_score: 99,
         tier: 'budget',
+        supports_tools: true,
         strengths: ['ultra-low-cost', 'speed'],
         weaknesses: ['quality', 'complex-tasks'],
         bestFor: ['simple-tasks', 'testing']
@@ -146,6 +155,7 @@ const MODEL_DATABASE = {
         speed_score: 30,
         cost_score: 100,
         tier: 'local',
+        supports_tools: false,
         strengths: ['privacy', 'offline', 'zero-cost'],
         weaknesses: ['quality', 'speed'],
         bestFor: ['privacy-tasks', 'offline-tasks']
@@ -197,8 +207,14 @@ export class ModelOptimizer {
         const taskComplexity = criteria.taskComplexity || this.inferComplexity(criteria.task);
         // Set default priority to balanced if not specified
         const priority = criteria.priority || 'balanced';
+        // Filter models that support tools if required
+        let availableModels = Object.entries(MODEL_DATABASE);
+        if (criteria.requiresTools) {
+            availableModels = availableModels.filter(([key, model]) => model.supports_tools !== false);
+            logger.info(`Filtered to ${availableModels.length} models with tool support`);
+        }
         // Score all models
-        const scoredModels = Object.entries(MODEL_DATABASE).map(([key, model]) => {
+        const scoredModels = availableModels.map(([key, model]) => {
             // Calculate overall score based on priority
             let overall_score;
             switch (priority) {

package/docs/.claude-flow/metrics/performance.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "startTime": 1759680310468,
+  "startTime": 1759762593440,
   "totalTasks": 1,
   "successfulTasks": 1,
   "failedTasks": 0,

package/docs/.claude-flow/metrics/task-metrics.json CHANGED Viewed

@@ -1,10 +1,10 @@
 [
   {
-    "id": "cmd-hooks-1759680310618",
+    "id": "cmd-hooks-1759762593563",
     "type": "hooks",
     "success": true,
-    "duration": 8.211029999999994,
-    "timestamp": 1759680310627,
+    "duration": 24.05694200000005,
+    "timestamp": 1759762593587,
     "metadata": {}
   }
 ]

package/docs/INDEX.md CHANGED Viewed

@@ -54,10 +54,35 @@ Multi-model router configuration and usage.
 - [Router Config Reference](router/ROUTER_CONFIG_REFERENCE.md) - Configuration options
 - [Top 20 Models Matrix](router/TOP20_MODELS_MATRIX.md) - Model comparison guide
-### ✅ [Validation & Testing](validation/)
-Test results and quality assurance reports.
+### ✅ [Testing & Validation](testing/)
+Current test results, validation reports, and quality assurance.
-- [Validation README](validation/README.md) - Overview and archived reports
+- [Testing Overview](testing/README.md) - Current testing documentation
+- [Agent System Validation](testing/AGENT-SYSTEM-VALIDATION.md) - Multi-agent testing
+- [Final Testing Summary](testing/FINAL-TESTING-SUMMARY.md) - Comprehensive coverage
+- [Regression Test Results](testing/REGRESSION-TEST-RESULTS.md) - Regression testing
+- [Streaming & MCP Validation](testing/STREAMING-AND-MCP-VALIDATION.md) - Integration tests
+### 🔍 [MCP Validation](mcp-validation/)
+Model Context Protocol implementation and validation.
+- [MCP Validation Overview](mcp-validation/README.md) - MCP testing documentation
+- [Implementation Summary](mcp-validation/IMPLEMENTATION-SUMMARY.md) - MCP implementation
+- [CLI Validation Report](mcp-validation/MCP-CLI-VALIDATION-REPORT.md) - CLI tool testing
+- [Strange Loops Test](mcp-validation/strange-loops-test.md) - Advanced patterns
+### 📦 [Releases](releases/)
+Version-specific release notes and publishing documentation.
+- [Release Overview](releases/README.md) - Release documentation index
+- [v1.2.0 Release](releases/RELEASE-v1.2.0.md) - Latest stable release
+- [v1.2.0 Publishing Guide](releases/NPM-PUBLISH-GUIDE-v1.2.0.md) - Publishing process
+- [v1.2.1 Hotfix](releases/HOTFIX-v1.2.1.md) - Critical fixes
+### 🗄️ [Validation Archive](validation/)
+Historical validation reports and test archives.
+- [Validation Archive](validation/README.md) - Archived test reports
 ### 📦 [Archived](archived/)
 Historical documentation, completed implementations, and validation reports.
@@ -103,13 +128,15 @@ Historical documentation, completed implementations, and validation reports.
 ### Path 2: Developers (1.5 hours)
 1. [Architecture Overview](architecture/EXECUTIVE_SUMMARY.md) - System design (20 min)
 2. [Implementation Examples](guides/IMPLEMENTATION_EXAMPLES.md) - Code patterns (40 min)
-3. [Integration Guides](integrations/) - External services (30 min)
+3. [Integration Guides](integrations/) - External services (20 min)
+4. [Testing Documentation](testing/) - Quality assurance (10 min)
 ### Path 3: System Architects (2 hours)
 1. [Research Summary](architecture/RESEARCH_SUMMARY.md) - Technical findings (30 min)
 2. [Multi-Model Router Plan](architecture/MULTI_MODEL_ROUTER_PLAN.md) - Router architecture (45 min)
-3. [Integration Status](architecture/INTEGRATION-STATUS.md) - Current state (30 min)
+3. [Integration Status](architecture/INTEGRATION-STATUS.md) - Current state (20 min)
 4. [Router Documentation](router/) - Configuration and usage (15 min)
+5. [MCP Validation](mcp-validation/) - Protocol implementation (10 min)
 ---
@@ -192,5 +219,15 @@ Historical reports, completed implementations, and superseded guides are in the
 ---
-**Documentation Status**: ✅ Organized and up-to-date
-**Last Updated**: October 5, 2025
+**Documentation Status**: ✅ Reorganized and up-to-date
+**Last Updated**: October 6, 2025
+## 📋 Recent Documentation Updates
+**v2.0 Reorganization (Oct 6, 2025)**:
+- Created dedicated `releases/` directory for version-specific documentation
+- Consolidated testing reports into `testing/` directory
+- Separated MCP validation into dedicated `mcp-validation/` section
+- Added comprehensive READMEs to all major sections
+- Archived historical v1.1.x releases for cleaner navigation
+- Improved documentation index with better categorization

package/docs/archived/RELEASE-SUMMARY-v1.1.14-beta.1.md ADDED Viewed

@@ -0,0 +1,336 @@
+# v1.1.14-beta.1 Release Summary
+**Date:** 2025-10-05
+**Status:** ✅ **PUBLISHED & LIVE**
+**NPM:** https://www.npmjs.com/package/agentic-flow/v/1.1.14-beta.1
+**GitHub:** https://github.com/ruvnet/agentic-flow/releases/tag/v1.1.14-beta.1
+---
+## 🎉 Major Achievement
+**Fixed a critical OpenRouter proxy bug that was causing a 100% failure rate!**
+From **0% success** → **80% success** (8 out of 10 models working)
+---
+## Installation & Usage
+### Install Beta Version
+```bash
+# Install globally
+npm install -g agentic-flow@beta
+# Or use with npx (no installation needed)
+npx agentic-flow@beta --help
+```
+### Quick Start
+```bash
+# List available agents
+npx agentic-flow@beta --list
+# Run with Anthropic (default)
+npx agentic-flow@beta --agent coder --task "Write Python hello world"
+# Run with OpenRouter (~95% cost savings with GPT-4o-mini!)
+npx agentic-flow@beta --agent coder --task "Write Python hello world" \
+  --provider openrouter --model "openai/gpt-4o-mini"
+# Run with Grok 4 Fast (FREE!)
+npx agentic-flow@beta --agent coder --task "Write Python hello world" \
+  --provider openrouter --model "x-ai/grok-4-fast"
+```
+---
+## ✅ Verified Working
+### NPX Command
+```bash
+$ npx agentic-flow@beta --version
+agentic-flow v1.1.14-beta.1
+$ npx agentic-flow@beta --help
+🤖 Agentic Flow v1.1.14-beta.1 - AI Agent Orchestration with OpenRouter Support
+[Full help output shown]
+$ npx agentic-flow@beta --agent coder --task "hello world" --provider anthropic
+✅ Completed! [Working perfectly]
+```
+### OpenRouter Models (8/10 = 80%)
+| Model | Status | Time | Cost/M Tokens | Use Case |
+|-------|--------|------|---------------|----------|
+| **openai/gpt-4o-mini** | ✅ | 7s | $0.15 | Best value |
+| **openai/gpt-3.5-turbo** | ✅ | 5s | $0.50 | Fastest |
+| **meta-llama/llama-3.1-8b-instruct** | ✅ | 14s | $0.06 | Open source |
+| **anthropic/claude-3.5-sonnet** | ✅ | 11s | $3.00 | Highest quality |
+| **mistralai/mistral-7b-instruct** | ✅ | 6s | $0.25 | Fast & efficient |
+| **google/gemini-2.0-flash-exp** | ✅ | 6s | Free | Free tier |
+| **x-ai/grok-4-fast** | ✅ | 8s | Free | #1 most popular! |
+| **z-ai/glm-4.6** | ✅ | 5s | Varies | Fixed in this release |
+### Known Issues (2/10)
+| Model | Issue | Workaround |
+|-------|-------|------------|
+| **meta-llama/llama-3.3-70b-instruct** | Intermittent timeout | Use llama-3.1-8b instead |
+| **x-ai/grok-4** | Too slow (60s+) | Use grok-4-fast instead |
+---
+## 💰 Cost Savings
+### Comparison vs Claude Direct API
+| Model | Cost | vs Claude ($3/M) | Savings |
+|-------|------|------------------|---------|
+| GPT-4o-mini | $0.15/M | $2.85 | **95%** |
+| Llama 3.1 8B | $0.06/M | $2.94 | **98%** |
+| Mistral 7B | $0.25/M | $2.75 | **92%** |
+| GPT-3.5-turbo | $0.50/M | $2.50 | **83%** |
+| Grok 4 Fast | Free | $3.00 | **100%** |
+| Gemini 2.0 Flash | Free | $3.00 | **100%** |
+**Average savings: ~94% across all working models**
+---
+## 🔧 What Was Fixed
+### Critical Bug
+**TypeError: anthropicReq.system?.substring is not a function**
+**Root Cause:**
+- Anthropic API allows `system` field to be string OR array of content blocks
+- Claude Agent SDK sends it as array (for prompt caching)
+- Proxy assumed string only → called `.substring()` on array → crash
+- Result: 100% failure rate
+**Solution:**
+```typescript
+// Before (BROKEN)
+interface AnthropicRequest {
+  system?: string;
+}
+// After (FIXED)
+interface AnthropicRequest {
+  system?: string | Array<{ type: string; text?: string; [key: string]: any }>;
+}
+// Safe extraction logic
+if (typeof anthropicReq.system === 'string') {
+  originalSystem = anthropicReq.system;
+} else if (Array.isArray(anthropicReq.system)) {
+  originalSystem = anthropicReq.system
+    .filter(block => block.type === 'text' && block.text)
+    .map(block => block.text)
+    .join('\n');
+}
+```
+---
+## 📊 Testing Results
+### Regression Tests
+- ✅ Anthropic Direct: No regressions
+- ✅ Google Gemini: No regressions
+- ✅ OpenRouter: Fixed from 0% → 80%
+### MCP Tools
+- ✅ All 15 tools working through OpenRouter proxy
+- ✅ File operations validated (Write, Read, Bash)
+- ✅ Tool format conversion working (Anthropic ↔ OpenAI)
+### Performance
+- GPT-3.5-turbo: 5s (fastest)
+- Mistral 7B: 6s
+- Gemini 2.0 Flash: 6s
+- GPT-4o-mini: 7s
+- Grok 4 Fast: 8s
+- Claude 3.5 Sonnet: 11s
+- Llama 3.1 8B: 14s
+---
+## 📖 Documentation
+### Technical Details
+- [OPENROUTER-FIX-VALIDATION.md](OPENROUTER-FIX-VALIDATION.md) - Technical validation
+- [OPENROUTER-SUCCESS-REPORT.md](OPENROUTER-SUCCESS-REPORT.md) - Comprehensive report
+- [FINAL-TESTING-SUMMARY.md](../testing/FINAL-TESTING-SUMMARY.md) - Complete testing summary
+- [REGRESSION-TEST-RESULTS.md](../testing/REGRESSION-TEST-RESULTS.md) - Regression validation
+- [V1.1.14-BETA-READY.md](V1.1.14-BETA-READY.md) - Beta readiness assessment
+### Quick Reference
+- **66+ specialized agents** available
+- **111 MCP tools** for coordination
+- **4 providers:** Anthropic, OpenRouter, Gemini, ONNX
+- **400+ models** via OpenRouter
+- **Zero breaking changes** - fully backward compatible
+---
+## 🚀 Example Usage
+### Basic Code Generation
+```bash
+# With Anthropic (highest quality)
+npx agentic-flow@beta --agent coder --task "Create REST API with Express"
+# With OpenRouter GPT-4o-mini (best value)
+npx agentic-flow@beta --agent coder --task "Create REST API with Express" \
+  --provider openrouter --model "openai/gpt-4o-mini"
+# With Grok 4 Fast (free!)
+npx agentic-flow@beta --agent coder --task "Create REST API with Express" \
+  --provider openrouter --model "x-ai/grok-4-fast"
+```
+### Multi-Agent Workflows
+```bash
+# Research task with cheaper model
+npx agentic-flow@beta --agent researcher \
+  --task "Research best practices for microservices" \
+  --provider openrouter --model "openai/gpt-3.5-turbo"
+# Code review with high-quality model
+npx agentic-flow@beta --agent reviewer \
+  --task "Review my authentication code" \
+  --provider openrouter --model "anthropic/claude-3.5-sonnet"
+# Testing with fast model
+npx agentic-flow@beta --agent tester \
+  --task "Create Jest tests for my API" \
+  --provider openrouter --model "mistralai/mistral-7b-instruct"
+```
+### Configuration
+```bash
+# Interactive wizard
+npx agentic-flow@beta config
+# Set OpenRouter API key
+npx agentic-flow@beta config set OPENROUTER_API_KEY "sk-or-..."
+# List configuration
+npx agentic-flow@beta config list
+```
+---
+## 🐛 Reporting Issues
+This is a **beta release** - please test and report any issues:
+**GitHub Issues:** https://github.com/ruvnet/agentic-flow/issues
+When reporting, please include:
+- Model being used
+- Task description
+- Error message (if any)
+- Output received
+- Expected behavior
+---
+## 🔄 Upgrade Path
+### From v1.1.13 → v1.1.14-beta.1
+**Changes:**
+- OpenRouter proxy now functional (was 100% broken)
+- No breaking changes to API
+- All existing code continues to work
+- New: 8 OpenRouter models now available
+**Migration:**
+```bash
+# Update to beta
+npm install agentic-flow@beta
+# Or use npx (always fetches the latest published beta)
+npx agentic-flow@beta [commands]
+```
+**Rollback if needed:**
+```bash
+npm install agentic-flow@1.1.13
+```
+---
+## 🎯 Next Steps
+### Before Stable Release (v1.1.14)
+1. ⏳ User beta testing feedback
+2. ⏳ Test DeepSeek models with proper API keys
+3. ⏳ Debug Llama 3.3 70B timeout issue
+4. ⏳ Test streaming responses
+5. ⏳ Performance benchmarking
+6. ⏳ Additional model validation
+### Future Enhancements (v1.2.0)
+1. Auto-detect best model for task
+2. Automatic failover between models
+3. Model capability detection
+4. Streaming response support
+5. Cost optimization features
+6. Performance metrics dashboard
+---
+## 📈 Success Metrics
+### Before v1.1.14-beta.1
+- OpenRouter success rate: **0%** (100% failure)
+- Working models: 0
+- Cost savings: Not available
+- User complaints: High
+### After v1.1.14-beta.1
+- OpenRouter success rate: **80%** (8/10 working)
+- Working models: 8
+- Cost savings: Up to **99%**
+- MCP tools: All 15 working
+- Most popular model: ✅ Working (Grok 4 Fast)
+---
+## ✅ Release Checklist
+- [x] Core bug fixed (anthropicReq.system)
+- [x] 10 models tested (8 working)
+- [x] Popular models validated (Grok 4 Fast)
+- [x] MCP tools working (all 15)
+- [x] File operations confirmed
+- [x] No regressions in baseline providers
+- [x] Documentation complete
+- [x] Changelog updated
+- [x] Package version updated
+- [x] TypeScript build successful
+- [x] Git tag created
+- [x] NPM published with beta tag
+- [x] GitHub release created
+- [x] npx command verified
+- [x] User communication prepared
+---
+## 🙏 Credits
+**Debugging time:** ~4 hours
+**Lines changed:** ~50
+**Models tested:** 10
+**Success rate:** 80%
+**Impact:** Unlocked 400+ models via OpenRouter
+**Built with:** [Claude Code](https://claude.com/claude-code)
+---
+**Ready for production after beta testing!** 🚀