agentic-flow 1.9.3 → 1.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/dist/cli-proxy.js +19 -1
- package/dist/core/long-running-agent.js +219 -0
- package/dist/core/provider-manager.js +434 -0
- package/dist/examples/use-provider-fallback.js +176 -0
- package/docs/LANDING-PAGE-PROVIDER-CONTENT.md +204 -0
- package/docs/PROVIDER-FALLBACK-GUIDE.md +619 -0
- package/docs/PROVIDER-FALLBACK-SUMMARY.md +418 -0
- package/package.json +1 -1
- package/validation/test-provider-fallback.ts +285 -0
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,58 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.9.4] - 2025-11-06
|
|
9
|
+
|
|
10
|
+
### Added - Enterprise Provider Fallback & Dynamic Switching 🚀
|
|
11
|
+
|
|
12
|
+
**Production-grade provider fallback for long-running agents**
|
|
13
|
+
|
|
14
|
+
#### New Core Classes
|
|
15
|
+
|
|
16
|
+
1. **`ProviderManager`** (src/core/provider-manager.ts)
|
|
17
|
+
- Intelligent multi-provider management with automatic failover
|
|
18
|
+
- 4 fallback strategies: priority, cost-optimized, performance-optimized, round-robin
|
|
19
|
+
- Circuit breaker pattern prevents cascading failures
|
|
20
|
+
- Real-time health monitoring with automatic recovery
|
|
21
|
+
- Exponential/linear retry logic with backoff
|
|
22
|
+
- Per-provider cost tracking and budget controls
|
|
23
|
+
- Performance metrics (latency, success rate, error rate)
|
|
24
|
+
|
|
25
|
+
2. **`LongRunningAgent`** (src/core/long-running-agent.ts)
|
|
26
|
+
- Long-running agent with automatic checkpointing
|
|
27
|
+
- Budget constraints (e.g., max $5 spending)
|
|
28
|
+
- Runtime limits (e.g., max 1 hour execution)
|
|
29
|
+
- Task complexity heuristics (simple → Gemini, complex → Claude)
|
|
30
|
+
- State management and crash recovery
|
|
31
|
+
- Periodic checkpoints every 30 seconds (configurable)
|
|
32
|
+
|
|
33
|
+
#### Key Features
|
|
34
|
+
|
|
35
|
+
- ✅ **Automatic Fallback** - Seamless switching between providers on failure
|
|
36
|
+
- ✅ **Circuit Breaker** - Opens after N failures, auto-recovers after timeout
|
|
37
|
+
- ✅ **Health Monitoring** - Real-time provider health tracking and metrics
|
|
38
|
+
- ✅ **Cost Optimization** - Intelligent provider selection based on cost/performance
|
|
39
|
+
- ✅ **Retry Logic** - Exponential/linear backoff for transient errors (rate limits, timeouts)
|
|
40
|
+
- ✅ **Checkpointing** - Save/restore agent state for crash recovery
|
|
41
|
+
- ✅ **Budget Control** - Hard limits on spending and runtime
|
|
42
|
+
- ✅ **Performance Tracking** - Latency, success rate, token usage metrics
|
|
43
|
+
|
|
44
|
+
#### Production Benefits
|
|
45
|
+
|
|
46
|
+
- **70% cost savings** - Use Gemini for simple tasks vs Claude
|
|
47
|
+
- **100% free option** - ONNX local inference fallback
|
|
48
|
+
- **Zero downtime** - Automatic failover between providers
|
|
49
|
+
- **2-5x faster** - Smart provider selection by task complexity
|
|
50
|
+
- **Self-healing** - Circuit breaker with automatic recovery
|
|
51
|
+
|
|
52
|
+
#### Documentation
|
|
53
|
+
|
|
54
|
+
- **Complete Guide:** `docs/PROVIDER-FALLBACK-GUIDE.md` (400+ lines)
|
|
55
|
+
- **Implementation Summary:** `docs/PROVIDER-FALLBACK-SUMMARY.md`
|
|
56
|
+
- **Working Example:** `src/examples/use-provider-fallback.ts`
|
|
57
|
+
- **Tests:** `validation/test-provider-fallback.ts`
|
|
58
|
+
- **Docker Validated:** `Dockerfile.provider-fallback` ✅
|
|
59
|
+
|
|
8
60
|
## [1.9.3] - 2025-11-06
|
|
9
61
|
|
|
10
62
|
### Fixed - Gemini Provider Now Fully Functional 🎉
|
package/dist/cli-proxy.js
CHANGED
|
@@ -905,7 +905,10 @@ PERFORMANCE:
|
|
|
905
905
|
}
|
|
906
906
|
printHelp() {
|
|
907
907
|
console.log(`
|
|
908
|
-
🤖 Agentic Flow v${VERSION} - AI Agent Orchestration with
|
|
908
|
+
🤖 Agentic Flow v${VERSION} - AI Agent Orchestration with Multi-Provider Support
|
|
909
|
+
|
|
910
|
+
NEW IN v1.9.4: Enterprise provider fallback & dynamic switching for long-running agents
|
|
911
|
+
✅ Automatic failover ✅ Circuit breaker ✅ Cost optimization ✅ Health monitoring
|
|
909
912
|
|
|
910
913
|
USAGE:
|
|
911
914
|
npx agentic-flow [COMMAND] [OPTIONS]
|
|
@@ -995,6 +998,21 @@ OPTIONS:
|
|
|
995
998
|
Example savings: DeepSeek R1 costs 85% less than Claude Sonnet 4.5 with similar quality.
|
|
996
999
|
See docs/agentic-flow/benchmarks/MODEL_CAPABILITIES.md for full comparison.
|
|
997
1000
|
|
|
1001
|
+
PROVIDER FALLBACK (NEW v1.9.4):
|
|
1002
|
+
Enterprise-grade provider fallback for long-running agents with automatic failover,
|
|
1003
|
+
circuit breaker, health monitoring, cost tracking, and crash recovery.
|
|
1004
|
+
|
|
1005
|
+
Features:
|
|
1006
|
+
• Automatic failover between providers (Gemini → Claude → ONNX)
|
|
1007
|
+
• Circuit breaker prevents cascading failures (auto-recovery after timeout)
|
|
1008
|
+
• Real-time health monitoring (success rate, latency, error tracking)
|
|
1009
|
+
• Cost optimization (70% savings using Gemini for simple tasks)
|
|
1010
|
+
• Checkpointing for crash recovery (save/restore agent state)
|
|
1011
|
+
• Budget controls (hard limits on spending and runtime)
|
|
1012
|
+
|
|
1013
|
+
See: docs/PROVIDER-FALLBACK-GUIDE.md for complete documentation
|
|
1014
|
+
Example: src/examples/use-provider-fallback.ts
|
|
1015
|
+
|
|
998
1016
|
EXAMPLES:
|
|
999
1017
|
# MCP Server Management
|
|
1000
1018
|
npx agentic-flow mcp start # Start all MCP servers
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Long-Running Agent with Provider Fallback
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates how to use ProviderManager for resilient, cost-optimized agents
|
|
5
|
+
* that can run for hours or days with automatic provider switching.
|
|
6
|
+
*/
|
|
7
|
+
import { ProviderManager } from './provider-manager.js';
|
|
8
|
+
import { logger } from '../utils/logger.js';
|
|
9
|
+
/**
 * Agent wrapper that runs tasks through a ProviderManager, tracks task
 * counters in `currentState`, and records periodic checkpoints in memory.
 *
 * Config fields read by this class:
 *  - `agentName`          label attached to every log entry
 *  - `providers`          array of provider configs (each with a `name`), passed to ProviderManager
 *  - `fallbackStrategy`   passed through to ProviderManager unchanged
 *  - `checkpointInterval` optional period in milliseconds between automatic checkpoints
 *  - `costBudget`         optional spending cap, compared against `getCostSummary().total`
 *  - `maxRuntime`         optional cap in milliseconds, measured from the last `start()`
 *  - `maxCheckpoints`     optional positive integer; when set, only the newest
 *                         `maxCheckpoints` checkpoints are retained in memory
 */
export class LongRunningAgent {
    providerManager;
    config;
    startTime;
    checkpoints = [];
    currentState = {};
    isRunning = false;
    checkpointInterval;

    /**
     * @param {object} config - Agent configuration (see class description).
     */
    constructor(config) {
        this.config = config;
        this.startTime = new Date();
        // Initialize provider manager
        this.providerManager = new ProviderManager(config.providers, config.fallbackStrategy);
        logger.info('Long-running agent initialized', {
            agentName: config.agentName,
            providers: config.providers.map(p => p.name)
        });
    }

    /**
     * Start the agent with automatic checkpointing.
     *
     * Marks the agent as running, resets the runtime clock and, when
     * `config.checkpointInterval` is set, schedules periodic checkpoints.
     * Calling `start()` again replaces the existing checkpoint timer instead
     * of stacking a second one on top of it.
     *
     * @returns {Promise<void>}
     */
    async start() {
        this.isRunning = true;
        this.startTime = new Date();
        // Drop any timer left over from a previous start() so it cannot leak.
        if (this.checkpointInterval) {
            clearInterval(this.checkpointInterval);
            this.checkpointInterval = undefined;
        }
        // Start checkpoint interval
        if (this.config.checkpointInterval) {
            this.checkpointInterval = setInterval(() => {
                // An exception escaping a timer callback is uncaught and would
                // take the whole process down; log it and keep the agent alive.
                try {
                    this.saveCheckpoint();
                }
                catch (error) {
                    logger.error('Periodic checkpoint failed', {
                        agentName: this.config.agentName,
                        error: error instanceof Error ? error.message : String(error)
                    });
                }
            }, this.config.checkpointInterval);
        }
        logger.info('Long-running agent started', {
            agentName: this.config.agentName,
            startTime: this.startTime
        });
    }

    /**
     * Execute a task with automatic provider fallback.
     *
     * @param {object} task - Task descriptor. Reads `name`, `complexity`,
     *   `estimatedTokens` and `execute`; the last three are forwarded to
     *   `providerManager.executeWithFallback`.
     * @returns {Promise<*>} The `result` reported by the provider manager.
     * @throws {Error} If the agent is not running, the cost budget is already
     *   reached, or the maximum runtime has elapsed. Any failure raised by the
     *   provider manager is rethrown unchanged after being counted and logged.
     */
    async executeTask(task) {
        if (!this.isRunning) {
            throw new Error('Agent not running. Call start() first.');
        }
        // Check budget constraint
        if (this.config.costBudget) {
            const currentCost = this.providerManager.getCostSummary().total;
            if (currentCost >= this.config.costBudget) {
                throw new Error(`Cost budget exceeded: $${currentCost.toFixed(2)} >= $${this.config.costBudget}`);
            }
        }
        // Check runtime constraint
        if (this.config.maxRuntime) {
            const runtime = Date.now() - this.startTime.getTime();
            if (runtime >= this.config.maxRuntime) {
                throw new Error(`Max runtime exceeded: ${runtime}ms >= ${this.config.maxRuntime}ms`);
            }
        }
        logger.info('Executing task', {
            agentName: this.config.agentName,
            taskName: task.name,
            complexity: task.complexity
        });
        try {
            // Execute with automatic fallback
            const { result, provider, attempts } = await this.providerManager.executeWithFallback(task.execute, task.complexity, task.estimatedTokens);
            // Update state
            this.currentState.lastTask = task.name;
            this.currentState.lastProvider = provider;
            this.currentState.completedTasks = (this.currentState.completedTasks || 0) + 1;
            logger.info('Task completed', {
                agentName: this.config.agentName,
                taskName: task.name,
                provider,
                attempts
            });
            return result;
        }
        catch (error) {
            this.currentState.failedTasks = (this.currentState.failedTasks || 0) + 1;
            logger.error('Task failed', {
                agentName: this.config.agentName,
                taskName: task.name,
                // Rejections are not guaranteed to be Error instances (they can
                // even be null); never let logging mask the original failure.
                error: error instanceof Error ? error.message : String(error)
            });
            throw error;
        }
    }

    /**
     * Save a checkpoint of the current state.
     *
     * Appends a snapshot (timestamp, progress, cost/token totals, task
     * counters and a shallow copy of `currentState`) to `this.checkpoints`,
     * then logs warnings when spending reaches 80% of `config.costBudget` or
     * when any provider is reported unhealthy or has an open circuit breaker.
     */
    saveCheckpoint() {
        const costSummary = this.providerManager.getCostSummary();
        const health = this.providerManager.getHealth();
        const checkpoint = {
            timestamp: new Date(),
            taskProgress: this.calculateProgress(),
            currentProvider: this.currentState.lastProvider || 'none',
            totalCost: costSummary.total,
            totalTokens: costSummary.totalTokens,
            completedTasks: this.currentState.completedTasks || 0,
            failedTasks: this.currentState.failedTasks || 0,
            // Shallow copy: nested objects stored in the state remain shared.
            state: { ...this.currentState }
        };
        this.checkpoints.push(checkpoint);
        // Optional retention cap so multi-day runs do not accumulate
        // checkpoints without bound. Trimmed in place so references handed
        // out by getMetrics() keep pointing at the live array.
        const { maxCheckpoints } = this.config;
        if (Number.isInteger(maxCheckpoints) && maxCheckpoints > 0 && this.checkpoints.length > maxCheckpoints) {
            this.checkpoints.splice(0, this.checkpoints.length - maxCheckpoints);
        }
        logger.info('Checkpoint saved', {
            agentName: this.config.agentName,
            checkpoint: {
                ...checkpoint,
                state: undefined // Don't log full state
            }
        });
        // Alert if cost approaching budget
        if (this.config.costBudget) {
            const costPercentage = (costSummary.total / this.config.costBudget) * 100;
            if (costPercentage >= 80) {
                logger.warn('Cost budget warning', {
                    agentName: this.config.agentName,
                    currentCost: costSummary.total,
                    budget: this.config.costBudget,
                    percentage: costPercentage.toFixed(1) + '%'
                });
            }
        }
        // Alert if providers unhealthy
        const unhealthyProviders = health.filter(h => !h.isHealthy || h.circuitBreakerOpen);
        if (unhealthyProviders.length > 0) {
            logger.warn('Unhealthy providers detected', {
                agentName: this.config.agentName,
                unhealthy: unhealthyProviders.map(h => ({
                    provider: h.provider,
                    circuitBreakerOpen: h.circuitBreakerOpen,
                    consecutiveFailures: h.consecutiveFailures
                }))
            });
        }
    }

    /**
     * Calculate task progress (override in subclass).
     *
     * @returns {number} Completed tasks divided by completed plus failed
     *   tasks, or 0 when no task has finished yet.
     */
    calculateProgress() {
        // Default: based on completed vs total tasks
        const completed = this.currentState.completedTasks || 0;
        const failed = this.currentState.failedTasks || 0;
        const total = completed + failed;
        return total > 0 ? completed / total : 0;
    }

    /**
     * Get current status.
     *
     * @returns {object} Running flag, runtime in milliseconds since the last
     *   `start()`, task counters, cost/token totals, a per-provider health
     *   summary with formatted success rate and latency, and the most recent
     *   checkpoint (`undefined` when none has been saved).
     */
    getStatus() {
        const costSummary = this.providerManager.getCostSummary();
        const health = this.providerManager.getHealth();
        const runtime = Date.now() - this.startTime.getTime();
        return {
            isRunning: this.isRunning,
            runtime,
            completedTasks: this.currentState.completedTasks || 0,
            failedTasks: this.currentState.failedTasks || 0,
            totalCost: costSummary.total,
            totalTokens: costSummary.totalTokens,
            providers: health.map(h => ({
                name: h.provider,
                healthy: h.isHealthy,
                circuitBreakerOpen: h.circuitBreakerOpen,
                successRate: (h.successRate * 100).toFixed(1) + '%',
                avgLatency: h.averageLatency.toFixed(0) + 'ms'
            })),
            lastCheckpoint: this.checkpoints[this.checkpoints.length - 1]
        };
    }

    /**
     * Get detailed metrics.
     *
     * @returns {object} Provider metrics, health and cost summary as reported
     *   by the provider manager, plus the live `checkpoints` array.
     */
    getMetrics() {
        return {
            providers: this.providerManager.getMetrics(),
            health: this.providerManager.getHealth(),
            costs: this.providerManager.getCostSummary(),
            checkpoints: this.checkpoints
        };
    }

    /**
     * Restore from checkpoint.
     *
     * Replaces `currentState` with a shallow copy of `checkpoint.state`.
     *
     * @param {object} checkpoint - A checkpoint as produced by `saveCheckpoint()`.
     * @throws {TypeError} If no checkpoint is supplied.
     */
    restoreFromCheckpoint(checkpoint) {
        if (checkpoint == null) {
            throw new TypeError('restoreFromCheckpoint requires a checkpoint object');
        }
        this.currentState = { ...checkpoint.state };
        logger.info('Restored from checkpoint', {
            agentName: this.config.agentName,
            checkpoint: checkpoint.timestamp
        });
    }

    /**
     * Stop the agent.
     *
     * Cancels the checkpoint timer, saves a final checkpoint and destroys the
     * provider manager. The provider manager is destroyed even when the final
     * checkpoint throws; in that case the error still propagates to the caller.
     *
     * @returns {Promise<void>}
     */
    async stop() {
        this.isRunning = false;
        // Clear checkpoint interval
        if (this.checkpointInterval) {
            clearInterval(this.checkpointInterval);
            this.checkpointInterval = undefined;
        }
        try {
            // Save final checkpoint
            this.saveCheckpoint();
        }
        finally {
            // Cleanup provider manager
            this.providerManager.destroy();
        }
        logger.info('Long-running agent stopped', {
            agentName: this.config.agentName,
            runtime: Date.now() - this.startTime.getTime(),
            completedTasks: this.currentState.completedTasks || 0,
            failedTasks: this.currentState.failedTasks || 0
        });
    }
}
|