@azumag/opencode-rate-limit-fallback 1.31.0 → 1.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -47
- package/dist/index.d.ts +2 -1
- package/dist/index.js +58 -7
- package/dist/src/circuitbreaker/CircuitBreaker.d.ts +60 -0
- package/dist/src/circuitbreaker/CircuitBreaker.js +218 -0
- package/dist/src/circuitbreaker/CircuitState.d.ts +44 -0
- package/dist/src/circuitbreaker/CircuitState.js +128 -0
- package/dist/src/circuitbreaker/index.d.ts +8 -0
- package/dist/src/circuitbreaker/index.js +8 -0
- package/dist/src/config/Validator.d.ts +64 -0
- package/dist/src/config/Validator.js +618 -0
- package/dist/src/diagnostics/Reporter.d.ts +128 -0
- package/dist/src/diagnostics/Reporter.js +285 -0
- package/dist/src/errors/PatternRegistry.d.ts +75 -0
- package/dist/src/errors/PatternRegistry.js +234 -0
- package/dist/src/fallback/FallbackHandler.d.ts +4 -1
- package/dist/src/fallback/FallbackHandler.js +41 -2
- package/dist/src/fallback/ModelSelector.d.ts +9 -1
- package/dist/src/fallback/ModelSelector.js +33 -4
- package/dist/src/health/HealthTracker.d.ts +96 -0
- package/dist/src/health/HealthTracker.js +353 -0
- package/dist/src/metrics/MetricsManager.d.ts +10 -1
- package/dist/src/metrics/MetricsManager.js +137 -0
- package/dist/src/types/index.d.ts +98 -0
- package/dist/src/types/index.js +10 -0
- package/dist/src/utils/config.d.ts +8 -1
- package/dist/src/utils/config.js +26 -11
- package/package.json +1 -1
- package/dist/src/utils/errorDetection.d.ts +0 -7
- package/dist/src/utils/errorDetection.js +0 -34
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Health Tracker
|
|
3
|
+
* Tracks model success rates and response times for health-based selection
|
|
4
|
+
*/
|
|
5
|
+
import { getModelKey } from '../utils/helpers.js';
|
|
6
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
|
7
|
+
import { join, dirname } from 'path';
|
|
8
|
+
import { homedir } from 'os';
|
|
9
|
+
/**
 * Number of recorded requests a model needs before its success rate is
 * folded into the health score and before it counts as having reliable data.
 */
const MIN_REQUESTS_FOR_RELIABLE_SCORE = 3;
/**
 * Fallback location of the persisted health file:
 * `<home directory>/.opencode/rate-limit-fallback-health.json`.
 */
const DEFAULT_HEALTH_PERSISTENCE_PATH = join(
    homedir(),
    '.opencode',
    'rate-limit-fallback-health.json',
);
/**
 * Persistence settings used when the plugin configuration carries no
 * `healthPersistence` section.
 */
const DEFAULT_HEALTH_CONFIG = {
    enabled: true,
    path: DEFAULT_HEALTH_PERSISTENCE_PATH,
};
|
|
24
|
+
/**
 * Model Health Tracker class.
 *
 * Keeps one health record per model (request counters, consecutive failures,
 * a weighted-average response time and a derived 0-100 health score) and,
 * when persistence is enabled, mirrors those records to a JSON file.
 */
export class HealthTracker {
    /** Map from model key (see getModelKey) to that model's health record. */
    healthData;
    /** Whether health data is loaded from / written to disk. */
    persistenceEnabled;
    /** File used for persistence. */
    persistencePath;
    /** Value reported by isEnabled(). */
    healthBasedSelectionEnabled;
    /** Logger used to report load/save failures. */
    logger;
    /** True while a delayed save is scheduled. */
    savePending;
    /** Timer handle of the scheduled save, if any. */
    saveTimeout;
    // Configurable thresholds
    responseTimeThreshold;
    responseTimePenaltyDivisor;
    failurePenaltyMultiplier;
    persistenceDebounceMs;
    /**
     * @param config Plugin configuration. Only `healthPersistence` and
     *   `enableHealthBasedSelection` are read here.
     * @param logger Object whose `warn(message, meta)` is called when state
     *   cannot be loaded or saved. A missing logger is tolerated.
     */
    constructor(config, logger) {
        this.healthData = new Map();
        // Parse health persistence config
        const healthPersistence = config.healthPersistence || DEFAULT_HEALTH_CONFIG;
        this.persistenceEnabled = healthPersistence.enabled !== false;
        this.persistencePath = healthPersistence.path || DEFAULT_HEALTH_PERSISTENCE_PATH;
        this.healthBasedSelectionEnabled = config.enableHealthBasedSelection || false;
        this.logger = logger;
        // Initialize save state
        this.savePending = false;
        this.saveTimeout = undefined;
        // Scoring / persistence thresholds
        this.responseTimeThreshold = 2000; // ms - average above this is penalised
        this.responseTimePenaltyDivisor = 200; // ms over the threshold per penalty point
        this.failurePenaltyMultiplier = 15; // penalty points per consecutive failure
        this.persistenceDebounceMs = 30000; // delay between a change and its write to disk
        // Load existing state
        if (this.persistenceEnabled) {
            this.loadState();
        }
    }
    /**
     * Record a successful request for a model.
     *
     * @param providerID Provider identifier.
     * @param modelID Model identifier.
     * @param responseTime Duration of the request in ms. Values that are not
     *   finite, non-negative numbers are ignored so they cannot corrupt the
     *   running average.
     */
    recordSuccess(providerID, modelID, responseTime) {
        const key = getModelKey(providerID, modelID);
        const now = Date.now();
        const health = this.healthData.get(key) || this.createHealthEntry(key, providerID, modelID, now);
        health.totalRequests++;
        health.successfulRequests++;
        health.consecutiveFailures = 0;
        health.lastUsed = now;
        health.lastSuccess = now;
        if (Number.isFinite(responseTime) && responseTime >= 0) {
            // First sample seeds the average; later samples are weighted at 30%.
            health.avgResponseTime = health.avgResponseTime === 0
                ? responseTime
                : Math.round(health.avgResponseTime * 0.7 + responseTime * 0.3);
        }
        this.commitHealth(key, health);
    }
    /**
     * Record a failed request for a model.
     *
     * @param providerID Provider identifier.
     * @param modelID Model identifier.
     */
    recordFailure(providerID, modelID) {
        const key = getModelKey(providerID, modelID);
        const now = Date.now();
        const health = this.healthData.get(key) || this.createHealthEntry(key, providerID, modelID, now);
        health.totalRequests++;
        health.failedRequests++;
        health.consecutiveFailures++;
        health.lastUsed = now;
        health.lastFailure = now;
        this.commitHealth(key, health);
    }
    /**
     * Build a blank health record (no requests yet, perfect score).
     * @private
     */
    createHealthEntry(key, providerID, modelID, now) {
        return {
            modelKey: key,
            providerID,
            modelID,
            totalRequests: 0,
            successfulRequests: 0,
            failedRequests: 0,
            consecutiveFailures: 0,
            avgResponseTime: 0,
            lastUsed: now,
            lastSuccess: 0,
            lastFailure: 0,
            healthScore: 100,
        };
    }
    /**
     * Recompute the score of a record, store it and schedule persistence.
     * @private
     */
    commitHealth(key, health) {
        health.healthScore = this.calculateHealthScore(health);
        this.healthData.set(key, health);
        if (this.persistenceEnabled) {
            this.saveState();
        }
    }
    /**
     * Get the health score for a model (0-100).
     * Models without any recorded data are assumed healthy and score 100.
     */
    getHealthScore(providerID, modelID) {
        const health = this.healthData.get(getModelKey(providerID, modelID));
        return health ? health.healthScore : 100;
    }
    /**
     * Get the full health record for a model, or null when none exists.
     */
    getModelHealth(providerID, modelID) {
        return this.healthData.get(getModelKey(providerID, modelID)) || null;
    }
    /**
     * Get all health records as an array.
     */
    getAllHealthData() {
        return Array.from(this.healthData.values());
    }
    /**
     * Order candidate models by health score, highest first.
     *
     * @param candidates Objects exposing `providerID` and `modelID`.
     * @param limit Maximum number of models to return; when omitted (or 0)
     *   every candidate is returned.
     * @returns A new array; `candidates` itself is not modified.
     */
    getHealthiestModels(candidates, limit) {
        const ranked = candidates
            .map(model => ({
                model,
                score: this.getHealthScore(model.providerID, model.modelID),
            }))
            .sort((a, b) => b.score - a.score)
            .map(item => item.model);
        return limit ? ranked.slice(0, limit) : ranked;
    }
    /**
     * Calculate a health score from a record's metrics.
     * Score is 0-100, higher is healthier.
     */
    calculateHealthScore(health) {
        let score = 100;
        // Success rate only counts once enough requests have been seen.
        if (health.totalRequests >= MIN_REQUESTS_FOR_RELIABLE_SCORE) {
            score = Math.round(score * (health.successfulRequests / health.totalRequests));
        }
        // Consecutive failures are penalised heavily, capped at 80 points.
        score -= Math.min(health.consecutiveFailures * this.failurePenaltyMultiplier, 80);
        // Slow averages cost up to 30 points; averages under the threshold cost nothing.
        if (health.avgResponseTime > 0) {
            const overThreshold = health.avgResponseTime - this.responseTimeThreshold;
            const responseTimePenalty = Math.min(Math.round(overThreshold / this.responseTimePenaltyDivisor), 30);
            if (responseTimePenalty > 0) {
                score -= responseTimePenalty;
            }
        }
        return Math.max(0, Math.min(100, score));
    }
    /**
     * Schedule a save of the health state.
     *
     * The first change after a save starts a timer of `persistenceDebounceMs`;
     * changes made while that timer is running are included in the same write.
     */
    saveState() {
        if (!this.persistenceEnabled || this.savePending) {
            return;
        }
        this.savePending = true;
        this.saveTimeout = setTimeout(() => {
            this.saveTimeout = undefined;
            try {
                this.performSave();
            }
            finally {
                // Always release the flag, otherwise one failed save would
                // block every later save.
                this.savePending = false;
            }
        }, this.persistenceDebounceMs);
    }
    /**
     * Write the current health state to `persistencePath` synchronously.
     * Does nothing when persistence is disabled; failures are logged, not thrown.
     */
    performSave() {
        if (!this.persistenceEnabled) {
            return;
        }
        try {
            // Ensure directory exists
            const dir = dirname(this.persistencePath);
            if (!existsSync(dir)) {
                mkdirSync(dir, { recursive: true });
            }
            const state = {
                models: Object.fromEntries(this.healthData.entries()),
                lastUpdated: Date.now(),
            };
            writeFileSync(this.persistencePath, JSON.stringify(state, null, 2), 'utf-8');
        }
        catch (error) {
            this.logWarning('[HealthTracker] Failed to save state', error);
        }
    }
    /**
     * Load health state from `persistencePath`, if the file exists.
     * Entries that fail validation are skipped; read/parse failures are logged
     * and leave the tracker empty.
     */
    loadState() {
        if (!this.persistenceEnabled || !existsSync(this.persistencePath)) {
            return;
        }
        try {
            const state = JSON.parse(readFileSync(this.persistencePath, 'utf-8'));
            if (!state || !state.models || typeof state.models !== 'object') {
                return;
            }
            for (const [key, raw] of Object.entries(state.models)) {
                const entry = this.normalizeLoadedEntry(key, raw);
                if (entry) {
                    this.healthData.set(key, entry);
                }
            }
        }
        catch (error) {
            this.logWarning('[HealthTracker] Failed to load state, starting fresh', error);
        }
    }
    /**
     * Validate one persisted record. Returns the record when it is usable,
     * otherwise null.
     * @private
     */
    normalizeLoadedEntry(key, raw) {
        if (!raw || typeof raw !== 'object' || raw.modelKey !== key) {
            return null;
        }
        // These fields feed arithmetic and sorting, so they must be real numbers.
        const numericFields = [
            'totalRequests',
            'successfulRequests',
            'failedRequests',
            'consecutiveFailures',
            'lastUsed',
            'lastSuccess',
            'lastFailure',
            'healthScore',
        ];
        if (!numericFields.every(field => Number.isFinite(raw[field]))) {
            return null;
        }
        // A NaN average is serialised as null by JSON.stringify; treat it as "no data".
        if (!Number.isFinite(raw.avgResponseTime)) {
            raw.avgResponseTime = 0;
        }
        return raw;
    }
    /**
     * Forward a warning to the logger, tolerating a missing logger.
     * @private
     */
    logWarning(message, error) {
        this.logger?.warn?.(message, { error });
    }
    /**
     * Reset health data for a specific model.
     */
    resetModelHealth(providerID, modelID) {
        this.healthData.delete(getModelKey(providerID, modelID));
        if (this.persistenceEnabled) {
            this.saveState();
        }
    }
    /**
     * Reset all health data.
     */
    resetAllHealth() {
        this.healthData.clear();
        if (this.persistenceEnabled) {
            this.saveState();
        }
    }
    /**
     * Check if health-based selection is enabled.
     */
    isEnabled() {
        return this.healthBasedSelectionEnabled;
    }
    /**
     * Get aggregate statistics over all tracked models.
     * `avgHealthScore` is 100 when nothing is tracked.
     */
    getStats() {
        const models = Array.from(this.healthData.values());
        const sumOf = (pick) => models.reduce((sum, h) => sum + pick(h), 0);
        return {
            totalTracked: models.length,
            totalRequests: sumOf(h => h.totalRequests),
            totalSuccesses: sumOf(h => h.successfulRequests),
            totalFailures: sumOf(h => h.failedRequests),
            avgHealthScore: models.length > 0
                ? Math.round(sumOf(h => h.healthScore) / models.length)
                : 100,
            modelsWithReliableData: models.filter(h => h.totalRequests >= MIN_REQUESTS_FOR_RELIABLE_SCORE).length,
        };
    }
    /**
     * Remove records of models that have not been used recently.
     *
     * @param maxAgeMs Maximum time since `lastUsed`, in ms (default: 30 days).
     * @returns Number of records removed.
     */
    cleanupOldEntries(maxAgeMs = 30 * 24 * 60 * 60 * 1000) {
        const now = Date.now();
        let cleaned = 0;
        for (const [key, health] of this.healthData.entries()) {
            if (now - health.lastUsed > maxAgeMs) {
                this.healthData.delete(key);
                cleaned++;
            }
        }
        if (cleaned > 0 && this.persistenceEnabled) {
            this.saveState();
        }
        return cleaned;
    }
    /**
     * Destroy the health tracker: cancel any scheduled save, flush the state
     * to disk immediately (only when persistence is enabled) and drop all
     * in-memory records.
     */
    destroy() {
        if (this.saveTimeout) {
            clearTimeout(this.saveTimeout);
            this.saveTimeout = undefined;
        }
        // The cancelled timer can no longer clear the flag itself.
        this.savePending = false;
        this.performSave();
        this.healthData.clear();
    }
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Metrics Manager - Handles metrics collection, aggregation, and reporting
|
|
3
3
|
*/
|
|
4
4
|
import type { Logger } from '../../logger.js';
|
|
5
|
-
import type { MetricsConfig, MetricsData, RateLimitMetrics, FallbackTargetMetrics, ModelPerformanceMetrics } from '../types/index.js';
|
|
5
|
+
import type { MetricsConfig, MetricsData, RateLimitMetrics, FallbackTargetMetrics, ModelPerformanceMetrics, CircuitBreakerStateType } from '../types/index.js';
|
|
6
6
|
/**
|
|
7
7
|
* Metrics Manager class for collecting and reporting metrics
|
|
8
8
|
*/
|
|
@@ -61,6 +61,15 @@ export declare class MetricsManager {
|
|
|
61
61
|
* Record a failed retry
|
|
62
62
|
*/
|
|
63
63
|
recordRetryFailure(): void;
|
|
64
|
+
/**
|
|
65
|
+
* Record a circuit breaker state transition
|
|
66
|
+
*/
|
|
67
|
+
recordCircuitBreakerStateTransition(modelKey: string, oldState: CircuitBreakerStateType, newState: CircuitBreakerStateType): void;
|
|
68
|
+
/**
|
|
69
|
+
* Helper method to update circuit breaker state counts
|
|
70
|
+
* @private
|
|
71
|
+
*/
|
|
72
|
+
private updateCircuitBreakerStateCounts;
|
|
64
73
|
/**
|
|
65
74
|
* Get a copy of the current metrics
|
|
66
75
|
*/
|
|
@@ -31,6 +31,18 @@ export class MetricsManager {
|
|
|
31
31
|
byModel: new Map(),
|
|
32
32
|
},
|
|
33
33
|
modelPerformance: new Map(),
|
|
34
|
+
circuitBreaker: {
|
|
35
|
+
total: {
|
|
36
|
+
stateTransitions: 0,
|
|
37
|
+
opens: 0,
|
|
38
|
+
closes: 0,
|
|
39
|
+
halfOpens: 0,
|
|
40
|
+
currentOpen: 0,
|
|
41
|
+
currentHalfOpen: 0,
|
|
42
|
+
currentClosed: 0,
|
|
43
|
+
},
|
|
44
|
+
byModel: new Map(),
|
|
45
|
+
},
|
|
34
46
|
startedAt: Date.now(),
|
|
35
47
|
generatedAt: Date.now(),
|
|
36
48
|
};
|
|
@@ -71,6 +83,18 @@ export class MetricsManager {
|
|
|
71
83
|
byModel: new Map(),
|
|
72
84
|
},
|
|
73
85
|
modelPerformance: new Map(),
|
|
86
|
+
circuitBreaker: {
|
|
87
|
+
total: {
|
|
88
|
+
stateTransitions: 0,
|
|
89
|
+
opens: 0,
|
|
90
|
+
closes: 0,
|
|
91
|
+
halfOpens: 0,
|
|
92
|
+
currentOpen: 0,
|
|
93
|
+
currentHalfOpen: 0,
|
|
94
|
+
currentClosed: 0,
|
|
95
|
+
},
|
|
96
|
+
byModel: new Map(),
|
|
97
|
+
},
|
|
74
98
|
startedAt: Date.now(),
|
|
75
99
|
generatedAt: Date.now(),
|
|
76
100
|
};
|
|
@@ -229,6 +253,62 @@ export class MetricsManager {
|
|
|
229
253
|
return;
|
|
230
254
|
this.metrics.retries.failed++;
|
|
231
255
|
}
|
|
256
|
+
/**
|
|
257
|
+
* Record a circuit breaker state transition
|
|
258
|
+
*/
|
|
259
|
+
recordCircuitBreakerStateTransition(modelKey, oldState, newState) {
|
|
260
|
+
if (!this.config.enabled)
|
|
261
|
+
return;
|
|
262
|
+
// Update total metrics
|
|
263
|
+
this.metrics.circuitBreaker.total.stateTransitions++;
|
|
264
|
+
this.updateCircuitBreakerStateCounts(this.metrics.circuitBreaker.total, oldState, newState);
|
|
265
|
+
// Update model-specific metrics
|
|
266
|
+
let modelMetrics = this.metrics.circuitBreaker.byModel.get(modelKey);
|
|
267
|
+
if (!modelMetrics) {
|
|
268
|
+
modelMetrics = {
|
|
269
|
+
stateTransitions: 0,
|
|
270
|
+
opens: 0,
|
|
271
|
+
closes: 0,
|
|
272
|
+
halfOpens: 0,
|
|
273
|
+
currentOpen: 0,
|
|
274
|
+
currentHalfOpen: 0,
|
|
275
|
+
currentClosed: 1, // Start with CLOSED
|
|
276
|
+
};
|
|
277
|
+
this.metrics.circuitBreaker.byModel.set(modelKey, modelMetrics);
|
|
278
|
+
}
|
|
279
|
+
modelMetrics.stateTransitions++;
|
|
280
|
+
this.updateCircuitBreakerStateCounts(modelMetrics, oldState, newState);
|
|
281
|
+
this.metrics.circuitBreaker.byModel.set(modelKey, modelMetrics);
|
|
282
|
+
}
|
|
283
|
+
/**
|
|
284
|
+
* Helper method to update circuit breaker state counts
|
|
285
|
+
* @private
|
|
286
|
+
*/
|
|
287
|
+
updateCircuitBreakerStateCounts(metrics, oldState, newState) {
|
|
288
|
+
// Update state counts based on old state
|
|
289
|
+
if (oldState === 'OPEN') {
|
|
290
|
+
metrics.currentOpen--;
|
|
291
|
+
}
|
|
292
|
+
else if (oldState === 'HALF_OPEN') {
|
|
293
|
+
metrics.currentHalfOpen--;
|
|
294
|
+
}
|
|
295
|
+
else if (oldState === 'CLOSED') {
|
|
296
|
+
metrics.currentClosed--;
|
|
297
|
+
}
|
|
298
|
+
// Update state counts based on new state
|
|
299
|
+
if (newState === 'OPEN') {
|
|
300
|
+
metrics.opens++;
|
|
301
|
+
metrics.currentOpen++;
|
|
302
|
+
}
|
|
303
|
+
else if (newState === 'HALF_OPEN') {
|
|
304
|
+
metrics.halfOpens++;
|
|
305
|
+
metrics.currentHalfOpen++;
|
|
306
|
+
}
|
|
307
|
+
else if (newState === 'CLOSED') {
|
|
308
|
+
metrics.closes++;
|
|
309
|
+
metrics.currentClosed++;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
232
312
|
/**
|
|
233
313
|
* Get a copy of the current metrics
|
|
234
314
|
*/
|
|
@@ -266,6 +346,10 @@ export class MetricsManager {
|
|
|
266
346
|
byModel: Object.fromEntries(Array.from(metrics.retries.byModel.entries()).map(([k, v]) => [k, v])),
|
|
267
347
|
},
|
|
268
348
|
modelPerformance: Object.fromEntries(Array.from(metrics.modelPerformance.entries()).map(([k, v]) => [k, v])),
|
|
349
|
+
circuitBreaker: {
|
|
350
|
+
...metrics.circuitBreaker,
|
|
351
|
+
byModel: Object.fromEntries(Array.from(metrics.circuitBreaker.byModel.entries()).map(([k, v]) => [k, v])),
|
|
352
|
+
},
|
|
269
353
|
startedAt: metrics.startedAt,
|
|
270
354
|
generatedAt: metrics.generatedAt,
|
|
271
355
|
};
|
|
@@ -342,6 +426,31 @@ export class MetricsManager {
|
|
|
342
426
|
}
|
|
343
427
|
}
|
|
344
428
|
lines.push("");
|
|
429
|
+
// Circuit Breaker
|
|
430
|
+
lines.push("Circuit Breaker:");
|
|
431
|
+
lines.push("-".repeat(40));
|
|
432
|
+
lines.push(` State Transitions: ${this.metrics.circuitBreaker.total.stateTransitions}`);
|
|
433
|
+
lines.push(` Opens: ${this.metrics.circuitBreaker.total.opens}`);
|
|
434
|
+
lines.push(` Closes: ${this.metrics.circuitBreaker.total.closes}`);
|
|
435
|
+
lines.push(` Half Opens: ${this.metrics.circuitBreaker.total.halfOpens}`);
|
|
436
|
+
lines.push("");
|
|
437
|
+
lines.push(" Current State Distribution:");
|
|
438
|
+
lines.push(` CLOSED: ${this.metrics.circuitBreaker.total.currentClosed}`);
|
|
439
|
+
lines.push(` HALF_OPEN: ${this.metrics.circuitBreaker.total.currentHalfOpen}`);
|
|
440
|
+
lines.push(` OPEN: ${this.metrics.circuitBreaker.total.currentOpen}`);
|
|
441
|
+
if (this.metrics.circuitBreaker.byModel.size > 0) {
|
|
442
|
+
lines.push("");
|
|
443
|
+
lines.push(" By Model:");
|
|
444
|
+
for (const [model, data] of this.metrics.circuitBreaker.byModel.entries()) {
|
|
445
|
+
lines.push(` ${model}:`);
|
|
446
|
+
lines.push(` State Transitions: ${data.stateTransitions}`);
|
|
447
|
+
lines.push(` Opens: ${data.opens}`);
|
|
448
|
+
lines.push(` Closes: ${data.closes}`);
|
|
449
|
+
lines.push(` Half Opens: ${data.halfOpens}`);
|
|
450
|
+
lines.push(` Current State: ${data.currentOpen > 0 ? 'OPEN' : data.currentHalfOpen > 0 ? 'HALF_OPEN' : 'CLOSED'}`);
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
lines.push("");
|
|
345
454
|
// Model Performance
|
|
346
455
|
lines.push("Model Performance:");
|
|
347
456
|
lines.push("-".repeat(40));
|
|
@@ -428,6 +537,34 @@ export class MetricsManager {
|
|
|
428
537
|
].join(","));
|
|
429
538
|
}
|
|
430
539
|
lines.push("");
|
|
540
|
+
// Circuit Breaker Summary CSV
|
|
541
|
+
lines.push("=== CIRCUIT_BREAKER_SUMMARY ===");
|
|
542
|
+
lines.push(`state_transitions,opens,closes,half_opens,current_open,current_half_open,current_closed`);
|
|
543
|
+
lines.push([
|
|
544
|
+
this.metrics.circuitBreaker.total.stateTransitions,
|
|
545
|
+
this.metrics.circuitBreaker.total.opens,
|
|
546
|
+
this.metrics.circuitBreaker.total.closes,
|
|
547
|
+
this.metrics.circuitBreaker.total.halfOpens,
|
|
548
|
+
this.metrics.circuitBreaker.total.currentOpen,
|
|
549
|
+
this.metrics.circuitBreaker.total.currentHalfOpen,
|
|
550
|
+
this.metrics.circuitBreaker.total.currentClosed,
|
|
551
|
+
].join(","));
|
|
552
|
+
lines.push("");
|
|
553
|
+
// Circuit Breaker by Model CSV
|
|
554
|
+
lines.push("=== CIRCUIT_BREAKER_BY_MODEL ===");
|
|
555
|
+
lines.push("model,state_transitions,opens,closes,half_opens,current_state");
|
|
556
|
+
for (const [model, data] of this.metrics.circuitBreaker.byModel.entries()) {
|
|
557
|
+
const currentState = data.currentOpen > 0 ? 'OPEN' : data.currentHalfOpen > 0 ? 'HALF_OPEN' : 'CLOSED';
|
|
558
|
+
lines.push([
|
|
559
|
+
model,
|
|
560
|
+
data.stateTransitions,
|
|
561
|
+
data.opens,
|
|
562
|
+
data.closes,
|
|
563
|
+
data.halfOpens,
|
|
564
|
+
currentState,
|
|
565
|
+
].join(","));
|
|
566
|
+
}
|
|
567
|
+
lines.push("");
|
|
431
568
|
// Model Performance CSV
|
|
432
569
|
lines.push("=== MODEL_PERFORMANCE ===");
|
|
433
570
|
lines.push("model,requests,successes,failures,avg_response_time_ms,success_rate");
|
|
@@ -33,6 +33,31 @@ export interface RetryPolicy {
|
|
|
33
33
|
jitterFactor: number;
|
|
34
34
|
timeoutMs?: number;
|
|
35
35
|
}
|
|
36
|
+
/**
 * Name of a circuit breaker state.
 */
export type CircuitBreakerStateType =
    | 'CLOSED'
    | 'OPEN'
    | 'HALF_OPEN';
|
|
40
|
+
/**
 * Circuit breaker configuration.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names only — confirm against the CircuitBreaker implementation.
 */
export interface CircuitBreakerConfig {
    /** Turns the circuit breaker on or off. */
    enabled: boolean;
    /** Presumably the number of failures that opens the circuit — confirm. */
    failureThreshold: number;
    /** Presumably the wait, in ms, before an open circuit is probed again — confirm. */
    recoveryTimeoutMs: number;
    /** Presumably the number of calls allowed while half-open — confirm. */
    halfOpenMaxCalls: number;
    /** Presumably the number of successes that closes the circuit again — confirm. */
    successThreshold: number;
}
|
|
50
|
+
/**
 * Circuit breaker state data.
 *
 * NOTE(review): units of the `*Time` fields are not visible here; presumably
 * epoch milliseconds — confirm against the CircuitBreaker implementation.
 */
export interface CircuitBreakerState {
    /** Current state name. */
    state: CircuitBreakerStateType;
    /** Failure counter. */
    failureCount: number;
    /** Success counter. */
    successCount: number;
    /** Time of the last failure. */
    lastFailureTime: number;
    /** Time of the last success. */
    lastSuccessTime: number;
    /** Time of the next attempt. */
    nextAttemptTime: number;
}
|
|
36
61
|
/**
|
|
37
62
|
* Metrics output configuration
|
|
38
63
|
*/
|
|
@@ -49,6 +74,53 @@ export interface MetricsConfig {
|
|
|
49
74
|
output: MetricsOutputConfig;
|
|
50
75
|
resetInterval: "hourly" | "daily" | "weekly";
|
|
51
76
|
}
|
|
77
|
+
/**
 * Configuration validation options.
 *
 * NOTE(review): the effect of each flag is not visible here — confirm
 * against the config validator before relying on these descriptions.
 */
export interface ConfigValidationOptions {
    /** Optional strict-mode flag. */
    strict?: boolean;
    /** Optional flag controlling whether validation warnings are logged. */
    logWarnings?: boolean;
}
|
|
84
|
+
/**
 * Controls whether and where model health data is stored on disk.
 */
export interface HealthPersistenceConfig {
    /**
     * Persistence switch. HealthTracker treats every value other than
     * `false` as enabled.
     */
    enabled: boolean;
    /**
     * File that health state is read from and written to. When omitted or
     * empty, HealthTracker falls back to
     * `<home directory>/.opencode/rate-limit-fallback-health.json`.
     */
    path?: string;
}
|
|
91
|
+
/**
 * Health record kept by HealthTracker for a single model.
 */
export interface ModelHealth {
    /** Key derived from providerID and modelID; also the key the record is stored under. */
    modelKey: string;
    /** Provider the model belongs to. */
    providerID: string;
    /** Model identifier within the provider. */
    modelID: string;
    /** Number of recorded requests (successes plus failures). */
    totalRequests: number;
    /** Number of recorded successes. */
    successfulRequests: number;
    /** Number of recorded failures. */
    failedRequests: number;
    /** Failures recorded since the last success; reset to 0 by a success. */
    consecutiveFailures: number;
    /**
     * Weighted moving average of successful response times in ms (each new
     * sample weighs 30%); 0 until the first success is recorded.
     */
    avgResponseTime: number;
    /** `Date.now()` timestamp of the most recent recorded request. */
    lastUsed: number;
    /** `Date.now()` timestamp of the most recent success, 0 if none. */
    lastSuccess: number;
    /** `Date.now()` timestamp of the most recent failure, 0 if none. */
    lastFailure: number;
    /** Derived score from 0 to 100; higher is healthier. */
    healthScore: number;
}
|
|
108
|
+
/**
 * Error pattern definition.
 *
 * NOTE(review): how patterns are matched and how `priority` orders them is
 * not visible here — confirm against the pattern registry.
 */
export interface ErrorPattern {
    /** Name of the pattern. */
    name: string;
    /** Optional provider the pattern is associated with. */
    provider?: string;
    /** Literal strings and/or regular expressions belonging to the pattern. */
    patterns: Array<string | RegExp>;
    /** Numeric priority of the pattern. */
    priority: number;
}
|
|
117
|
+
/**
 * Error pattern configuration.
 *
 * NOTE(review): the behaviour enabled by `enableLearning` is not visible
 * here — confirm against the pattern registry.
 */
export interface ErrorPatternsConfig {
    /** Optional list of user-supplied error patterns. */
    custom?: ErrorPattern[];
    /** Optional flag for pattern learning. */
    enableLearning?: boolean;
}
|
|
52
124
|
/**
|
|
53
125
|
* Plugin configuration
|
|
54
126
|
*/
|
|
@@ -60,8 +132,14 @@ export interface PluginConfig {
|
|
|
60
132
|
maxSubagentDepth?: number;
|
|
61
133
|
enableSubagentFallback?: boolean;
|
|
62
134
|
retryPolicy?: RetryPolicy;
|
|
135
|
+
circuitBreaker?: CircuitBreakerConfig;
|
|
63
136
|
log?: LogConfig;
|
|
64
137
|
metrics?: MetricsConfig;
|
|
138
|
+
configValidation?: ConfigValidationOptions;
|
|
139
|
+
enableHealthBasedSelection?: boolean;
|
|
140
|
+
healthPersistence?: HealthPersistenceConfig;
|
|
141
|
+
verbose?: boolean;
|
|
142
|
+
errorPatterns?: ErrorPatternsConfig;
|
|
65
143
|
}
|
|
66
144
|
/**
|
|
67
145
|
* Fallback state for tracking progress
|
|
@@ -183,6 +261,18 @@ export interface ModelPerformanceMetrics {
|
|
|
183
261
|
failures: number;
|
|
184
262
|
averageResponseTime?: number;
|
|
185
263
|
}
|
|
264
|
+
/**
 * Circuit breaker counters, kept by MetricsManager both as an aggregate and
 * per model.
 */
export interface CircuitBreakerMetrics {
    /** Number of state transitions recorded. */
    stateTransitions: number;
    /** Number of transitions into OPEN. */
    opens: number;
    /** Number of transitions into CLOSED. */
    closes: number;
    /** Number of transitions into HALF_OPEN. */
    halfOpens: number;
    /** Incremented on entering OPEN, decremented on leaving it. */
    currentOpen: number;
    /** Incremented on entering HALF_OPEN, decremented on leaving it. */
    currentHalfOpen: number;
    /** Incremented on entering CLOSED, decremented on leaving it. */
    currentClosed: number;
}
|
|
186
276
|
/**
|
|
187
277
|
* Retry metrics
|
|
188
278
|
*/
|
|
@@ -210,6 +300,10 @@ export interface MetricsData {
|
|
|
210
300
|
};
|
|
211
301
|
retries: RetryMetrics;
|
|
212
302
|
modelPerformance: Map<string, ModelPerformanceMetrics>;
|
|
303
|
+
circuitBreaker: {
|
|
304
|
+
total: CircuitBreakerMetrics;
|
|
305
|
+
byModel: Map<string, CircuitBreakerMetrics>;
|
|
306
|
+
};
|
|
213
307
|
startedAt: number;
|
|
214
308
|
generatedAt: number;
|
|
215
309
|
}
|
|
@@ -314,6 +408,10 @@ export declare const DEFAULT_FALLBACK_MODELS: FallbackModel[];
|
|
|
314
408
|
* Default retry policy
|
|
315
409
|
*/
|
|
316
410
|
export declare const DEFAULT_RETRY_POLICY: RetryPolicy;
|
|
411
|
+
/**
|
|
412
|
+
* Default circuit breaker configuration
|
|
413
|
+
*/
|
|
414
|
+
export declare const DEFAULT_CIRCUIT_BREAKER_CONFIG: CircuitBreakerConfig;
|
|
317
415
|
/**
|
|
318
416
|
* Valid fallback modes
|
|
319
417
|
*/
|