@azumag/opencode-rate-limit-fallback 1.31.0 → 1.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +134 -47
- package/dist/index.d.ts +2 -1
- package/dist/index.js +58 -7
- package/dist/src/circuitbreaker/CircuitBreaker.d.ts +60 -0
- package/dist/src/circuitbreaker/CircuitBreaker.js +218 -0
- package/dist/src/circuitbreaker/CircuitState.d.ts +44 -0
- package/dist/src/circuitbreaker/CircuitState.js +128 -0
- package/dist/src/circuitbreaker/index.d.ts +8 -0
- package/dist/src/circuitbreaker/index.js +8 -0
- package/dist/src/config/Validator.d.ts +64 -0
- package/dist/src/config/Validator.js +618 -0
- package/dist/src/diagnostics/Reporter.d.ts +128 -0
- package/dist/src/diagnostics/Reporter.js +285 -0
- package/dist/src/errors/PatternRegistry.d.ts +75 -0
- package/dist/src/errors/PatternRegistry.js +234 -0
- package/dist/src/fallback/FallbackHandler.d.ts +4 -1
- package/dist/src/fallback/FallbackHandler.js +41 -2
- package/dist/src/fallback/ModelSelector.d.ts +9 -1
- package/dist/src/fallback/ModelSelector.js +33 -4
- package/dist/src/health/HealthTracker.d.ts +96 -0
- package/dist/src/health/HealthTracker.js +353 -0
- package/dist/src/metrics/MetricsManager.d.ts +10 -1
- package/dist/src/metrics/MetricsManager.js +137 -0
- package/dist/src/types/index.d.ts +98 -0
- package/dist/src/types/index.js +10 -0
- package/dist/src/utils/config.d.ts +8 -1
- package/dist/src/utils/config.js +26 -11
- package/package.json +1 -1
- package/dist/src/utils/errorDetection.d.ts +0 -7
- package/dist/src/utils/errorDetection.js +0 -34
package/README.md
CHANGED
|
@@ -17,10 +17,11 @@ OpenCode plugin that automatically switches to fallback models when rate limited
|
|
|
17
17
|
- Jitter to prevent thundering herd problem
|
|
18
18
|
- Configurable retry limits and timeouts
|
|
19
19
|
- Retry statistics tracking
|
|
20
|
-
- Toast notifications for user feedback
|
|
21
|
-
- Subagent session support with automatic fallback propagation to parent sessions
|
|
22
|
-
- Configurable maximum subagent nesting depth
|
|
23
|
-
- **
|
|
20
|
+
- Toast notifications for user feedback
|
|
21
|
+
- Subagent session support with automatic fallback propagation to parent sessions
|
|
22
|
+
- Configurable maximum subagent nesting depth
|
|
23
|
+
- **Circuit breaker pattern** to prevent cascading failures from consistently failing models
|
|
24
|
+
- **Metrics collection** to track rate limits, fallbacks, and model performance
|
|
24
25
|
|
|
25
26
|
## Installation
|
|
26
27
|
|
|
@@ -85,21 +86,29 @@ Create a configuration file at one of these locations:
|
|
|
85
86
|
"format": "pretty"
|
|
86
87
|
},
|
|
87
88
|
"resetInterval": "daily"
|
|
89
|
+
},
|
|
90
|
+
"circuitBreaker": {
|
|
91
|
+
"enabled": true,
|
|
92
|
+
"failureThreshold": 5,
|
|
93
|
+
"recoveryTimeoutMs": 60000,
|
|
94
|
+
"halfOpenMaxCalls": 1,
|
|
95
|
+
"successThreshold": 2
|
|
88
96
|
}
|
|
89
97
|
}
|
|
90
98
|
```
|
|
91
99
|
|
|
92
100
|
### Configuration Options
|
|
93
101
|
|
|
94
|
-
| Option | Type | Default | Description |
|
|
95
|
-
|--------|------|---------|-------------|
|
|
96
|
-
| `enabled` | boolean | `true` | Enable/disable the plugin |
|
|
97
|
-
| `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
|
|
98
|
-
| `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
|
|
99
|
-
| `fallbackModels` | array | See below | List of fallback models in priority order |
|
|
100
|
-
| `maxSubagentDepth` | number | `10` | Maximum nesting depth for subagent hierarchies |
|
|
101
|
-
| `enableSubagentFallback` | boolean | `true` | Enable/disable fallback for subagent sessions |
|
|
102
|
-
| `retryPolicy` | object | See below | Retry policy configuration (see below) |
|
|
102
|
+
| Option | Type | Default | Description |
|
|
103
|
+
|--------|------|---------|-------------|
|
|
104
|
+
| `enabled` | boolean | `true` | Enable/disable the plugin |
|
|
105
|
+
| `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
|
|
106
|
+
| `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
|
|
107
|
+
| `fallbackModels` | array | See below | List of fallback models in priority order |
|
|
108
|
+
| `maxSubagentDepth` | number | `10` | Maximum nesting depth for subagent hierarchies |
|
|
109
|
+
| `enableSubagentFallback` | boolean | `true` | Enable/disable fallback for subagent sessions |
|
|
110
|
+
| `retryPolicy` | object | See below | Retry policy configuration (see below) |
|
|
111
|
+
| `circuitBreaker` | object | See below | Circuit breaker configuration (see below) |
|
|
103
112
|
|
|
104
113
|
### Fallback Modes
|
|
105
114
|
|
|
@@ -163,11 +172,56 @@ Example with `baseDelayMs: 1000` and `maxDelayMs: 5000`:
|
|
|
163
172
|
|
|
164
173
|
Jitter adds random variation to delay times to prevent the "thundering herd" problem, where multiple clients retry simultaneously and overwhelm the API.
|
|
165
174
|
|
|
166
|
-
- Recommended for production environments with multiple concurrent users
|
|
167
|
-
- `jitterFactor: 0.1` adds ±10% variance to delay times
|
|
168
|
-
- Example: With base delay of 1000ms and jitterFactor 0.1, actual delay will be 900-1100ms
|
|
175
|
+
- Recommended for production environments with multiple concurrent users
|
|
176
|
+
- `jitterFactor: 0.1` adds ±10% variance to delay times
|
|
177
|
+
- Example: With base delay of 1000ms and jitterFactor 0.1, actual delay will be 900-1100ms
|
|
178
|
+
|
|
179
|
+
### Circuit Breaker
|
|
180
|
+
|
|
181
|
+
The circuit breaker pattern prevents cascading failures by temporarily disabling models that are consistently failing (not due to rate limits).
|
|
182
|
+
|
|
183
|
+
| Option | Type | Default | Description |
|
|
184
|
+
|--------|------|---------|-------------|
|
|
185
|
+
| `circuitBreaker.enabled` | boolean | `false` | Enable/disable circuit breaker |
|
|
186
|
+
| `circuitBreaker.failureThreshold` | number | `5` | Consecutive failures before opening circuit |
|
|
187
|
+
| `circuitBreaker.recoveryTimeoutMs` | number | `60000` | Wait time before attempting recovery (ms) |
|
|
188
|
+
| `circuitBreaker.halfOpenMaxCalls` | number | `1` | Max calls allowed in HALF_OPEN state |
|
|
189
|
+
| `circuitBreaker.successThreshold` | number | `2` | Successes needed to close circuit |
|
|
190
|
+
|
|
191
|
+
#### How It Works
|
|
192
|
+
|
|
193
|
+
The circuit breaker maintains three states for each model:
|
|
194
|
+
|
|
195
|
+
1. **CLOSED State**: Normal operation, requests pass through
|
|
196
|
+
- Failures are counted until the threshold is reached
|
|
197
|
+
- On threshold breach, transitions to OPEN state
|
|
169
198
|
|
|
170
|
-
|
|
199
|
+
2. **OPEN State**: Model is failing, requests fail fast
|
|
200
|
+
- The circuit is "open" to prevent unnecessary API calls
|
|
201
|
+
- No requests are allowed through
|
|
202
|
+
- After the recovery timeout, transitions to HALF_OPEN state
|
|
203
|
+
|
|
204
|
+
3. **HALF_OPEN State**: Testing if model recovered after timeout
|
|
205
|
+
- A limited number of test requests are allowed
|
|
206
|
+
- Once `successThreshold` successful test requests are recorded, transitions back to CLOSED state
|
|
207
|
+
- On failure, returns to OPEN state
|
|
208
|
+
|
|
209
|
+
#### Important Notes
|
|
210
|
+
|
|
211
|
+
- **Rate limit errors are NOT counted as failures**: The circuit breaker only tracks actual failures, not rate limit errors
|
|
212
|
+
- **Disabled by default**: Set `circuitBreaker.enabled: true` to activate this feature
|
|
213
|
+
- **Per-model tracking**: Each model has its own circuit state
|
|
214
|
+
- **Toast notifications**: Users are notified when a circuit opens, when a recovery attempt starts (HALF_OPEN), and when the circuit closes again
|
|
215
|
+
|
|
216
|
+
#### Configuration Recommendations
|
|
217
|
+
|
|
218
|
+
| Environment | failureThreshold | recoveryTimeoutMs | halfOpenMaxCalls |
|
|
219
|
+
|-------------|------------------|-------------------|------------------|
|
|
220
|
+
| Development | 3 | 30000 | 1 |
|
|
221
|
+
| Production | 5 | 60000 | 1 |
|
|
222
|
+
| High Availability | 10 | 30000 | 2 |
|
|
223
|
+
|
|
224
|
+
### Default Fallback Models
|
|
171
225
|
|
|
172
226
|
If no configuration is provided, the following models are used:
|
|
173
227
|
|
|
@@ -206,11 +260,12 @@ When OpenCode uses subagents (e.g., for complex tasks requiring specialized agen
|
|
|
206
260
|
|
|
207
261
|
## Metrics
|
|
208
262
|
|
|
209
|
-
The plugin includes a metrics collection feature that tracks:
|
|
210
|
-
- Rate limit events per provider/model
|
|
211
|
-
- Fallback statistics (total, successful, failed, average duration)
|
|
212
|
-
- **Retry statistics** (total attempts, successes, failures, average delay)
|
|
213
|
-
- Model performance (requests, successes, failures, response time)
|
|
263
|
+
The plugin includes a metrics collection feature that tracks:
|
|
264
|
+
- Rate limit events per provider/model
|
|
265
|
+
- Fallback statistics (total, successful, failed, average duration)
|
|
266
|
+
- **Retry statistics** (total attempts, successes, failures, average delay)
|
|
267
|
+
- Model performance (requests, successes, failures, response time)
|
|
268
|
+
- **Circuit breaker statistics** (state transitions, open/closed counts)
|
|
214
269
|
|
|
215
270
|
### Metrics Configuration
|
|
216
271
|
|
|
@@ -284,15 +339,28 @@ Retries:
|
|
|
284
339
|
|
|
285
340
|
Model Performance:
|
|
286
341
|
----------------------------------------
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
342
|
+
google/gemini-2.5-pro:
|
|
343
|
+
Requests: 10
|
|
344
|
+
Successes: 9
|
|
345
|
+
Failures: 1
|
|
346
|
+
Avg Response: 0.85s
|
|
347
|
+
Success Rate: 90.0%
|
|
348
|
+
|
|
349
|
+
Circuit Breaker:
|
|
350
|
+
----------------------------------------
|
|
351
|
+
anthropic/claude-3-5-sonnet-20250514:
|
|
352
|
+
State: OPEN
|
|
353
|
+
Failures: 5
|
|
354
|
+
Successes: 0
|
|
355
|
+
State Transitions: 2
|
|
356
|
+
google/gemini-2.5-pro:
|
|
357
|
+
State: CLOSED
|
|
358
|
+
Failures: 2
|
|
359
|
+
Successes: 8
|
|
360
|
+
State Transitions: 3
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
**JSON** (machine-readable):
|
|
296
364
|
```json
|
|
297
365
|
{
|
|
298
366
|
"rateLimits": {
|
|
@@ -332,18 +400,32 @@ Model Performance:
|
|
|
332
400
|
}
|
|
333
401
|
}
|
|
334
402
|
},
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
403
|
+
"modelPerformance": {
|
|
404
|
+
"google/gemini-2.5-pro": {
|
|
405
|
+
"requests": 10,
|
|
406
|
+
"successes": 9,
|
|
407
|
+
"failures": 1,
|
|
408
|
+
"averageResponseTime": 850
|
|
409
|
+
}
|
|
410
|
+
},
|
|
411
|
+
"circuitBreaker": {
|
|
412
|
+
"anthropic/claude-3-5-sonnet-20250514": {
|
|
413
|
+
"currentState": "OPEN",
|
|
414
|
+
"failures": 5,
|
|
415
|
+
"successes": 0,
|
|
416
|
+
"stateTransitions": 2
|
|
417
|
+
},
|
|
418
|
+
"google/gemini-2.5-pro": {
|
|
419
|
+
"currentState": "CLOSED",
|
|
420
|
+
"failures": 2,
|
|
421
|
+
"successes": 8,
|
|
422
|
+
"stateTransitions": 3
|
|
423
|
+
}
|
|
424
|
+
},
|
|
425
|
+
"startedAt": 1739148000000,
|
|
426
|
+
"generatedAt": 1739149800000
|
|
427
|
+
}
|
|
428
|
+
```
|
|
347
429
|
|
|
348
430
|
**CSV** (spreadsheet-friendly):
|
|
349
431
|
```
|
|
@@ -364,10 +446,15 @@ model,attempts,successes,success_rate
|
|
|
364
446
|
anthropic/claude-3-5-sonnet-20250514,5,3,60.0
|
|
365
447
|
google/gemini-2.5-pro,7,5,71.4
|
|
366
448
|
|
|
367
|
-
=== MODEL_PERFORMANCE ===
|
|
368
|
-
model,requests,successes,failures,avg_response_time_ms,success_rate
|
|
369
|
-
google/gemini-2.5-pro,10,9,1,850,90.0
|
|
370
|
-
|
|
449
|
+
=== MODEL_PERFORMANCE ===
|
|
450
|
+
model,requests,successes,failures,avg_response_time_ms,success_rate
|
|
451
|
+
google/gemini-2.5-pro,10,9,1,850,90.0
|
|
452
|
+
|
|
453
|
+
=== CIRCUIT_BREAKER ===
|
|
454
|
+
model,current_state,failures,successes,state_transitions
|
|
455
|
+
anthropic/claude-3-5-sonnet-20250514,OPEN,5,0,2
|
|
456
|
+
google/gemini-2.5-pro,CLOSED,2,8,3
|
|
457
|
+
```
|
|
371
458
|
|
|
372
459
|
## License
|
|
373
460
|
|
package/dist/index.d.ts
CHANGED
|
@@ -6,5 +6,6 @@
|
|
|
6
6
|
import type { Plugin } from "@opencode-ai/plugin";
|
|
7
7
|
export declare const RateLimitFallback: Plugin;
|
|
8
8
|
export default RateLimitFallback;
|
|
9
|
-
export type { PluginConfig, MetricsConfig, FallbackModel, FallbackMode } from "./src/types/index.js";
|
|
9
|
+
export type { PluginConfig, MetricsConfig, FallbackModel, FallbackMode, CircuitBreakerConfig, CircuitBreakerState, CircuitBreakerStateType } from "./src/types/index.js";
|
|
10
10
|
export type { LogConfig, Logger } from "./logger.js";
|
|
11
|
+
export { Logger as LoggerClass } from "./logger.js";
|
package/dist/index.js
CHANGED
|
@@ -7,9 +7,12 @@ import { createLogger } from "./logger.js";
|
|
|
7
7
|
import { MetricsManager } from "./src/metrics/MetricsManager.js";
|
|
8
8
|
import { FallbackHandler } from "./src/fallback/FallbackHandler.js";
|
|
9
9
|
import { loadConfig } from "./src/utils/config.js";
|
|
10
|
-
import { isRateLimitError } from "./src/utils/errorDetection.js";
|
|
11
10
|
import { SubagentTracker } from "./src/session/SubagentTracker.js";
|
|
12
11
|
import { CLEANUP_INTERVAL_MS } from "./src/types/index.js";
|
|
12
|
+
import { ConfigValidator } from "./src/config/Validator.js";
|
|
13
|
+
import { ErrorPatternRegistry } from "./src/errors/PatternRegistry.js";
|
|
14
|
+
import { HealthTracker } from "./src/health/HealthTracker.js";
|
|
15
|
+
import { DiagnosticReporter } from "./src/diagnostics/Reporter.js";
|
|
13
16
|
// ============================================================================
|
|
14
17
|
// Event Type Guards
|
|
15
18
|
// ============================================================================
|
|
@@ -53,8 +56,8 @@ function isSubagentSessionCreatedEvent(event) {
|
|
|
53
56
|
// ============================================================================
|
|
54
57
|
// Main Plugin Export
|
|
55
58
|
// ============================================================================
|
|
56
|
-
export const RateLimitFallback = async ({ client, directory }) => {
|
|
57
|
-
const config = loadConfig(directory);
|
|
59
|
+
export const RateLimitFallback = async ({ client, directory, worktree }) => {
|
|
60
|
+
const { config, source: configSource } = loadConfig(directory, worktree);
|
|
58
61
|
// Detect headless mode (no TUI)
|
|
59
62
|
const isHeadless = !client.tui;
|
|
60
63
|
// Auto-adjust log level for headless mode to ensure visibility
|
|
@@ -64,38 +67,82 @@ export const RateLimitFallback = async ({ client, directory }) => {
|
|
|
64
67
|
};
|
|
65
68
|
// Create logger instance
|
|
66
69
|
const logger = createLogger(logConfig, "RateLimitFallback");
|
|
70
|
+
if (configSource) {
|
|
71
|
+
logger.info(`Config loaded from ${configSource}`);
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
logger.info("No config file found, using defaults");
|
|
75
|
+
}
|
|
76
|
+
// Initialize configuration validator
|
|
77
|
+
const validator = new ConfigValidator(logger);
|
|
78
|
+
const validation = configSource
|
|
79
|
+
? validator.validateFile(configSource, config.configValidation)
|
|
80
|
+
: validator.validate(config, config.configValidation);
|
|
81
|
+
if (!validation.isValid && config.configValidation?.strict) {
|
|
82
|
+
logger.error("Configuration validation failed in strict mode. Plugin will not load.");
|
|
83
|
+
logger.error(`Errors: ${validation.errors.map(e => `${e.path}: ${e.message}`).join(', ')}`);
|
|
84
|
+
return {};
|
|
85
|
+
}
|
|
86
|
+
if (validation.errors.length > 0) {
|
|
87
|
+
logger.warn(`Configuration validation found ${validation.errors.length} error(s)`);
|
|
88
|
+
}
|
|
89
|
+
if (validation.warnings.length > 0) {
|
|
90
|
+
logger.warn(`Configuration validation found ${validation.warnings.length} warning(s)`);
|
|
91
|
+
}
|
|
67
92
|
if (!config.enabled) {
|
|
68
93
|
return {};
|
|
69
94
|
}
|
|
95
|
+
// Initialize error pattern registry
|
|
96
|
+
const errorPatternRegistry = new ErrorPatternRegistry(logger);
|
|
97
|
+
if (config.errorPatterns?.custom) {
|
|
98
|
+
errorPatternRegistry.registerMany(config.errorPatterns.custom);
|
|
99
|
+
}
|
|
100
|
+
// Initialize health tracker
|
|
101
|
+
let healthTracker;
|
|
102
|
+
if (config.enableHealthBasedSelection) {
|
|
103
|
+
healthTracker = new HealthTracker(config, logger);
|
|
104
|
+
logger.info("Health-based model selection enabled");
|
|
105
|
+
}
|
|
106
|
+
// Initialize diagnostic reporter
|
|
107
|
+
const diagnostics = new DiagnosticReporter(config, configSource || 'default', healthTracker, undefined, // circuitBreaker will be initialized in FallbackHandler
|
|
108
|
+
errorPatternRegistry, logger);
|
|
109
|
+
// Log startup diagnostics if verbose mode
|
|
110
|
+
if (config.verbose) {
|
|
111
|
+
logger.debug("Verbose mode enabled - showing diagnostic information");
|
|
112
|
+
diagnostics.logCurrentConfig();
|
|
113
|
+
}
|
|
70
114
|
// Initialize components
|
|
71
115
|
const subagentTracker = new SubagentTracker(config);
|
|
72
116
|
const metricsManager = new MetricsManager(config.metrics ?? { enabled: false, output: { console: true, format: "pretty" }, resetInterval: "daily" }, logger);
|
|
73
|
-
const fallbackHandler = new FallbackHandler(config, client, logger, metricsManager, subagentTracker);
|
|
117
|
+
const fallbackHandler = new FallbackHandler(config, client, logger, metricsManager, subagentTracker, healthTracker);
|
|
74
118
|
// Cleanup stale entries periodically
|
|
75
119
|
const cleanupInterval = setInterval(() => {
|
|
76
120
|
subagentTracker.cleanupStaleEntries();
|
|
77
121
|
fallbackHandler.cleanupStaleEntries();
|
|
122
|
+
if (healthTracker) {
|
|
123
|
+
healthTracker.cleanupOldEntries();
|
|
124
|
+
}
|
|
78
125
|
}, CLEANUP_INTERVAL_MS);
|
|
79
126
|
return {
|
|
80
127
|
event: async ({ event }) => {
|
|
81
128
|
// Handle session.error events
|
|
82
129
|
if (isSessionErrorEvent(event)) {
|
|
83
130
|
const { sessionID, error } = event.properties;
|
|
84
|
-
if (sessionID && error && isRateLimitError(error)) {
|
|
131
|
+
if (sessionID && error && errorPatternRegistry.isRateLimitError(error)) {
|
|
85
132
|
await fallbackHandler.handleRateLimitFallback(sessionID, "", "");
|
|
86
133
|
}
|
|
87
134
|
}
|
|
88
135
|
// Handle message.updated events
|
|
89
136
|
if (isMessageUpdatedEvent(event)) {
|
|
90
137
|
const info = event.properties.info;
|
|
91
|
-
if (info?.error && isRateLimitError(info.error)) {
|
|
138
|
+
if (info?.error && errorPatternRegistry.isRateLimitError(info.error)) {
|
|
92
139
|
await fallbackHandler.handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
|
|
93
140
|
}
|
|
94
141
|
else if (info?.status === "completed" && !info?.error && info?.id) {
|
|
95
142
|
// Record fallback success
|
|
96
143
|
fallbackHandler.handleMessageUpdated(info.sessionID, info.id, false, false);
|
|
97
144
|
}
|
|
98
|
-
else if (info?.error && !isRateLimitError(info.error) && info?.id) {
|
|
145
|
+
else if (info?.error && !errorPatternRegistry.isRateLimitError(info.error) && info?.id) {
|
|
99
146
|
// Record non-rate-limit error
|
|
100
147
|
fallbackHandler.handleMessageUpdated(info.sessionID, info.id, true, false);
|
|
101
148
|
}
|
|
@@ -131,7 +178,11 @@ export const RateLimitFallback = async ({ client, directory }) => {
|
|
|
131
178
|
subagentTracker.clearAll();
|
|
132
179
|
metricsManager.destroy();
|
|
133
180
|
fallbackHandler.destroy();
|
|
181
|
+
if (healthTracker) {
|
|
182
|
+
healthTracker.destroy();
|
|
183
|
+
}
|
|
134
184
|
},
|
|
135
185
|
};
|
|
136
186
|
};
|
|
137
187
|
export default RateLimitFallback;
|
|
188
|
+
export { Logger as LoggerClass } from "./logger.js";
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circuit Breaker - Manages circuit breakers for multiple models
|
|
3
|
+
*/
|
|
4
|
+
import type { Logger } from '../../logger.js';
|
|
5
|
+
import type { CircuitBreakerConfig, CircuitBreakerState, OpenCodeClient } from '../types/index.js';
|
|
6
|
+
import type { MetricsManager } from '../metrics/MetricsManager.js';
|
|
7
|
+
/**
 * Registry of per-model circuits, keyed by model key (providerID/modelID).
 *
 * When `config.enabled` is false, `canExecute` always allows the request and
 * the `record*` methods do nothing.
 */
export declare class CircuitBreaker {
    /** Map from model key to that model's circuit. */
    private circuits;
    /** Circuit breaker settings shared by every circuit. */
    private config;
    /** Logger used for checks, state changes and cleanup messages. */
    private logger;
    /** Optional sink that is told about every state transition. */
    private metricsManager?;
    /** Optional client used to show toast notifications. */
    private client?;
    /**
     * @param config - Circuit breaker settings
     * @param logger - Logger instance
     * @param metricsManager - Optional metrics sink for state transitions
     * @param client - Optional client used for toast notifications
     */
    constructor(config: CircuitBreakerConfig, logger: Logger, metricsManager?: MetricsManager, client?: OpenCodeClient);
    /**
     * Ask whether a request to the given model may proceed.
     * @param modelKey - The model key (providerID/modelID)
     * @returns true when the request is allowed (always true while disabled)
     */
    canExecute(modelKey: string): boolean;
    /**
     * Report that a request to the given model succeeded.
     * @param modelKey - The model key (providerID/modelID)
     */
    recordSuccess(modelKey: string): void;
    /**
     * Report that a request to the given model failed.
     * Rate-limit failures are logged and otherwise ignored.
     * @param modelKey - The model key (providerID/modelID)
     * @param isRateLimit - true if the failure was caused by rate limiting
     */
    recordFailure(modelKey: string, isRateLimit: boolean): void;
    /**
     * Read the state of one model's circuit.
     * A model without a circuit is reported as CLOSED with zeroed counters.
     * @param modelKey - The model key (providerID/modelID)
     * @returns The current circuit state
     */
    getState(modelKey: string): CircuitBreakerState;
    /**
     * Drop circuits that have recorded neither a success nor a failure
     * within the last 24 hours.
     */
    cleanupStaleEntries(): void;
    /**
     * Look up the circuit for a model, creating it on first use.
     * @private
     */
    private getOrCreateCircuit;
    /**
     * List the state of every circuit currently held.
     * @returns One entry per known model key
     */
    getAllStates(): Array<{
        modelKey: string;
        state: CircuitBreakerState;
    }>;
    /**
     * Discard every circuit held by this instance.
     */
    destroy(): void;
}
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Circuit Breaker - Manages circuit breakers for multiple models
|
|
3
|
+
*/
|
|
4
|
+
import { CircuitState } from './CircuitState.js';
|
|
5
|
+
import { safeShowToast } from '../utils/helpers.js';
|
|
6
|
+
/**
 * CircuitBreaker - keeps one circuit per model key (providerID/modelID) and
 * reports every state transition to the logger, the optional metrics manager
 * and, for selected target states, the user via a toast.
 *
 * While `config.enabled` is falsy the breaker is inert: `canExecute` always
 * allows the request and `recordSuccess` / `recordFailure` do nothing.
 */
export class CircuitBreaker {
    circuits;
    config;
    logger;
    metricsManager;
    client;
    /**
     * @param config - Circuit breaker settings (reads `enabled` and `recoveryTimeoutMs`;
     *                 the whole object is handed to each new CircuitState)
     * @param logger - Logger exposing `debug`, `info` and `warn`
     * @param metricsManager - Optional; receives `recordCircuitBreakerStateTransition` calls
     * @param client - Optional; when present, toasts are shown through `safeShowToast`
     */
    constructor(config, logger, metricsManager, client) {
        this.config = config;
        this.logger = logger;
        this.metricsManager = metricsManager;
        this.client = client;
        this.circuits = new Map();
    }
    /**
     * Check if a request should be allowed for a model
     * @param modelKey - The model key (providerID/modelID)
     * @returns true if request is allowed, false otherwise
     */
    canExecute(modelKey) {
        if (!this.config.enabled) {
            return true;
        }
        const circuit = this.getOrCreateCircuit(modelKey);
        const { allowed, transition } = circuit.canExecute();
        // Snapshot taken after the check so the log shows the post-transition state.
        const { state, failureCount } = circuit.getState();
        this.logger.debug(`Circuit breaker check for ${modelKey}`, {
            state,
            allowed,
            failureCount,
        });
        // The circuit reports a transition object when the check itself moved it.
        if (transition) {
            this.#reportTransition(modelKey, transition.from, transition.to, {
                toastFor: ['HALF_OPEN'],
            });
        }
        return allowed;
    }
    /**
     * Record a successful request for a model
     * @param modelKey - The model key (providerID/modelID)
     */
    recordSuccess(modelKey) {
        if (!this.config.enabled) {
            return;
        }
        const circuit = this.getOrCreateCircuit(modelKey);
        const oldState = circuit.getState().state;
        circuit.onSuccess();
        const newState = circuit.getState().state;
        if (oldState !== newState) {
            this.#reportTransition(modelKey, oldState, newState, {
                toastFor: ['CLOSED'],
            });
        }
    }
    /**
     * Record a failed request for a model
     * @param modelKey - The model key (providerID/modelID)
     * @param isRateLimit - true if the failure was due to rate limiting
     */
    recordFailure(modelKey, isRateLimit) {
        if (!this.config.enabled) {
            return;
        }
        // Rate limit errors don't count as circuit failures
        if (isRateLimit) {
            this.logger.debug(`Rate limit error for ${modelKey}, not counting as circuit failure`);
            return;
        }
        const circuit = this.getOrCreateCircuit(modelKey);
        const oldState = circuit.getState().state;
        circuit.onFailure();
        const { state: newState, failureCount } = circuit.getState();
        if (oldState !== newState) {
            this.#reportTransition(modelKey, oldState, newState, {
                level: 'warn',
                details: { failureCount },
                toastFor: ['OPEN', 'CLOSED'],
            });
        }
    }
    /**
     * Get the current state of a circuit
     * @param modelKey - The model key (providerID/modelID)
     * @returns The current circuit state; a model without a circuit is
     *          reported as CLOSED with zeroed counters and timestamps
     */
    getState(modelKey) {
        const circuit = this.circuits.get(modelKey);
        if (circuit) {
            return circuit.getState();
        }
        return {
            state: 'CLOSED',
            failureCount: 0,
            successCount: 0,
            lastFailureTime: 0,
            lastSuccessTime: 0,
            nextAttemptTime: 0,
        };
    }
    /**
     * Clean up stale entries from the circuits map.
     * A circuit is stale when neither a failure nor a success has been
     * recorded on it within the last 24 hours.
     */
    cleanupStaleEntries() {
        const staleAfterMs = 24 * 60 * 60 * 1000; // 24 hours
        const cutoffTime = Date.now() - staleAfterMs;
        for (const [key, circuit] of this.circuits.entries()) {
            const { lastFailureTime, lastSuccessTime } = circuit.getState();
            if (Math.max(lastFailureTime, lastSuccessTime) < cutoffTime) {
                this.circuits.delete(key);
                this.logger.debug(`Cleaned up stale circuit for ${key}`);
            }
        }
    }
    /**
     * Get or create a circuit for a model
     * @private
     */
    getOrCreateCircuit(modelKey) {
        const existing = this.circuits.get(modelKey);
        if (existing) {
            return existing;
        }
        const created = new CircuitState(this.config);
        this.circuits.set(modelKey, created);
        this.logger.debug(`Created new circuit for ${modelKey}`);
        return created;
    }
    /**
     * Get all circuit states
     * @returns One `{ modelKey, state }` entry per circuit, in insertion order
     */
    getAllStates() {
        return Array.from(this.circuits, ([modelKey, circuit]) => ({
            modelKey,
            state: circuit.getState(),
        }));
    }
    /**
     * Destroy circuit breaker and clean up resources
     */
    destroy() {
        this.circuits.clear();
        this.logger.debug('Circuit breaker destroyed');
    }
    /**
     * Single place that announces a state transition: log line first, then an
     * optional toast, then the metrics record.
     * @param modelKey - The model key (providerID/modelID)
     * @param oldState - State before the transition
     * @param newState - State after the transition
     * @param options.level - Logger method to use ('info' or 'warn')
     * @param options.details - Extra fields appended to the log payload
     * @param options.toastFor - Target states for which a toast is shown
     */
    #reportTransition(modelKey, oldState, newState, { level = 'info', details = {}, toastFor = [] } = {}) {
        this.logger[level](`Circuit breaker state changed for ${modelKey}`, {
            oldState,
            newState,
            ...details,
        });
        if (this.client && toastFor.includes(newState)) {
            const body = this.#toastBodyFor(modelKey, newState);
            if (body) {
                safeShowToast(this.client, { body });
            }
        }
        if (this.metricsManager) {
            this.metricsManager.recordCircuitBreakerStateTransition(modelKey, oldState, newState);
        }
    }
    /**
     * Build the toast payload for a transition into `newState`.
     * @returns The toast body, or undefined for a state that has no toast
     */
    #toastBodyFor(modelKey, newState) {
        switch (newState) {
            case 'HALF_OPEN':
                return {
                    title: "Circuit Recovery Attempt",
                    message: `Attempting recovery for ${modelKey} after ${this.config.recoveryTimeoutMs}ms`,
                    variant: "info",
                    duration: 3000,
                };
            case 'OPEN':
                return {
                    title: "Circuit Opened",
                    message: `Circuit breaker opened for ${modelKey} after failure threshold`,
                    variant: "warning",
                    duration: 5000,
                };
            case 'CLOSED':
                return {
                    title: "Circuit Closed",
                    message: `Circuit breaker closed for ${modelKey} - service recovered`,
                    variant: "success",
                    duration: 3000,
                };
            default:
                return undefined;
        }
    }
}
|