@azumag/opencode-rate-limit-fallback 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -1
- package/index.ts +96 -9
- package/package.json +1 -1
- package/rate-limit-fallback.example.json +1 -0
package/README.md
CHANGED

@@ -6,9 +6,11 @@ OpenCode plugin that automatically switches to fallback models when rate limited
 
 ## Features
 
-- Detects rate limit errors (429, "usage limit", "quota exceeded", etc.)
+- Detects rate limit errors (429, "usage limit", "quota exceeded", "high concurrency", etc.)
 - Automatically aborts the current request and retries with a fallback model
 - Configurable fallback model list with priority order
+- Three fallback modes: `cycle`, `stop`, and `retry-last`
+- Session model tracking for sequential fallback across multiple rate limits
 - Cooldown period to prevent immediate retry on rate-limited models
 - Toast notifications for user feedback
 
@@ -51,6 +53,7 @@ Create a configuration file at one of these locations:
 {
   "enabled": true,
   "cooldownMs": 60000,
+  "fallbackMode": "cycle",
   "fallbackModels": [
     { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
     { "providerID": "google", "modelID": "gemini-2.5-pro" },
@@ -65,8 +68,17 @@ Create a configuration file at one of these locations:
 |--------|------|---------|-------------|
 | `enabled` | boolean | `true` | Enable/disable the plugin |
 | `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
+| `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
 | `fallbackModels` | array | See below | List of fallback models in priority order |
 
+### Fallback Modes
+
+| Mode | Description |
+|------|-------------|
+| `"cycle"` | Reset and retry from the first model when all models are exhausted (default) |
+| `"stop"` | Stop and show error when all models are exhausted |
+| `"retry-last"` | Try the last model once more, then reset to first on next prompt |
+
 ### Default Fallback Models
 
 If no configuration is provided, the following models are used:
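
For illustration, a minimal TypeScript sketch of how a `fallbackMode` value from the config file is resolved, mirroring the validation added to `loadConfig` in `index.ts` below. `resolveFallbackMode` is a hypothetical helper for this example, not part of the plugin's API.

```ts
// Hypothetical helper (not exported by the plugin): resolves a user-supplied
// fallbackMode the same way the loadConfig change in index.ts does.
type FallbackMode = "cycle" | "stop" | "retry-last";

const validModes: FallbackMode[] = ["cycle", "stop", "retry-last"];

function resolveFallbackMode(userValue: unknown): FallbackMode {
  // Unknown, misspelled, or missing values fall back to the documented default.
  return validModes.includes(userValue as FallbackMode)
    ? (userValue as FallbackMode)
    : "cycle";
}

console.log(resolveFallbackMode("retry-last")); // "retry-last"
console.log(resolveFallbackMode("cylce"));      // "cycle" (invalid value ignored)
console.log(resolveFallbackMode(undefined));    // "cycle" (option omitted)
```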
package/index.ts
CHANGED

@@ -7,10 +7,19 @@ interface FallbackModel {
   modelID: string;
 }
 
+/**
+ * Fallback mode when all models are exhausted:
+ * - "cycle": Reset and retry from the first model (default)
+ * - "stop": Stop and show error message
+ * - "retry-last": Try the last model once, then reset to first on next prompt
+ */
+type FallbackMode = "cycle" | "stop" | "retry-last";
+
 interface PluginConfig {
   fallbackModels: FallbackModel[];
   cooldownMs: number;
   enabled: boolean;
+  fallbackMode: FallbackMode;
 }
 
 const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
@@ -23,6 +32,7 @@ const DEFAULT_CONFIG: PluginConfig = {
   fallbackModels: DEFAULT_FALLBACK_MODELS,
   cooldownMs: 60 * 1000,
   enabled: true,
+  fallbackMode: "cycle",
 };
 
 function loadConfig(directory: string): PluginConfig {
@@ -39,10 +49,13 @@ function loadConfig(directory: string): PluginConfig {
     try {
       const content = readFileSync(configPath, "utf-8");
       const userConfig = JSON.parse(content);
+      const mode = userConfig.fallbackMode;
+      const validModes: FallbackMode[] = ["cycle", "stop", "retry-last"];
       return {
         ...DEFAULT_CONFIG,
         ...userConfig,
         fallbackModels: userConfig.fallbackModels || DEFAULT_CONFIG.fallbackModels,
+        fallbackMode: validModes.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
       };
     } catch (error) {
       // Config load failed, continue to next path
@@ -76,7 +89,9 @@ function isRateLimitError(error: any): boolean {
     "quota exceeded",
     "resource exhausted",
     "usage limit",
-    "
+    "high concurrency usage of this api",
+    "high concurrency",
+    "reduce concurrency",
     "429",
   ];
 
@@ -97,6 +112,8 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
   const rateLimitedModels = new Map<string, number>();
   const retryState = new Map<string, { attemptedModels: Set<string>; lastAttemptTime: number }>();
+  const currentSessionModel = new Map<string, { providerID: string; modelID: string }>();
+  const fallbackInProgress = new Map<string, number>(); // sessionID -> timestamp
 
   function isModelRateLimited(providerID: string, modelID: string): boolean {
     const key = getModelKey(providerID, modelID);
@@ -140,12 +157,28 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
   async function handleRateLimitFallback(sessionID: string, currentProviderID: string, currentModelID: string) {
     try {
+      // Prevent duplicate fallback processing within 5 seconds
+      const lastFallback = fallbackInProgress.get(sessionID);
+      if (lastFallback && Date.now() - lastFallback < 5000) {
+        return;
+      }
+      fallbackInProgress.set(sessionID, Date.now());
+
+      // If no model info provided, try to get from tracked session model
+      if (!currentProviderID || !currentModelID) {
+        const tracked = currentSessionModel.get(sessionID);
+        if (tracked) {
+          currentProviderID = tracked.providerID;
+          currentModelID = tracked.modelID;
+        }
+      }
+
       await client.session.abort({ path: { id: sessionID } });
 
       await client.tui.showToast({
         body: {
           title: "Rate Limit Detected",
-          message:
+          message: `Switching from ${currentModelID || 'current model'}...`,
           variant: "warning",
           duration: 3000,
         },
@@ -171,18 +204,61 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
        state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
       }
 
-
+      let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
+
+      // Handle when no model is found based on fallbackMode
+      if (!nextModel && state.attemptedModels.size > 0) {
+        if (config.fallbackMode === "cycle") {
+          // Reset and retry from the first model
+          state.attemptedModels.clear();
+          if (currentProviderID && currentModelID) {
+            state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
+          }
+          nextModel = findNextAvailableModel("", "", state.attemptedModels);
+        } else if (config.fallbackMode === "retry-last") {
+          // Try the last model in the list once, then reset on next prompt
+          const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
+          if (lastModel) {
+            const lastKey = getModelKey(lastModel.providerID, lastModel.modelID);
+            const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
+
+            if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
+              // Use the last model for one more try
+              nextModel = lastModel;
+              await client.tui.showToast({
+                body: {
+                  title: "Last Resort",
+                  message: `Trying ${lastModel.modelID} one more time...`,
+                  variant: "warning",
+                  duration: 3000,
+                },
+              });
+            } else {
+              // Last model also failed, reset for next prompt
+              state.attemptedModels.clear();
+              if (currentProviderID && currentModelID) {
+                state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
+              }
+              nextModel = findNextAvailableModel("", "", state.attemptedModels);
+            }
+          }
+        }
+        // "stop" mode: nextModel remains null, will show error below
+      }
 
       if (!nextModel) {
         await client.tui.showToast({
           body: {
             title: "No Fallback Available",
-            message:
+            message: config.fallbackMode === "stop"
+              ? "All fallback models exhausted"
+              : "All models are rate limited",
             variant: "error",
             duration: 5000,
           },
         });
         retryState.delete(stateKey);
+        fallbackInProgress.delete(sessionID);
         return;
       }
 
@@ -209,6 +285,9 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
         },
       });
 
+      // Track the new model for this session
+      currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
+
       await client.session.prompt({
         path: { id: sessionID },
         body: {
@@ -227,8 +306,11 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
       });
 
       retryState.delete(stateKey);
+      // Clear fallback flag to allow next fallback if needed
+      fallbackInProgress.delete(sessionID);
     } catch (err) {
-      // Fallback failed
+      // Fallback failed, clear the flag
+      fallbackInProgress.delete(sessionID);
     }
   }
 
@@ -254,10 +336,15 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
       if (status?.type === "retry" && status?.message) {
        const message = status.message.toLowerCase();
-
-
-
-
+        const isRateLimitRetry =
+          message.includes("usage limit") ||
+          message.includes("rate limit") ||
+          message.includes("high concurrency") ||
+          message.includes("reduce concurrency");
+
+        if (isRateLimitRetry) {
+          // Try fallback on any attempt, handleRateLimitFallback will manage state
+          await handleRateLimitFallback(props.sessionID, "", "");
        }
      }
    }
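
As a usage illustration of the broadened detection above, here is a small sketch showing which retry messages would now trigger the fallback path. The function name is illustrative and not part of the plugin; it simply mirrors the substring checks added in the retry handler.

```ts
// Illustrative only: mirrors the substring checks added in the retry handler above.
function looksLikeRateLimitRetry(raw: string): boolean {
  const message = raw.toLowerCase();
  return (
    message.includes("usage limit") ||
    message.includes("rate limit") ||
    message.includes("high concurrency") ||
    message.includes("reduce concurrency")
  );
}

console.log(looksLikeRateLimitRetry("High concurrency usage of this API detected")); // true
console.log(looksLikeRateLimitRetry("Please reduce concurrency and retry"));         // true
console.log(looksLikeRateLimitRetry("Connection reset by peer"));                    // false
```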
package/package.json
CHANGED