@azumag/opencode-rate-limit-fallback 1.0.3 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/npm-publish.yml +21 -0
- package/README.md +13 -1
- package/index.ts +100 -39
- package/package.json +1 -1
- package/rate-limit-fallback.example.json +1 -0
- package/.claude/settings.local.json +0 -18
package/.github/workflows/npm-publish.yml
ADDED

@@ -0,0 +1,21 @@
+name: npm publish
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '20'
+          registry-url: 'https://registry.npmjs.org'
+
+      - run: npm install
+      - run: npm publish
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
package/README.md
CHANGED
@@ -6,9 +6,11 @@ OpenCode plugin that automatically switches to fallback models when rate limited
 
 ## Features
 
-- Detects rate limit errors (429, "usage limit", "quota exceeded", etc.)
+- Detects rate limit errors (429, "usage limit", "quota exceeded", "high concurrency", etc.)
 - Automatically aborts the current request and retries with a fallback model
 - Configurable fallback model list with priority order
+- Three fallback modes: `cycle`, `stop`, and `retry-last`
+- Session model tracking for sequential fallback across multiple rate limits
 - Cooldown period to prevent immediate retry on rate-limited models
 - Toast notifications for user feedback
 
@@ -51,6 +53,7 @@ Create a configuration file at one of these locations:
 {
   "enabled": true,
   "cooldownMs": 60000,
+  "fallbackMode": "cycle",
   "fallbackModels": [
     { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
     { "providerID": "google", "modelID": "gemini-2.5-pro" },
@@ -65,8 +68,17 @@
 |--------|------|---------|-------------|
 | `enabled` | boolean | `true` | Enable/disable the plugin |
 | `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
+| `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
 | `fallbackModels` | array | See below | List of fallback models in priority order |
 
+### Fallback Modes
+
+| Mode | Description |
+|------|-------------|
+| `"cycle"` | Reset and retry from the first model when all models are exhausted (default) |
+| `"stop"` | Stop and show error when all models are exhausted |
+| `"retry-last"` | Try the last model once more, then reset to first on next prompt |
+
 ### Default Fallback Models
 
 If no configuration is provided, the following models are used:
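The new `fallbackMode` option slots in alongside the existing README fields. As a quick illustration of the combined shape (a sketch based on the tables above, written as a TypeScript literal for type context rather than the JSON file the plugin actually reads; values are illustrative):

```ts
// Sketch of a full config using the new fallbackMode option.
// Field names follow the README tables above; the real file is plain JSON.
const exampleConfig = {
  enabled: true,
  cooldownMs: 60000,      // wait 60s before retrying a rate-limited model
  fallbackMode: "stop",   // "cycle" (default) | "stop" | "retry-last"
  fallbackModels: [
    { providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
    { providerID: "google", modelID: "gemini-2.5-pro" },
  ],
};
```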
package/index.ts
CHANGED
@@ -7,10 +7,19 @@ interface FallbackModel {
   modelID: string;
 }
 
+/**
+ * Fallback mode when all models are exhausted:
+ * - "cycle": Reset and retry from the first model (default)
+ * - "stop": Stop and show error message
+ * - "retry-last": Try the last model once, then reset to first on next prompt
+ */
+type FallbackMode = "cycle" | "stop" | "retry-last";
+
 interface PluginConfig {
   fallbackModels: FallbackModel[];
   cooldownMs: number;
   enabled: boolean;
+  fallbackMode: FallbackMode;
 }
 
 const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
@@ -23,6 +32,7 @@ const DEFAULT_CONFIG: PluginConfig = {
   fallbackModels: DEFAULT_FALLBACK_MODELS,
   cooldownMs: 60 * 1000,
   enabled: true,
+  fallbackMode: "cycle",
 };
 
 function loadConfig(directory: string): PluginConfig {
@@ -39,10 +49,13 @@ function loadConfig(directory: string): PluginConfig {
     try {
       const content = readFileSync(configPath, "utf-8");
       const userConfig = JSON.parse(content);
+      const mode = userConfig.fallbackMode;
+      const validModes: FallbackMode[] = ["cycle", "stop", "retry-last"];
       return {
         ...DEFAULT_CONFIG,
         ...userConfig,
         fallbackModels: userConfig.fallbackModels || DEFAULT_CONFIG.fallbackModels,
+        fallbackMode: validModes.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
       };
     } catch (error) {
       // Config load failed, continue to next path
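The new `fallbackMode` handling in `loadConfig` is a whitelist check: any value other than the three known strings silently falls back to the default. A standalone sketch of that pattern, with an illustrative helper name that is not in the package:

```ts
// Standalone sketch of the fallbackMode whitelist added to loadConfig above.
type FallbackMode = "cycle" | "stop" | "retry-last";
const validModes: FallbackMode[] = ["cycle", "stop", "retry-last"];

function resolveMode(userValue: unknown): FallbackMode {
  return validModes.includes(userValue as FallbackMode)
    ? (userValue as FallbackMode)
    : "cycle"; // default: typos or a missing field silently fall back
}

resolveMode("retry-last"); // -> "retry-last"
resolveMode("cycel");      // -> "cycle" (invalid input is ignored)
```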
@@ -102,6 +115,38 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
   const currentSessionModel = new Map<string, { providerID: string; modelID: string }>();
   const fallbackInProgress = new Map<string, number>(); // sessionID -> timestamp
 
+  async function logOrToast(message: string, variant: "info" | "success" | "warning" | "error" = "info") {
+    try {
+      await client.tui.showToast({
+        body: { message, variant },
+      });
+    } catch {
+      await client.app.log({
+        body: {
+          service: "rate-limit-fallback",
+          level: variant,
+          message,
+        },
+      });
+    }
+  }
+
+  async function toast(title: string, message: string, variant: "info" | "success" | "warning" | "error" = "info") {
+    try {
+      await client.tui.showToast({
+        body: { title, message, variant },
+      });
+    } catch {
+      await client.app.log({
+        body: {
+          service: "rate-limit-fallback",
+          level: variant,
+          message: `${title}: ${message}`,
+        },
+      });
+    }
+  }
+
   function isModelRateLimited(providerID: string, modelID: string): boolean {
     const key = getModelKey(providerID, modelID);
     const limitedAt = rateLimitedModels.get(key);
@@ -121,8 +166,21 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
   function findNextAvailableModel(currentProviderID: string, currentModelID: string, attemptedModels: Set<string>): FallbackModel | null {
     const currentKey = getModelKey(currentProviderID, currentModelID);
     let startIndex = config.fallbackModels.findIndex(m => getModelKey(m.providerID, m.modelID) === currentKey);
-    if (startIndex === -1) startIndex = -1;
 
+    // If current model is not in the fallback list, search from the beginning
+    if (startIndex === -1) {
+      // Only search through all models once (first loop handles this)
+      for (let i = 0; i < config.fallbackModels.length; i++) {
+        const model = config.fallbackModels[i];
+        const key = getModelKey(model.providerID, model.modelID);
+        if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
+          return model;
+        }
+      }
+      return null;
+    }
+
+    // Search for the next model after current position
     for (let i = startIndex + 1; i < config.fallbackModels.length; i++) {
       const model = config.fallbackModels[i];
       const key = getModelKey(model.providerID, model.modelID);
@@ -131,6 +189,7 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
       }
     }
 
+    // Search from the beginning to current position (wrap around)
     for (let i = 0; i <= startIndex && i < config.fallbackModels.length; i++) {
       const model = config.fallbackModels[i];
       const key = getModelKey(model.providerID, model.modelID);
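Taken together, the rewritten `findNextAvailableModel` has three phases: if the current model is not in the configured list it makes a single pass over the whole list; otherwise it scans the entries after the current position and then wraps around to the start, always skipping models already attempted in this retry or still in cooldown. A condensed sketch of that ordering, with generic names rather than the package's own:

```ts
// Condensed sketch of the selection order in findNextAvailableModel above.
// `models` stands in for config.fallbackModels; skip() for the attempted/cooldown checks.
function pickNext<T>(models: T[], currentIndex: number, skip: (m: T) => boolean): T | null {
  if (currentIndex === -1) {
    // current model not in the list: single pass over everything
    return models.find(m => !skip(m)) ?? null;
  }
  // entries after the current one first...
  for (let i = currentIndex + 1; i < models.length; i++) {
    if (!skip(models[i])) return models[i];
  }
  // ...then wrap around from the beginning, up to and including the current slot
  for (let i = 0; i <= currentIndex && i < models.length; i++) {
    if (!skip(models[i])) return models[i];
  }
  return null;
}
```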
@@ -162,14 +221,7 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
     await client.session.abort({ path: { id: sessionID } });
 
-    await client.tui.showToast({
-      body: {
-        title: "Rate Limit Detected",
-        message: `Switching from ${currentModelID || 'current model'}...`,
-        variant: "warning",
-        duration: 3000,
-      },
-    });
+    await toast("Rate Limit Detected", `Switching from ${currentModelID || 'current model'}...`, "warning");
 
     const messagesResult = await client.session.messages({ path: { id: sessionID } });
     if (!messagesResult.data) return;
@@ -193,26 +245,49 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
     let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
 
-    //
+    // Handle when no model is found based on fallbackMode
     if (!nextModel && state.attemptedModels.size > 0) {
-
-
-
-
+      if (config.fallbackMode === "cycle") {
+        // Reset and retry from the first model
+        state.attemptedModels.clear();
+        if (currentProviderID && currentModelID) {
+          state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
+        }
+        nextModel = findNextAvailableModel("", "", state.attemptedModels);
+      } else if (config.fallbackMode === "retry-last") {
+        // Try the last model in the list once, then reset on next prompt
+        const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
+        if (lastModel) {
+          const lastKey = getModelKey(lastModel.providerID, lastModel.modelID);
+          const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
+
+          if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
+            // Use the last model for one more try
+            nextModel = lastModel;
+            await toast("Last Resort", `Trying ${lastModel.modelID} one more time...`, "warning");
+          } else {
+            // Last model also failed, reset for next prompt
+            state.attemptedModels.clear();
+            if (currentProviderID && currentModelID) {
+              state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
+            }
+            nextModel = findNextAvailableModel("", "", state.attemptedModels);
+          }
+        }
+      }
       }
-
+      // "stop" mode: nextModel remains null, will show error below
     }
 
     if (!nextModel) {
-      await
-
-
-
-
-
-
-      });
+      await toast(
+        "No Fallback Available",
+        config.fallbackMode === "stop"
+          ? "All fallback models exhausted"
+          : "All models are rate limited",
+        "error"
+      );
       retryState.delete(stateKey);
+      fallbackInProgress.delete(sessionID);
       return;
     }
 
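In short, the exhausted-list branch now dispatches on the mode: `cycle` clears the attempted set and searches again from the top, `retry-last` gives the final configured model one extra attempt before clearing, and `stop` leaves `nextModel` null so the "No Fallback Available" toast fires. A compact summary of that dispatch (a reviewer sketch with invented labels, not package code):

```ts
// Compact summary of the exhausted-models behavior added above.
type FallbackMode = "cycle" | "stop" | "retry-last";

function onAllModelsAttempted(mode: FallbackMode): "restart-from-first" | "try-last-once" | "give-up" {
  switch (mode) {
    case "cycle":      return "restart-from-first"; // clear attempts, search from index 0 again
    case "retry-last": return "try-last-once";      // last list entry gets one more attempt, then reset
    case "stop":       return "give-up";            // nextModel stays null -> "No Fallback Available"
  }
}
```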
@@ -230,14 +305,7 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
     if (parts.length === 0) return;
 
-    await client.tui.showToast({
-      body: {
-        title: "Retrying",
-        message: `Using ${nextModel.providerID}/${nextModel.modelID}`,
-        variant: "info",
-        duration: 3000,
-      },
-    });
+    await toast("Retrying", `Using ${nextModel.providerID}/${nextModel.modelID}`, "info");
 
     // Track the new model for this session
     currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
@@ -250,14 +318,7 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
       },
     });
 
-    await client.tui.showToast({
-      body: {
-        title: "Fallback Successful",
-        message: `Now using ${nextModel.modelID}`,
-        variant: "success",
-        duration: 3000,
-      },
-    });
+    await toast("Fallback Successful", `Now using ${nextModel.modelID}`, "success");
 
     retryState.delete(stateKey);
     // Clear fallback flag to allow next fallback if needed
package/.claude/settings.local.json
DELETED

@@ -1,18 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(npm publish:*)",
-      "Bash(npm whoami:*)",
-      "Bash(npm config:*)",
-      "WebSearch",
-      "WebFetch(domain:github.blog)",
-      "Bash(npm login:*)",
-      "Bash(npm token create:*)",
-      "Bash(npm view:*)",
-      "Bash(git add:*)",
-      "Bash(git commit -m \"$\\(cat <<''EOF''\nPublish as scoped package @azumag/opencode-rate-limit-fallback\n\n- Rename package to @azumag/opencode-rate-limit-fallback\n- Add npm installation instructions to README\n- Add npm version badge\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
-      "Bash(git push:*)",
-      "Bash(git commit:*)"
-    ]
-  }
-}