@azumag/opencode-rate-limit-fallback 1.0.2 → 1.0.4

package/README.md CHANGED
@@ -6,9 +6,11 @@ OpenCode plugin that automatically switches to fallback models when rate limited
 
  ## Features
 
- - Detects rate limit errors (429, "usage limit", "quota exceeded", etc.)
+ - Detects rate limit errors (429, "usage limit", "quota exceeded", "high concurrency", etc.)
  - Automatically aborts the current request and retries with a fallback model
  - Configurable fallback model list with priority order
+ - Three fallback modes: `cycle`, `stop`, and `retry-last`
+ - Session model tracking for sequential fallback across multiple rate limits
  - Cooldown period to prevent immediate retry on rate-limited models
  - Toast notifications for user feedback
 
@@ -51,6 +53,7 @@ Create a configuration file at one of these locations:
  {
    "enabled": true,
    "cooldownMs": 60000,
+   "fallbackMode": "cycle",
    "fallbackModels": [
      { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
      { "providerID": "google", "modelID": "gemini-2.5-pro" },
@@ -65,8 +68,17 @@ Create a configuration file at one of these locations:
  |--------|------|---------|-------------|
  | `enabled` | boolean | `true` | Enable/disable the plugin |
  | `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
+ | `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
  | `fallbackModels` | array | See below | List of fallback models in priority order |
 
+ ### Fallback Modes
+
+ | Mode | Description |
+ |------|-------------|
+ | `"cycle"` | Reset and retry from the first model when all models are exhausted (default) |
+ | `"stop"` | Stop and show error when all models are exhausted |
+ | `"retry-last"` | Try the last model once more, then reset to first on next prompt |
+
  ### Default Fallback Models
 
  If no configuration is provided, the following models are used:
package/index.ts CHANGED
@@ -7,10 +7,19 @@ interface FallbackModel {
    modelID: string;
  }
 
+ /**
+  * Fallback mode when all models are exhausted:
+  * - "cycle": Reset and retry from the first model (default)
+  * - "stop": Stop and show error message
+  * - "retry-last": Try the last model once, then reset to first on next prompt
+  */
+ type FallbackMode = "cycle" | "stop" | "retry-last";
+
  interface PluginConfig {
    fallbackModels: FallbackModel[];
    cooldownMs: number;
    enabled: boolean;
+   fallbackMode: FallbackMode;
  }
 
  const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
@@ -23,6 +32,7 @@ const DEFAULT_CONFIG: PluginConfig = {
    fallbackModels: DEFAULT_FALLBACK_MODELS,
    cooldownMs: 60 * 1000,
    enabled: true,
+   fallbackMode: "cycle",
  };
 
  function loadConfig(directory: string): PluginConfig {
@@ -39,10 +49,13 @@ function loadConfig(directory: string): PluginConfig {
      try {
        const content = readFileSync(configPath, "utf-8");
        const userConfig = JSON.parse(content);
+       const mode = userConfig.fallbackMode;
+       const validModes: FallbackMode[] = ["cycle", "stop", "retry-last"];
        return {
          ...DEFAULT_CONFIG,
          ...userConfig,
          fallbackModels: userConfig.fallbackModels || DEFAULT_CONFIG.fallbackModels,
+         fallbackMode: validModes.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
        };
      } catch (error) {
        // Config load failed, continue to next path
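The two lines this hunk adds validate `fallbackMode` before the merged config is returned, so a typo cannot put the plugin into an undefined mode. A minimal standalone sketch of that pattern (hypothetical helper name; the type and default are inlined here rather than imported from the plugin):

```ts
type FallbackMode = "cycle" | "stop" | "retry-last";

const VALID_MODES: FallbackMode[] = ["cycle", "stop", "retry-last"];
const DEFAULT_MODE: FallbackMode = "cycle";

// Unrecognized or missing values fall back to the default instead of failing config load.
function resolveFallbackMode(raw: unknown): FallbackMode {
  return VALID_MODES.includes(raw as FallbackMode) ? (raw as FallbackMode) : DEFAULT_MODE;
}

resolveFallbackMode("retry-last"); // "retry-last"
resolveFallbackMode("cycles");     // "cycle" (unknown value)
resolveFallbackMode(undefined);    // "cycle" (key absent)
```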
@@ -76,7 +89,9 @@ function isRateLimitError(error: any): boolean {
      "quota exceeded",
      "resource exhausted",
      "usage limit",
-     "High concurrency usage of this API",
+     "high concurrency usage of this api",
+     "high concurrency",
+     "reduce concurrency",
      "429",
    ];
 
@@ -97,6 +112,8 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
    const rateLimitedModels = new Map<string, number>();
    const retryState = new Map<string, { attemptedModels: Set<string>; lastAttemptTime: number }>();
+   const currentSessionModel = new Map<string, { providerID: string; modelID: string }>();
+   const fallbackInProgress = new Map<string, number>(); // sessionID -> timestamp
 
    function isModelRateLimited(providerID: string, modelID: string): boolean {
      const key = getModelKey(providerID, modelID);
@@ -140,12 +157,28 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
    async function handleRateLimitFallback(sessionID: string, currentProviderID: string, currentModelID: string) {
      try {
+       // Prevent duplicate fallback processing within 5 seconds
+       const lastFallback = fallbackInProgress.get(sessionID);
+       if (lastFallback && Date.now() - lastFallback < 5000) {
+         return;
+       }
+       fallbackInProgress.set(sessionID, Date.now());
+
+       // If no model info provided, try to get from tracked session model
+       if (!currentProviderID || !currentModelID) {
+         const tracked = currentSessionModel.get(sessionID);
+         if (tracked) {
+           currentProviderID = tracked.providerID;
+           currentModelID = tracked.modelID;
+         }
+       }
+
        await client.session.abort({ path: { id: sessionID } });
 
        await client.tui.showToast({
          body: {
            title: "Rate Limit Detected",
-           message: "Switching to fallback model...",
+           message: `Switching from ${currentModelID || 'current model'}...`,
            variant: "warning",
            duration: 3000,
          },
@@ -171,18 +204,61 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
          state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
        }
 
-       const nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
+       let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
+
+       // Handle when no model is found based on fallbackMode
+       if (!nextModel && state.attemptedModels.size > 0) {
+         if (config.fallbackMode === "cycle") {
+           // Reset and retry from the first model
+           state.attemptedModels.clear();
+           if (currentProviderID && currentModelID) {
+             state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
+           }
+           nextModel = findNextAvailableModel("", "", state.attemptedModels);
+         } else if (config.fallbackMode === "retry-last") {
+           // Try the last model in the list once, then reset on next prompt
+           const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
+           if (lastModel) {
+             const lastKey = getModelKey(lastModel.providerID, lastModel.modelID);
+             const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
+
+             if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
+               // Use the last model for one more try
+               nextModel = lastModel;
+               await client.tui.showToast({
+                 body: {
+                   title: "Last Resort",
+                   message: `Trying ${lastModel.modelID} one more time...`,
+                   variant: "warning",
+                   duration: 3000,
+                 },
+               });
+             } else {
+               // Last model also failed, reset for next prompt
+               state.attemptedModels.clear();
+               if (currentProviderID && currentModelID) {
+                 state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
+               }
+               nextModel = findNextAvailableModel("", "", state.attemptedModels);
+             }
+           }
+         }
+         // "stop" mode: nextModel remains null, will show error below
+       }
 
        if (!nextModel) {
          await client.tui.showToast({
            body: {
              title: "No Fallback Available",
-             message: "All models are rate limited",
+             message: config.fallbackMode === "stop"
+               ? "All fallback models exhausted"
+               : "All models are rate limited",
              variant: "error",
              duration: 5000,
            },
          });
          retryState.delete(stateKey);
+         fallbackInProgress.delete(sessionID);
          return;
        }
 
@@ -209,6 +285,9 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
          },
        });
 
+       // Track the new model for this session
+       currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
+
        await client.session.prompt({
          path: { id: sessionID },
          body: {
@@ -227,8 +306,11 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
        });
 
        retryState.delete(stateKey);
+       // Clear fallback flag to allow next fallback if needed
+       fallbackInProgress.delete(sessionID);
      } catch (err) {
-       // Fallback failed silently
+       // Fallback failed, clear the flag
+       fallbackInProgress.delete(sessionID);
      }
    }
 
@@ -254,10 +336,15 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
 
        if (status?.type === "retry" && status?.message) {
          const message = status.message.toLowerCase();
-         if (message.includes("usage limit") || message.includes("rate limit")) {
-           if (status.attempt === 1) {
-             await handleRateLimitFallback(props.sessionID, "", "");
-           }
+         const isRateLimitRetry =
+           message.includes("usage limit") ||
+           message.includes("rate limit") ||
+           message.includes("high concurrency") ||
+           message.includes("reduce concurrency");
+
+         if (isRateLimitRetry) {
+           // Try fallback on any attempt, handleRateLimitFallback will manage state
+           await handleRateLimitFallback(props.sessionID, "", "");
          }
        }
      }
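Condensing the exhaustion handling added above: once every fallback model has been attempted, the three modes diverge as sketched below (a simplified illustration with flattened state and hypothetical helper names, not the plugin's actual functions):

```ts
type FallbackMode = "cycle" | "stop" | "retry-last";
interface FallbackModel { providerID: string; modelID: string }

// Hypothetical condensed decision: which model to try after all fallbacks were attempted.
function onAllModelsAttempted(
  mode: FallbackMode,
  models: FallbackModel[],
  isRateLimited: (m: FallbackModel) => boolean,
): FallbackModel | null {
  if (mode === "stop") {
    return null; // caller shows "All fallback models exhausted" and gives up
  }
  if (mode === "retry-last") {
    const last = models[models.length - 1];
    if (last && !isRateLimited(last)) {
      return last; // one more attempt with the last model in the list
    }
  }
  // "cycle", and "retry-last" when the last model is also unavailable:
  // reset the attempted set and start over from the first usable model.
  return models.find((m) => !isRateLimited(m)) ?? null;
}
```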
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@azumag/opencode-rate-limit-fallback",
-   "version": "1.0.2",
+   "version": "1.0.4",
    "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
    "main": "index.ts",
    "type": "module",
@@ -1,6 +1,7 @@
  {
    "enabled": true,
    "cooldownMs": 60000,
+   "fallbackMode": "cycle",
    "fallbackModels": [
      { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
      { "providerID": "google", "modelID": "gemini-2.5-pro" },