@azumag/opencode-rate-limit-fallback 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,9 +6,11 @@ OpenCode plugin that automatically switches to fallback models when rate limited
6
6
 
7
7
  ## Features
8
8
 
9
- - Detects rate limit errors (429, "usage limit", "quota exceeded", etc.)
9
+ - Detects rate limit errors (429, "usage limit", "quota exceeded", "high concurrency", etc.)
10
10
  - Automatically aborts the current request and retries with a fallback model
11
11
  - Configurable fallback model list with priority order
12
+ - Three fallback modes: `cycle`, `stop`, and `retry-last`
13
+ - Session model tracking for sequential fallback across multiple rate limits
12
14
  - Cooldown period to prevent immediate retry on rate-limited models
13
15
  - Toast notifications for user feedback
14
16
 
@@ -51,6 +53,7 @@ Create a configuration file at one of these locations:
51
53
  {
52
54
  "enabled": true,
53
55
  "cooldownMs": 60000,
56
+ "fallbackMode": "cycle",
54
57
  "fallbackModels": [
55
58
  { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
56
59
  { "providerID": "google", "modelID": "gemini-2.5-pro" },
@@ -65,8 +68,17 @@ Create a configuration file at one of these locations:
65
68
  |--------|------|---------|-------------|
66
69
  | `enabled` | boolean | `true` | Enable/disable the plugin |
67
70
  | `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
71
+ | `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
68
72
  | `fallbackModels` | array | See below | List of fallback models in priority order |
69
73
 
74
+ ### Fallback Modes
75
+
76
+ | Mode | Description |
77
+ |------|-------------|
78
+ | `"cycle"` | Reset and retry from the first model when all models are exhausted (default) |
79
+ | `"stop"` | Stop and show an error when all models are exhausted |
80
+ | `"retry-last"` | Try the last model once more, then reset to the first model on the next prompt |
81
+
70
82
  ### Default Fallback Models
71
83
 
72
84
  If no configuration is provided, the following models are used:
package/index.ts CHANGED
@@ -7,10 +7,19 @@ interface FallbackModel {
7
7
  modelID: string;
8
8
  }
9
9
 
10
+ /**
11
+ * Fallback mode when all models are exhausted:
12
+ * - "cycle": Reset and retry from the first model (default)
13
+ * - "stop": Stop and show error message
14
+ * - "retry-last": Try the last model once more, then reset to the first model on the next prompt
15
+ */
16
+ type FallbackMode = "cycle" | "stop" | "retry-last";
17
+
10
18
  interface PluginConfig {
11
19
  fallbackModels: FallbackModel[];
12
20
  cooldownMs: number;
13
21
  enabled: boolean;
22
+ fallbackMode: FallbackMode;
14
23
  }
15
24
 
16
25
  const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
@@ -23,6 +32,7 @@ const DEFAULT_CONFIG: PluginConfig = {
23
32
  fallbackModels: DEFAULT_FALLBACK_MODELS,
24
33
  cooldownMs: 60 * 1000,
25
34
  enabled: true,
35
+ fallbackMode: "cycle",
26
36
  };
27
37
 
28
38
  function loadConfig(directory: string): PluginConfig {
@@ -39,10 +49,13 @@ function loadConfig(directory: string): PluginConfig {
39
49
  try {
40
50
  const content = readFileSync(configPath, "utf-8");
41
51
  const userConfig = JSON.parse(content);
52
+ const mode = userConfig.fallbackMode;
53
+ const validModes: FallbackMode[] = ["cycle", "stop", "retry-last"];
42
54
  return {
43
55
  ...DEFAULT_CONFIG,
44
56
  ...userConfig,
45
57
  fallbackModels: userConfig.fallbackModels || DEFAULT_CONFIG.fallbackModels,
58
+ fallbackMode: validModes.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
46
59
  };
47
60
  } catch (error) {
48
61
  // Config load failed, continue to next path
@@ -193,26 +206,59 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
193
206
 
194
207
  let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
195
208
 
196
- // If no model found and we've attempted models, reset and try again from the beginning
209
+ // Handle when no model is found based on fallbackMode
197
210
  if (!nextModel && state.attemptedModels.size > 0) {
198
- state.attemptedModels.clear();
199
- // Keep the current model marked as attempted to avoid immediate retry
200
- if (currentProviderID && currentModelID) {
201
- state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
211
+ if (config.fallbackMode === "cycle") {
212
+ // Reset and retry from the first model
213
+ state.attemptedModels.clear();
214
+ if (currentProviderID && currentModelID) {
215
+ state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
216
+ }
217
+ nextModel = findNextAvailableModel("", "", state.attemptedModels);
218
+ } else if (config.fallbackMode === "retry-last") {
219
+ // Try the last model in the list once, then reset on next prompt
220
+ const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
221
+ if (lastModel) {
222
+ const lastKey = getModelKey(lastModel.providerID, lastModel.modelID);
223
+ const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
224
+
225
+ if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
226
+ // Use the last model for one more try
227
+ nextModel = lastModel;
228
+ await client.tui.showToast({
229
+ body: {
230
+ title: "Last Resort",
231
+ message: `Trying ${lastModel.modelID} one more time...`,
232
+ variant: "warning",
233
+ duration: 3000,
234
+ },
235
+ });
236
+ } else {
237
+ // Last model also failed, reset for next prompt
238
+ state.attemptedModels.clear();
239
+ if (currentProviderID && currentModelID) {
240
+ state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
241
+ }
242
+ nextModel = findNextAvailableModel("", "", state.attemptedModels);
243
+ }
244
+ }
202
245
  }
203
- nextModel = findNextAvailableModel("", "", state.attemptedModels);
246
+ // "stop" mode: nextModel remains null, will show error below
204
247
  }
205
248
 
206
249
  if (!nextModel) {
207
250
  await client.tui.showToast({
208
251
  body: {
209
252
  title: "No Fallback Available",
210
- message: "All models are rate limited",
253
+ message: config.fallbackMode === "stop"
254
+ ? "All fallback models exhausted"
255
+ : "All models are rate limited",
211
256
  variant: "error",
212
257
  duration: 5000,
213
258
  },
214
259
  });
215
260
  retryState.delete(stateKey);
261
+ fallbackInProgress.delete(sessionID);
216
262
  return;
217
263
  }
218
264
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@azumag/opencode-rate-limit-fallback",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
5
5
  "main": "index.ts",
6
6
  "type": "module",
@@ -1,6 +1,7 @@
1
1
  {
2
2
  "enabled": true,
3
3
  "cooldownMs": 60000,
4
+ "fallbackMode": "cycle",
4
5
  "fallbackModels": [
5
6
  { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
6
7
  { "providerID": "google", "modelID": "gemini-2.5-pro" },