@azumag/opencode-rate-limit-fallback 1.0.4 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ import type { Plugin } from "@opencode-ai/plugin";
2
/**
 * OpenCode plugin that detects rate-limit errors and re-sends the last user
 * message of the affected session to another model from the configured
 * fallback list.
 */
export declare const RateLimitFallback: Plugin;
3
+ export default RateLimitFallback;
package/dist/index.js ADDED
@@ -0,0 +1,316 @@
1
+ import { existsSync, readFileSync } from "fs";
2
+ import { join } from "path";
3
/** Models tried, in list order, when the user's config does not supply a usable list. */
const DEFAULT_FALLBACK_MODELS = [
    { providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
    { providerID: "google", modelID: "gemini-2.5-pro" },
    { providerID: "google", modelID: "gemini-2.5-flash" },
];
/** Settings returned when no usable config file exists; also the base that user settings are merged over. */
const DEFAULT_CONFIG = {
    fallbackModels: DEFAULT_FALLBACK_MODELS,
    cooldownMs: 60 * 1000,
    enabled: true,
    fallbackMode: "cycle",
};
/**
 * Load the plugin settings from the first usable `rate-limit-fallback.json`.
 *
 * Locations are probed in this order: `<directory>/.opencode/`, `<directory>/`,
 * `<home>/.opencode/`, `<home>/.config/opencode/`. A file that cannot be read,
 * is not valid JSON, or whose root is not a JSON object is skipped and the
 * next location is tried.
 *
 * Values from the file are merged over `DEFAULT_CONFIG`. Three fields are
 * validated and replaced by their default when unusable:
 * - `fallbackModels` must be an array; entries lacking a string `providerID`
 *   and `modelID` are dropped.
 * - `cooldownMs` must be a finite, non-negative number.
 * - `fallbackMode` must be "cycle", "stop" or "retry-last".
 * Every other key is copied from the file unchanged.
 *
 * @param directory Project directory whose config locations are probed first.
 * @returns The merged configuration, or `DEFAULT_CONFIG` when no file is usable.
 */
function loadConfig(directory) {
    const CONFIG_FILE = "rate-limit-fallback.json";
    const VALID_MODES = ["cycle", "stop", "retry-last"];
    // HOME is typically unset on Windows, where USERPROFILE plays that role.
    const homedir = process.env.HOME || process.env.USERPROFILE || "";
    const configPaths = [
        join(directory, ".opencode", CONFIG_FILE),
        join(directory, CONFIG_FILE),
    ];
    if (homedir) {
        // With an empty home directory join() would produce paths relative to the
        // current working directory, so the per-user locations are skipped instead.
        configPaths.push(join(homedir, ".opencode", CONFIG_FILE), join(homedir, ".config", "opencode", CONFIG_FILE));
    }
    const isModelEntry = (entry) => entry !== null &&
        typeof entry === "object" &&
        typeof entry.providerID === "string" &&
        typeof entry.modelID === "string";
    for (const configPath of configPaths) {
        if (!existsSync(configPath)) {
            continue;
        }
        let userConfig;
        try {
            userConfig = JSON.parse(readFileSync(configPath, "utf-8"));
        }
        catch {
            // Deliberately best-effort: an unreadable or malformed file must not
            // prevent the plugin from loading, so move on to the next location.
            continue;
        }
        if (userConfig === null || typeof userConfig !== "object" || Array.isArray(userConfig)) {
            continue;
        }
        const { fallbackModels, cooldownMs, fallbackMode } = userConfig;
        const cooldownIsUsable = typeof cooldownMs === "number" && Number.isFinite(cooldownMs) && cooldownMs >= 0;
        return {
            ...DEFAULT_CONFIG,
            ...userConfig,
            fallbackModels: Array.isArray(fallbackModels)
                ? fallbackModels.filter(isModelEntry)
                : DEFAULT_CONFIG.fallbackModels,
            cooldownMs: cooldownIsUsable ? cooldownMs : DEFAULT_CONFIG.cooldownMs,
            fallbackMode: VALID_MODES.includes(fallbackMode) ? fallbackMode : DEFAULT_CONFIG.fallbackMode,
        };
    }
    return DEFAULT_CONFIG;
}
43
/**
 * Build the string that identifies a model in this plugin's maps and sets.
 *
 * @param providerID Provider identifier, e.g. "google".
 * @param modelID Model identifier, e.g. "gemini-2.5-pro".
 * @returns `providerID` and `modelID` joined with a single "/".
 */
function getModelKey(providerID, modelID) {
    return `${providerID}/${modelID}`;
}
46
/** Lower-case phrases that mark an error text as a rate-limit report. */
const RATE_LIMIT_PHRASES = [
    "rate limit",
    "rate_limit",
    "ratelimit",
    "too many requests",
    "quota exceeded",
    "resource exhausted",
    "usage limit",
    "high concurrency",
    "reduce concurrency",
];
/** Matches the status code 429 only as a standalone number, so "14290" does not count. */
const STATUS_429_PATTERN = /\b429\b/;
/** Report whether `text` contains a rate-limit phrase or a standalone "429" (case-insensitive). */
function mentionsRateLimit(text) {
    const lowered = text.toLowerCase();
    return STATUS_429_PATTERN.test(lowered) || RATE_LIMIT_PHRASES.some((phrase) => lowered.includes(phrase));
}
/**
 * Decide whether an error value describes a rate limit.
 *
 * An error counts as a rate limit when `error.data.statusCode` is 429, or when
 * any of `error.data.responseBody`, `error.data.message`, `error.message` or
 * `error.name` is a string that mentions one. A plain string is inspected the
 * same way. Fields that are not strings are ignored instead of being
 * lower-cased, so an object-valued `responseBody` cannot make this throw.
 *
 * @param error Error value taken from an event; may be null or undefined.
 * @returns `true` when the value looks like a rate-limit error.
 */
function isRateLimitError(error) {
    if (!error) {
        return false;
    }
    if (typeof error === "string") {
        return mentionsRateLimit(error);
    }
    if (typeof error !== "object") {
        return false;
    }
    if (error.data?.statusCode === 429) {
        return true;
    }
    const candidates = [error.data?.responseBody, error.data?.message, error.message, error.name];
    return candidates.some((value) => typeof value === "string" && mentionsRateLimit(value));
}
72
/**
 * OpenCode plugin that reacts to rate-limit errors by aborting the session's
 * running request and re-sending the last user message to another model from
 * the configured fallback list.
 *
 * @param input Plugin input. `client` is used for the session, toast and log
 *   calls; `directory` is handed to `loadConfig`.
 * @returns An object with an `event` handler, or an empty object when the
 *   loaded configuration has a falsy `enabled`.
 */
export const RateLimitFallback = async ({ client, directory }) => {
    const config = loadConfig(directory);
    if (!config.enabled) {
        return {};
    }
    // A second trigger for the same session within this window is ignored
    // while a fallback is still running.
    const DUPLICATE_WINDOW_MS = 5000;
    // Per-message retry bookkeeping older than this is discarded and restarted.
    const RETRY_STATE_TTL_MS = 30000;
    // Toast variant -> level used when the message has to be logged instead.
    const LOG_LEVEL_BY_VARIANT = {
        info: "info",
        success: "info",
        warning: "warn",
        error: "error",
    };
    // Phrases in a "retry" status message that identify a rate-limit retry.
    const RETRY_STATUS_PHRASES = ["usage limit", "rate limit", "high concurrency", "reduce concurrency"];
    // model key -> time (ms) at which the model was marked rate limited
    const rateLimitedModels = new Map();
    // `${sessionID}:${messageID}` -> { attemptedModels, lastAttemptTime }
    const retryState = new Map();
    // sessionID -> model most recently selected by this plugin
    const currentSessionModel = new Map();
    // sessionID -> time (ms) at which the running fallback started
    const fallbackInProgress = new Map();
    /** Write one log entry for this plugin; a failing log call is ignored. */
    async function log(level, message) {
        try {
            await client.app.log({
                body: { service: "rate-limit-fallback", level, message },
            });
        }
        catch {
            // Nothing else to report to; logging must never break a fallback.
        }
    }
    /** Show a toast; when that fails, log "title: message" instead. Never throws. */
    async function toast(title, message, variant = "info") {
        try {
            await client.tui.showToast({
                body: { title, message, variant },
            });
        }
        catch {
            await log(LOG_LEVEL_BY_VARIANT[variant] ?? "info", `${title}: ${message}`);
        }
    }
    /** True while the model's cooldown is running; an expired entry is removed. */
    function isModelRateLimited(providerID, modelID) {
        const key = getModelKey(providerID, modelID);
        const limitedAt = rateLimitedModels.get(key);
        if (limitedAt === undefined) {
            return false;
        }
        if (Date.now() - limitedAt > config.cooldownMs) {
            rateLimitedModels.delete(key);
            return false;
        }
        return true;
    }
    /** Start (or restart) the cooldown for a model. */
    function markModelRateLimited(providerID, modelID) {
        rateLimitedModels.set(getModelKey(providerID, modelID), Date.now());
    }
    /**
     * Pick the first fallback model that has not been attempted and is not
     * cooling down. The list is scanned starting just after the current model
     * and wrapping around; when the current model is not in the list the scan
     * starts at the first entry. Returns null when nothing qualifies.
     */
    function findNextAvailableModel(currentProviderID, currentModelID, attemptedModels) {
        const models = config.fallbackModels;
        const currentKey = getModelKey(currentProviderID, currentModelID);
        const startIndex = models.findIndex((m) => getModelKey(m.providerID, m.modelID) === currentKey);
        // startIndex is -1 when the current model is unknown, which makes the
        // first offset land on index 0.
        for (let offset = 1; offset <= models.length; offset++) {
            const model = models[(startIndex + offset) % models.length];
            const key = getModelKey(model.providerID, model.modelID);
            if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
                return model;
            }
        }
        return null;
    }
    /** Forget all attempts except the current model, then search from the top of the list. */
    function restartFromFirstModel(state, currentProviderID, currentModelID) {
        state.attemptedModels.clear();
        if (currentProviderID && currentModelID) {
            state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
        }
        return findNextAvailableModel("", "", state.attemptedModels);
    }
    /**
     * Abort the session's request and re-send its last user message to the
     * next available fallback model. Errors are logged, never rethrown.
     */
    async function handleRateLimitFallback(sessionID, currentProviderID, currentModelID) {
        try {
            const startedAt = fallbackInProgress.get(sessionID);
            if (startedAt && Date.now() - startedAt < DUPLICATE_WINDOW_MS) {
                return;
            }
            fallbackInProgress.set(sessionID, Date.now());
            // Events without model info fall back to the model this plugin last chose.
            if (!currentProviderID || !currentModelID) {
                const tracked = currentSessionModel.get(sessionID);
                if (tracked) {
                    currentProviderID = tracked.providerID;
                    currentModelID = tracked.modelID;
                }
            }
            await client.session.abort({ path: { id: sessionID } });
            await toast("Rate Limit Detected", `Switching from ${currentModelID || 'current model'}...`, "warning");
            const messagesResult = await client.session.messages({ path: { id: sessionID } });
            if (!messagesResult.data) {
                return;
            }
            const lastUserMessage = [...messagesResult.data].reverse().find((m) => m.info.role === "user");
            if (!lastUserMessage) {
                return;
            }
            const stateKey = `${sessionID}:${lastUserMessage.info.id}`;
            let state = retryState.get(stateKey);
            if (!state || Date.now() - state.lastAttemptTime > RETRY_STATE_TTL_MS) {
                state = { attemptedModels: new Set(), lastAttemptTime: Date.now() };
                retryState.set(stateKey, state);
            }
            if (currentProviderID && currentModelID) {
                markModelRateLimited(currentProviderID, currentModelID);
                state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
            }
            let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
            // Nothing left to try: what happens next depends on fallbackMode.
            if (!nextModel && state.attemptedModels.size > 0) {
                if (config.fallbackMode === "cycle") {
                    nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
                }
                else if (config.fallbackMode === "retry-last") {
                    const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
                    if (lastModel) {
                        const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
                        if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
                            // Give the last model in the list one more try.
                            nextModel = lastModel;
                            await toast("Last Resort", `Trying ${lastModel.modelID} one more time...`, "warning");
                        }
                        else {
                            // The last model failed as well; start over.
                            nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
                        }
                    }
                }
                // "stop" mode: nextModel stays null and the error toast below is shown.
            }
            if (!nextModel) {
                await toast("No Fallback Available", config.fallbackMode === "stop"
                    ? "All fallback models exhausted"
                    : "All models are rate limited", "error");
                retryState.delete(stateKey);
                fallbackInProgress.delete(sessionID);
                return;
            }
            state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
            state.lastAttemptTime = Date.now();
            // Only text and file parts are re-sent; every other part type is dropped.
            // NOTE(review): confirm that `path` and `mediaType` are the field names
            // the SDK uses on file parts.
            const parts = lastUserMessage.parts.flatMap((p) => {
                if (p.type === "text") {
                    return [{ type: "text", text: p.text }];
                }
                if (p.type === "file") {
                    return [{ type: "file", path: p.path, mediaType: p.mediaType }];
                }
                return [];
            });
            if (parts.length === 0) {
                return;
            }
            await toast("Retrying", `Using ${nextModel.providerID}/${nextModel.modelID}`, "info");
            // Remember the choice so later events without model info can use it.
            currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
            await client.session.prompt({
                path: { id: sessionID },
                body: {
                    parts: parts,
                    model: { providerID: nextModel.providerID, modelID: nextModel.modelID },
                },
            });
            await toast("Fallback Successful", `Now using ${nextModel.modelID}`, "success");
            retryState.delete(stateKey);
            // Allow the next fallback for this session immediately.
            fallbackInProgress.delete(sessionID);
        }
        catch (err) {
            fallbackInProgress.delete(sessionID);
            const reason = err instanceof Error ? err.message : String(err);
            await log("error", `Fallback failed for session ${sessionID}: ${reason}`);
        }
    }
    return {
        event: async ({ event }) => {
            if (event.type === "session.error") {
                const { sessionID, error } = event.properties ?? {};
                if (sessionID && error && isRateLimitError(error)) {
                    await handleRateLimitFallback(sessionID, "", "");
                }
            }
            if (event.type === "message.updated") {
                const info = event.properties?.info;
                if (info?.sessionID && info.error && isRateLimitError(info.error)) {
                    await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
                }
            }
            if (event.type === "session.status") {
                const props = event.properties;
                const status = props?.status;
                if (props?.sessionID && status?.type === "retry" && typeof status.message === "string") {
                    const message = status.message.toLowerCase();
                    if (RETRY_STATUS_PHRASES.some((phrase) => message.includes(phrase))) {
                        // Any retry attempt may trigger a fallback; duplicates are
                        // filtered inside handleRateLimitFallback.
                        await handleRateLimitFallback(props.sessionID, "", "");
                    }
                }
            }
        },
    };
};
316
+ export default RateLimitFallback;
package/package.json CHANGED
@@ -1,9 +1,12 @@
1
1
  {
2
2
  "name": "@azumag/opencode-rate-limit-fallback",
3
- "version": "1.0.4",
3
+ "version": "1.0.7",
4
4
  "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
5
- "main": "index.ts",
6
5
  "type": "module",
6
+ "scripts": {
7
+ "build": "tsc",
8
+ "prepublishOnly": "npm run build"
9
+ },
7
10
  "keywords": [
8
11
  "opencode",
9
12
  "plugin",
@@ -21,7 +24,21 @@
21
24
  "url": "https://github.com/azumag/opencode-rate-limit-fallback/issues"
22
25
  },
23
26
  "homepage": "https://github.com/azumag/opencode-rate-limit-fallback#readme",
27
+ "files": [
28
+ "dist"
29
+ ],
30
+ "exports": {
31
+ ".": {
32
+ "import": "./dist/index.js",
33
+ "types": "./dist/index.d.ts"
34
+ }
35
+ },
24
36
  "dependencies": {
25
37
  "@opencode-ai/plugin": "latest"
38
+ },
39
+ "devDependencies": {
40
+ "@tsconfig/node22": "^22.0.5",
41
+ "@types/node": "^25.2.2",
42
+ "typescript": "^5.9.3"
26
43
  }
27
44
  }
@@ -1,18 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(npm publish:*)",
5
- "Bash(npm whoami:*)",
6
- "Bash(npm config:*)",
7
- "WebSearch",
8
- "WebFetch(domain:github.blog)",
9
- "Bash(npm login:*)",
10
- "Bash(npm token create:*)",
11
- "Bash(npm view:*)",
12
- "Bash(git add:*)",
13
- "Bash(git commit -m \"$\\(cat <<''EOF''\nPublish as scoped package @azumag/opencode-rate-limit-fallback\n\n- Rename package to @azumag/opencode-rate-limit-fallback\n- Add npm installation instructions to README\n- Add npm version badge\n\nCo-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>\nEOF\n\\)\")",
14
- "Bash(git push:*)",
15
- "Bash(git commit:*)"
16
- ]
17
- }
18
- }
package/index.ts DELETED
@@ -1,355 +0,0 @@
1
- import type { Plugin } from "@opencode-ai/plugin";
2
- import { existsSync, readFileSync } from "fs";
3
- import { join } from "path";
4
-
5
/** One entry of the fallback list: a provider and a model offered by it. */
interface FallbackModel {
  providerID: string;
  modelID: string;
}

/**
 * Fallback mode when all models are exhausted:
 * - "cycle": Reset and retry from the first model (default)
 * - "stop": Stop and show error message
 * - "retry-last": Try the last model once, then reset to first on next prompt
 */
type FallbackMode = "cycle" | "stop" | "retry-last";

/** Settings of the plugin after defaults and user values have been merged. */
interface PluginConfig {
  fallbackModels: FallbackModel[];
  cooldownMs: number;
  enabled: boolean;
  fallbackMode: FallbackMode;
}

/** Models tried, in list order, when the user's config does not supply a usable list. */
const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
  { providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
  { providerID: "google", modelID: "gemini-2.5-pro" },
  { providerID: "google", modelID: "gemini-2.5-flash" },
];

/** Settings returned when no usable config file exists; also the base that user settings are merged over. */
const DEFAULT_CONFIG: PluginConfig = {
  fallbackModels: DEFAULT_FALLBACK_MODELS,
  cooldownMs: 60 * 1000,
  enabled: true,
  fallbackMode: "cycle",
};

/** Type guard: `value` is an object with string `providerID` and `modelID`. */
function isFallbackModel(value: unknown): value is FallbackModel {
  if (typeof value !== "object" || value === null) return false;
  const candidate = value as Record<string, unknown>;
  return typeof candidate.providerID === "string" && typeof candidate.modelID === "string";
}

/**
 * Load the plugin settings from the first usable `rate-limit-fallback.json`.
 *
 * Locations are probed in this order: `<directory>/.opencode/`, `<directory>/`,
 * `<home>/.opencode/`, `<home>/.config/opencode/`. A file that cannot be read,
 * is not valid JSON, or whose root is not a JSON object is skipped and the
 * next location is tried.
 *
 * Values from the file are merged over `DEFAULT_CONFIG`. Three fields are
 * validated and replaced by their default when unusable: `fallbackModels`
 * (must be an array; malformed entries are dropped), `cooldownMs` (must be a
 * finite, non-negative number) and `fallbackMode` (must be a `FallbackMode`).
 * Every other key is copied from the file unchanged.
 *
 * @param directory Project directory whose config locations are probed first.
 * @returns The merged configuration, or `DEFAULT_CONFIG` when no file is usable.
 */
function loadConfig(directory: string): PluginConfig {
  const configFile = "rate-limit-fallback.json";
  const validModes: readonly FallbackMode[] = ["cycle", "stop", "retry-last"];
  // HOME is typically unset on Windows, where USERPROFILE plays that role.
  const homedir = process.env.HOME || process.env.USERPROFILE || "";

  const configPaths = [
    join(directory, ".opencode", configFile),
    join(directory, configFile),
  ];
  if (homedir) {
    // With an empty home directory join() would produce paths relative to the
    // current working directory, so the per-user locations are skipped instead.
    configPaths.push(
      join(homedir, ".opencode", configFile),
      join(homedir, ".config", "opencode", configFile),
    );
  }

  for (const configPath of configPaths) {
    if (!existsSync(configPath)) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(configPath, "utf-8"));
    } catch {
      // Deliberately best-effort: an unreadable or malformed file must not
      // prevent the plugin from loading, so move on to the next location.
      continue;
    }
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;

    const raw = parsed as Record<string, unknown>;
    const { fallbackModels, cooldownMs, fallbackMode } = raw;
    const mode = validModes.find((candidate) => candidate === fallbackMode);
    const cooldownIsUsable =
      typeof cooldownMs === "number" && Number.isFinite(cooldownMs) && cooldownMs >= 0;

    return {
      ...DEFAULT_CONFIG,
      // Remaining keys (e.g. `enabled`) are taken from the file without validation.
      ...(parsed as Partial<PluginConfig>),
      fallbackModels: Array.isArray(fallbackModels)
        ? fallbackModels.filter(isFallbackModel)
        : DEFAULT_CONFIG.fallbackModels,
      cooldownMs: cooldownIsUsable ? (cooldownMs as number) : DEFAULT_CONFIG.cooldownMs,
      fallbackMode: mode ?? DEFAULT_CONFIG.fallbackMode,
    };
  }

  return DEFAULT_CONFIG;
}
68
-
69
/**
 * Build the string that identifies a model in this plugin's maps and sets.
 *
 * @param providerID Provider identifier, e.g. "google".
 * @param modelID Model identifier, e.g. "gemini-2.5-pro".
 * @returns `providerID` and `modelID` joined with a single "/".
 */
function getModelKey(providerID: string, modelID: string): string {
  return `${providerID}/${modelID}`;
}
72
-
73
/** Lower-case phrases that mark an error text as a rate-limit report. */
const RATE_LIMIT_PHRASES: readonly string[] = [
  "rate limit",
  "rate_limit",
  "ratelimit",
  "too many requests",
  "quota exceeded",
  "resource exhausted",
  "usage limit",
  "high concurrency",
  "reduce concurrency",
];

/** Matches the status code 429 only as a standalone number, so "14290" does not count. */
const STATUS_429_PATTERN = /\b429\b/;

/** Report whether `text` contains a rate-limit phrase or a standalone "429" (case-insensitive). */
function mentionsRateLimit(text: string): boolean {
  const lowered = text.toLowerCase();
  return (
    STATUS_429_PATTERN.test(lowered) ||
    RATE_LIMIT_PHRASES.some((phrase) => lowered.includes(phrase))
  );
}

/**
 * Decide whether an error value describes a rate limit.
 *
 * An error counts as a rate limit when `error.data.statusCode` is 429, or when
 * any of `error.data.responseBody`, `error.data.message`, `error.message` or
 * `error.name` is a string that mentions one. A plain string is inspected the
 * same way. Fields that are not strings are ignored instead of being
 * lower-cased, so an object-valued `responseBody` cannot make this throw.
 *
 * @param error Error value taken from an event; may be null or undefined.
 * @returns `true` when the value looks like a rate-limit error.
 */
function isRateLimitError(error: unknown): boolean {
  if (!error) return false;
  if (typeof error === "string") return mentionsRateLimit(error);
  if (typeof error !== "object") return false;

  const { name, message, data } = error as {
    name?: unknown;
    message?: unknown;
    data?: { statusCode?: unknown; message?: unknown; responseBody?: unknown } | null;
  };

  if (data?.statusCode === 429) return true;

  const candidates: unknown[] = [data?.responseBody, data?.message, message, name];
  return candidates.some((value) => typeof value === "string" && mentionsRateLimit(value));
}
105
-
106
/**
 * OpenCode plugin that reacts to rate-limit errors by aborting the session's
 * running request and re-sending the last user message to another model from
 * the configured fallback list.
 *
 * Returns an object with an `event` handler, or an empty object when the
 * loaded configuration has a falsy `enabled`.
 */
export const RateLimitFallback: Plugin = async ({ client, directory }) => {
  type ToastVariant = "info" | "success" | "warning" | "error";
  type LogLevel = "debug" | "info" | "warn" | "error";
  interface RetryState {
    attemptedModels: Set<string>;
    lastAttemptTime: number;
  }

  const config = loadConfig(directory);

  if (!config.enabled) {
    return {};
  }

  // A second trigger for the same session within this window is ignored
  // while a fallback is still running.
  const duplicateWindowMs = 5000;
  // Per-message retry bookkeeping older than this is discarded and restarted.
  const retryStateTtlMs = 30000;
  // Toast variant -> level used when the message has to be logged instead.
  const logLevelByVariant: Record<ToastVariant, LogLevel> = {
    info: "info",
    success: "info",
    warning: "warn",
    error: "error",
  };
  // Phrases in a "retry" status message that identify a rate-limit retry.
  const retryStatusPhrases = ["usage limit", "rate limit", "high concurrency", "reduce concurrency"];

  // model key -> time (ms) at which the model was marked rate limited
  const rateLimitedModels = new Map<string, number>();
  // `${sessionID}:${messageID}` -> retry bookkeeping for that user message
  const retryState = new Map<string, RetryState>();
  // sessionID -> model most recently selected by this plugin
  const currentSessionModel = new Map<string, FallbackModel>();
  // sessionID -> time (ms) at which the running fallback started
  const fallbackInProgress = new Map<string, number>();

  /** Write one log entry for this plugin; a failing log call is ignored. */
  async function log(level: LogLevel, message: string): Promise<void> {
    try {
      await client.app.log({
        body: { service: "rate-limit-fallback", level, message },
      });
    } catch {
      // Nothing else to report to; logging must never break a fallback.
    }
  }

  /**
   * Show a toast; when that fails, log "title: message" instead. Never throws,
   * so a toast problem cannot interrupt a fallback after the session has
   * already been aborted.
   */
  async function toast(
    title: string,
    message: string,
    variant: ToastVariant = "info",
    duration = 3000,
  ): Promise<void> {
    try {
      await client.tui.showToast({
        body: { title, message, variant, duration },
      });
    } catch {
      await log(logLevelByVariant[variant], `${title}: ${message}`);
    }
  }

  /** True while the model's cooldown is running; an expired entry is removed. */
  function isModelRateLimited(providerID: string, modelID: string): boolean {
    const key = getModelKey(providerID, modelID);
    const limitedAt = rateLimitedModels.get(key);
    if (limitedAt === undefined) return false;
    if (Date.now() - limitedAt > config.cooldownMs) {
      rateLimitedModels.delete(key);
      return false;
    }
    return true;
  }

  /** Start (or restart) the cooldown for a model. */
  function markModelRateLimited(providerID: string, modelID: string): void {
    rateLimitedModels.set(getModelKey(providerID, modelID), Date.now());
  }

  /**
   * Pick the first fallback model that has not been attempted and is not
   * cooling down. The list is scanned starting just after the current model
   * and wrapping around; when the current model is not in the list the scan
   * starts at the first entry. Returns null when nothing qualifies.
   */
  function findNextAvailableModel(
    currentProviderID: string,
    currentModelID: string,
    attemptedModels: Set<string>,
  ): FallbackModel | null {
    const models = config.fallbackModels;
    const currentKey = getModelKey(currentProviderID, currentModelID);
    const startIndex = models.findIndex((m) => getModelKey(m.providerID, m.modelID) === currentKey);

    // startIndex is -1 when the current model is unknown, which makes the
    // first offset land on index 0.
    for (let offset = 1; offset <= models.length; offset++) {
      const model = models[(startIndex + offset) % models.length];
      const key = getModelKey(model.providerID, model.modelID);
      if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
        return model;
      }
    }

    return null;
  }

  /** Forget all attempts except the current model, then search from the top of the list. */
  function restartFromFirstModel(
    state: RetryState,
    currentProviderID: string,
    currentModelID: string,
  ): FallbackModel | null {
    state.attemptedModels.clear();
    if (currentProviderID && currentModelID) {
      state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
    }
    return findNextAvailableModel("", "", state.attemptedModels);
  }

  /**
   * Abort the session's request and re-send its last user message to the next
   * available fallback model. Errors are logged, never rethrown.
   */
  async function handleRateLimitFallback(
    sessionID: string,
    currentProviderID: string,
    currentModelID: string,
  ): Promise<void> {
    try {
      const startedAt = fallbackInProgress.get(sessionID);
      if (startedAt && Date.now() - startedAt < duplicateWindowMs) {
        return;
      }
      fallbackInProgress.set(sessionID, Date.now());

      // Events without model info fall back to the model this plugin last chose.
      if (!currentProviderID || !currentModelID) {
        const tracked = currentSessionModel.get(sessionID);
        if (tracked) {
          currentProviderID = tracked.providerID;
          currentModelID = tracked.modelID;
        }
      }

      await client.session.abort({ path: { id: sessionID } });

      await toast("Rate Limit Detected", `Switching from ${currentModelID || 'current model'}...`, "warning");

      const messagesResult = await client.session.messages({ path: { id: sessionID } });
      if (!messagesResult.data) return;

      const lastUserMessage = [...messagesResult.data].reverse().find((m) => m.info.role === "user");
      if (!lastUserMessage) return;

      const stateKey = `${sessionID}:${lastUserMessage.info.id}`;
      let state = retryState.get(stateKey);

      if (!state || Date.now() - state.lastAttemptTime > retryStateTtlMs) {
        state = { attemptedModels: new Set<string>(), lastAttemptTime: Date.now() };
        retryState.set(stateKey, state);
      }

      if (currentProviderID && currentModelID) {
        markModelRateLimited(currentProviderID, currentModelID);
        state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
      }

      let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);

      // Nothing left to try: what happens next depends on fallbackMode.
      if (!nextModel && state.attemptedModels.size > 0) {
        if (config.fallbackMode === "cycle") {
          nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
        } else if (config.fallbackMode === "retry-last") {
          const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
          if (lastModel) {
            const isLastModelCurrent =
              currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;

            if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
              // Give the last model in the list one more try.
              nextModel = lastModel;
              await toast("Last Resort", `Trying ${lastModel.modelID} one more time...`, "warning");
            } else {
              // The last model failed as well; start over.
              nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
            }
          }
        }
        // "stop" mode: nextModel stays null and the error toast below is shown.
      }

      if (!nextModel) {
        await toast(
          "No Fallback Available",
          config.fallbackMode === "stop"
            ? "All fallback models exhausted"
            : "All models are rate limited",
          "error",
          5000,
        );
        retryState.delete(stateKey);
        fallbackInProgress.delete(sessionID);
        return;
      }

      state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
      state.lastAttemptTime = Date.now();

      // Only text and file parts are re-sent; every other part type is dropped.
      // NOTE(review): confirm that `path` and `mediaType` are the field names
      // the SDK uses on file parts. `any` is kept because the part shapes come
      // from the SDK and are not visible here.
      const parts = lastUserMessage.parts.flatMap((p: any) => {
        if (p.type === "text") return [{ type: "text" as const, text: p.text }];
        if (p.type === "file") return [{ type: "file" as const, path: p.path, mediaType: p.mediaType }];
        return [];
      });

      if (parts.length === 0) return;

      await toast("Retrying", `Using ${nextModel.providerID}/${nextModel.modelID}`, "info");

      // Remember the choice so later events without model info can use it.
      currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });

      await client.session.prompt({
        path: { id: sessionID },
        body: {
          parts: parts as any,
          model: { providerID: nextModel.providerID, modelID: nextModel.modelID },
        },
      });

      await toast("Fallback Successful", `Now using ${nextModel.modelID}`, "success");

      retryState.delete(stateKey);
      // Allow the next fallback for this session immediately.
      fallbackInProgress.delete(sessionID);
    } catch (err: unknown) {
      fallbackInProgress.delete(sessionID);
      const reason = err instanceof Error ? err.message : String(err);
      await log("error", `Fallback failed for session ${sessionID}: ${reason}`);
    }
  }

  return {
    event: async ({ event }) => {
      // The event payloads are read through `any`, as their exact shapes are
      // declared by the SDK; every field is checked before use.
      if (event.type === "session.error") {
        const { sessionID, error } = (event.properties ?? {}) as any;
        if (sessionID && error && isRateLimitError(error)) {
          await handleRateLimitFallback(sessionID, "", "");
        }
      }

      if (event.type === "message.updated") {
        const info = (event.properties as any)?.info;
        if (info?.sessionID && info.error && isRateLimitError(info.error)) {
          await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
        }
      }

      if (event.type === "session.status") {
        const props = event.properties as any;
        const status = props?.status;

        if (props?.sessionID && status?.type === "retry" && typeof status.message === "string") {
          const message: string = status.message.toLowerCase();
          if (retryStatusPhrases.some((phrase) => message.includes(phrase))) {
            // Any retry attempt may trigger a fallback; duplicates are
            // filtered inside handleRateLimitFallback.
            await handleRateLimitFallback(props.sessionID, "", "");
          }
        }
      }
    },
  };
};
354
-
355
- export default RateLimitFallback;
@@ -1,10 +0,0 @@
1
- {
2
- "enabled": true,
3
- "cooldownMs": 60000,
4
- "fallbackMode": "cycle",
5
- "fallbackModels": [
6
- { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
7
- { "providerID": "google", "modelID": "gemini-2.5-pro" },
8
- { "providerID": "google", "modelID": "gemini-2.5-flash" }
9
- ]
10
- }