@azumag/opencode-rate-limit-fallback 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ import type { Plugin } from "@opencode-ai/plugin";
2
+ export declare const RateLimitFallback: Plugin;
3
+ export default RateLimitFallback;
package/dist/index.js ADDED
@@ -0,0 +1,316 @@
1
+ import { existsSync, readFileSync } from "fs";
2
+ import { join } from "path";
3
/** Built-in fallback chain, tried in list order when the user config supplies no usable list. */
const DEFAULT_FALLBACK_MODELS = [
    { providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
    { providerID: "google", modelID: "gemini-2.5-pro" },
    { providerID: "google", modelID: "gemini-2.5-flash" },
];
/** Settings returned when no config file can be loaded; also the base that user settings override. */
const DEFAULT_CONFIG = {
    fallbackModels: DEFAULT_FALLBACK_MODELS,
    cooldownMs: 60 * 1000,
    enabled: true,
    fallbackMode: "cycle",
};
/** Accepted `fallbackMode` values; anything else is replaced by the default mode. */
const VALID_FALLBACK_MODES = ["cycle", "stop", "retry-last"];
/** File name looked up in every candidate config directory. */
const CONFIG_FILE_NAME = "rate-limit-fallback.json";
/** True when `value` is an object carrying non-empty string `providerID` and `modelID`. */
function isFallbackModel(value) {
    return (value !== null &&
        typeof value === "object" &&
        typeof value.providerID === "string" &&
        value.providerID !== "" &&
        typeof value.modelID === "string" &&
        value.modelID !== "");
}
/**
 * Loads the plugin configuration from the first readable config file.
 *
 * Search order: `<directory>/.opencode/`, `<directory>/`, `<home>/.opencode/`,
 * `<home>/.config/opencode/`. A file that cannot be read, is not valid JSON, or
 * does not contain a JSON object is skipped and the next candidate is tried.
 *
 * Individual settings are validated; an invalid value is replaced by its default:
 * - `fallbackModels` must be an array; entries without string `providerID`/`modelID` are dropped.
 * - `cooldownMs` must be a finite, non-negative number.
 * - `fallbackMode` must be one of "cycle", "stop", "retry-last".
 * - `enabled`, when present, is coerced to a boolean.
 *
 * @param directory Project directory whose config files take priority.
 * @returns The merged configuration, or `DEFAULT_CONFIG` when no usable file exists.
 */
function loadConfig(directory) {
    // HOME is usually unset on Windows, where USERPROFILE holds the home directory.
    const home = process.env.HOME || process.env.USERPROFILE || "";
    const configPaths = [
        join(directory, ".opencode", CONFIG_FILE_NAME),
        join(directory, CONFIG_FILE_NAME),
    ];
    if (home) {
        // With an empty home these joins would silently resolve relative to the cwd.
        configPaths.push(join(home, ".opencode", CONFIG_FILE_NAME), join(home, ".config", "opencode", CONFIG_FILE_NAME));
    }
    for (const configPath of configPaths) {
        if (!existsSync(configPath)) {
            continue;
        }
        let userConfig;
        try {
            userConfig = JSON.parse(readFileSync(configPath, "utf-8"));
        }
        catch {
            // Unreadable or malformed file: fall through to the next candidate path.
            continue;
        }
        if (userConfig === null || typeof userConfig !== "object" || Array.isArray(userConfig)) {
            continue;
        }
        const { fallbackModels, cooldownMs, fallbackMode } = userConfig;
        return {
            ...DEFAULT_CONFIG,
            ...userConfig,
            fallbackModels: Array.isArray(fallbackModels)
                ? fallbackModels.filter(isFallbackModel)
                : DEFAULT_CONFIG.fallbackModels,
            cooldownMs: typeof cooldownMs === "number" && Number.isFinite(cooldownMs) && cooldownMs >= 0
                ? cooldownMs
                : DEFAULT_CONFIG.cooldownMs,
            enabled: "enabled" in userConfig ? Boolean(userConfig.enabled) : DEFAULT_CONFIG.enabled,
            fallbackMode: VALID_FALLBACK_MODES.includes(fallbackMode) ? fallbackMode : DEFAULT_CONFIG.fallbackMode,
        };
    }
    return DEFAULT_CONFIG;
}
43
/**
 * Builds the composite "<providerID>/<modelID>" string used as the key for
 * per-model bookkeeping.
 */
function getModelKey(providerID, modelID) {
    const separator = "/";
    return "".concat(providerID, separator, modelID);
}
46
/** Lower-case phrases that mark an error text as a rate-limit response. */
const RATE_LIMIT_PHRASES = [
    "rate limit",
    "rate_limit",
    "ratelimit",
    "too many requests",
    "quota exceeded",
    "resource exhausted",
    "usage limit",
    "high concurrency", // also covers "high concurrency usage of this api"
    "reduce concurrency",
];
/** Matches "429" only when it is not part of a longer number (e.g. not "14290"). */
const STATUS_429_PATTERN = /(?<!\d)429(?!\d)/;
/**
 * Converts an arbitrary error field to lower-case text that can be searched.
 * Strings are used as-is, null/undefined become "", and any other value is
 * JSON-serialized so structured response bodies can still be inspected.
 */
function toSearchText(value) {
    if (typeof value === "string") {
        return value.toLowerCase();
    }
    if (value === null || value === undefined) {
        return "";
    }
    try {
        const serialized = JSON.stringify(value);
        return typeof serialized === "string" ? serialized.toLowerCase() : "";
    }
    catch {
        // Circular structures and similar cannot be serialized; treat them as empty.
        return "";
    }
}
/**
 * Decides whether an error object describes a rate-limit failure.
 *
 * An error qualifies when `error.data.statusCode` is 429, or when the response
 * body, the message (`error.data.message`, else `error.message`) or the error
 * name contains one of the known rate-limit phrases or a standalone "429".
 *
 * @param error Error-like value taken from an event payload; may be anything.
 * @returns `true` for a rate-limit error, `false` otherwise (including non-objects).
 */
function isRateLimitError(error) {
    if (!error || typeof error !== "object") {
        return false;
    }
    const data = error.data;
    if (data?.statusCode === 429) {
        return true;
    }
    const searchTexts = [
        toSearchText(data?.responseBody),
        toSearchText(data?.message || error.message),
        toSearchText(error.name),
    ];
    return searchTexts.some((text) => STATUS_429_PATTERN.test(text) ||
        RATE_LIMIT_PHRASES.some((phrase) => text.includes(phrase)));
}
72
/**
 * OpenCode plugin that reacts to rate-limit errors by aborting the in-flight
 * request and re-sending the last user message to the next usable model from
 * the configured fallback list.
 *
 * @param input Plugin context; only `client` and `directory` are used.
 *   `directory` is handed to `loadConfig` to locate the config file.
 * @returns An empty hooks object when the plugin is disabled in the config,
 *   otherwise an object with a single `event` hook.
 */
export const RateLimitFallback = async ({ client, directory }) => {
    const config = loadConfig(directory);
    if (!config.enabled) {
        return {};
    }
    const SERVICE_NAME = "rate-limit-fallback";
    // Triggers for a session that started being handled less than this long ago are ignored.
    const DUPLICATE_WINDOW_MS = 5000;
    // Attempt history for a user message is discarded after this much inactivity.
    const RETRY_STATE_TTL_MS = 30000;
    // Log level used for each toast variant when the toast falls back to the app log.
    const LOG_LEVEL_BY_VARIANT = {
        info: "info",
        success: "info",
        warning: "warn",
        error: "error",
    };
    const rateLimitedModels = new Map(); // model key -> time it was marked rate limited
    const retryState = new Map(); // "<sessionID>:<messageID>" -> { attemptedModels, lastAttemptTime }
    const currentSessionModel = new Map(); // sessionID -> model last selected by this plugin
    const fallbackInProgress = new Map(); // sessionID -> timestamp
    /** Writes one entry to the app log; failures are ignored so logging can never abort a fallback. */
    async function writeLog(level, message) {
        try {
            await client.app.log({
                body: { service: SERVICE_NAME, level, message },
            });
        }
        catch {
            // Best effort only.
        }
    }
    /** Shows a toast; when the toast call fails, the same text is written to the app log instead. */
    async function toast(title, message, variant = "info") {
        try {
            await client.tui.showToast({
                body: { title, message, variant },
            });
        }
        catch {
            await writeLog(LOG_LEVEL_BY_VARIANT[variant] ?? "info", `${title}: ${message}`);
        }
    }
    /** True while the model is inside its cooldown window; expired marks are removed on lookup. */
    function isModelRateLimited(providerID, modelID) {
        const key = getModelKey(providerID, modelID);
        const limitedAt = rateLimitedModels.get(key);
        if (!limitedAt)
            return false;
        if (Date.now() - limitedAt > config.cooldownMs) {
            rateLimitedModels.delete(key);
            return false;
        }
        return true;
    }
    /** Records that the model was rate limited just now. */
    function markModelRateLimited(providerID, modelID) {
        rateLimitedModels.set(getModelKey(providerID, modelID), Date.now());
    }
    /**
     * Returns the first fallback model that is neither already attempted nor in
     * cooldown, or `null` when there is none.
     *
     * The list is scanned as a ring starting just after the current model, so the
     * current model itself is considered last. When the current model is not in
     * the list, the scan starts at the first entry.
     */
    function findNextAvailableModel(currentProviderID, currentModelID, attemptedModels) {
        const models = config.fallbackModels;
        const currentKey = getModelKey(currentProviderID, currentModelID);
        const startIndex = models.findIndex((m) => getModelKey(m.providerID, m.modelID) === currentKey);
        // startIndex === -1 makes offset 1 land on index 0, i.e. a plain front-to-back scan.
        for (let offset = 1; offset <= models.length; offset++) {
            const model = models[(startIndex + offset) % models.length];
            const key = getModelKey(model.providerID, model.modelID);
            if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
                return model;
            }
        }
        return null;
    }
    /** Forgets earlier attempts (except the current model) and searches again from the first entry. */
    function restartFromFirstModel(state, providerID, modelID) {
        state.attemptedModels.clear();
        if (providerID && modelID) {
            state.attemptedModels.add(getModelKey(providerID, modelID));
        }
        return findNextAvailableModel("", "", state.attemptedModels);
    }
    /**
     * Aborts the session's current request and re-sends its last user message
     * to the next available fallback model.
     *
     * @param sessionID Session that hit the rate limit.
     * @param currentProviderID Provider of the failing model, or "" when unknown.
     * @param currentModelID Failing model, or "" when unknown; unknown values are
     *   filled from the model this plugin last selected for the session, if any.
     */
    async function handleRateLimitFallback(sessionID, currentProviderID, currentModelID) {
        // Several events can report the same failure; only the first one is handled.
        const lastFallback = fallbackInProgress.get(sessionID);
        if (lastFallback && Date.now() - lastFallback < DUPLICATE_WINDOW_MS) {
            return;
        }
        fallbackInProgress.set(sessionID, Date.now());
        try {
            if (!currentProviderID || !currentModelID) {
                const tracked = currentSessionModel.get(sessionID);
                if (tracked) {
                    currentProviderID = tracked.providerID;
                    currentModelID = tracked.modelID;
                }
            }
            await client.session.abort({ path: { id: sessionID } });
            await toast("Rate Limit Detected", `Switching from ${currentModelID || 'current model'}...`, "warning");
            const messagesResult = await client.session.messages({ path: { id: sessionID } });
            if (!messagesResult.data) {
                return;
            }
            const lastUserMessage = [...messagesResult.data].reverse().find((m) => m.info.role === "user");
            if (!lastUserMessage) {
                return;
            }
            const stateKey = `${sessionID}:${lastUserMessage.info.id}`;
            let state = retryState.get(stateKey);
            if (!state || Date.now() - state.lastAttemptTime > RETRY_STATE_TTL_MS) {
                state = { attemptedModels: new Set(), lastAttemptTime: Date.now() };
                retryState.set(stateKey, state);
            }
            if (currentProviderID && currentModelID) {
                markModelRateLimited(currentProviderID, currentModelID);
                state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
            }
            let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
            // Nothing left to try: the configured fallbackMode decides what happens next.
            if (!nextModel && state.attemptedModels.size > 0) {
                if (config.fallbackMode === "cycle") {
                    nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
                }
                else if (config.fallbackMode === "retry-last") {
                    const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
                    if (lastModel) {
                        const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
                        if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
                            // Give the last model in the list one more try.
                            nextModel = lastModel;
                            await toast("Last Resort", `Trying ${lastModel.modelID} one more time...`, "warning");
                        }
                        else {
                            // The last model failed as well; start over from the first entry.
                            nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
                        }
                    }
                }
                // "stop" mode: nextModel stays null and the error toast below is shown.
            }
            if (!nextModel) {
                await toast("No Fallback Available", config.fallbackMode === "stop"
                    ? "All fallback models exhausted"
                    : "All models are rate limited", "error");
                retryState.delete(stateKey);
                return;
            }
            state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
            state.lastAttemptTime = Date.now();
            // Only text and file parts are re-sent; every other part type is dropped.
            // NOTE(review): file parts are forwarded with `path`/`mediaType` — confirm these
            // are the field names the prompt endpoint expects for file inputs.
            const parts = [];
            for (const part of lastUserMessage.parts ?? []) {
                if (part.type === "text") {
                    parts.push({ type: "text", text: part.text });
                }
                else if (part.type === "file") {
                    parts.push({ type: "file", path: part.path, mediaType: part.mediaType });
                }
            }
            if (parts.length === 0) {
                return;
            }
            await toast("Retrying", `Using ${nextModel.providerID}/${nextModel.modelID}`, "info");
            // Remember the selection so later events without model info can be attributed to it.
            currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
            await client.session.prompt({
                path: { id: sessionID },
                body: {
                    parts: parts,
                    model: { providerID: nextModel.providerID, modelID: nextModel.modelID },
                },
            });
            await toast("Fallback Successful", `Now using ${nextModel.modelID}`, "success");
            retryState.delete(stateKey);
        }
        catch (err) {
            // Record the failure instead of dropping it silently.
            const reason = err instanceof Error ? err.message : String(err);
            await writeLog("error", `Fallback failed for session ${sessionID}: ${reason}`);
        }
        finally {
            // Release the in-progress mark on every exit path so the next failure can be handled.
            fallbackInProgress.delete(sessionID);
        }
    }
    return {
        /** Inspects session/message events and starts a fallback when one reports a rate limit. */
        event: async ({ event }) => {
            if (event.type === "session.error") {
                const { sessionID, error } = event.properties ?? {};
                if (sessionID && error && isRateLimitError(error)) {
                    await handleRateLimitFallback(sessionID, "", "");
                }
            }
            if (event.type === "message.updated") {
                const info = event.properties?.info;
                if (info?.sessionID && info.error && isRateLimitError(info.error)) {
                    await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
                }
            }
            if (event.type === "session.status") {
                const props = event.properties;
                const status = props?.status;
                if (props?.sessionID && status?.type === "retry" && typeof status.message === "string") {
                    const message = status.message.toLowerCase();
                    const isRateLimitRetry = ["usage limit", "rate limit", "high concurrency", "reduce concurrency"]
                        .some((phrase) => message.includes(phrase));
                    if (isRateLimitRetry) {
                        // Fall back on any retry attempt; handleRateLimitFallback de-duplicates.
                        await handleRateLimitFallback(props.sessionID, "", "");
                    }
                }
            }
        },
    };
};
316
+ export default RateLimitFallback;
package/package.json CHANGED
@@ -1,9 +1,12 @@
1
1
  {
2
2
  "name": "@azumag/opencode-rate-limit-fallback",
3
- "version": "1.0.6",
3
+ "version": "1.0.8",
4
4
  "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
5
- "main": "index.ts",
6
5
  "type": "module",
6
+ "scripts": {
7
+ "build": "tsc",
8
+ "prepublishOnly": "npm run build"
9
+ },
7
10
  "keywords": [
8
11
  "opencode",
9
12
  "plugin",
@@ -21,7 +24,21 @@
21
24
  "url": "https://github.com/azumag/opencode-rate-limit-fallback/issues"
22
25
  },
23
26
  "homepage": "https://github.com/azumag/opencode-rate-limit-fallback#readme",
27
+ "files": [
28
+ "dist"
29
+ ],
30
+ "exports": {
31
+ ".": {
32
+ "import": "./dist/index.js",
33
+ "types": "./dist/index.d.ts"
34
+ }
35
+ },
24
36
  "dependencies": {
25
37
  "@opencode-ai/plugin": "latest"
38
+ },
39
+ "devDependencies": {
40
+ "@tsconfig/node22": "^22.0.5",
41
+ "@types/node": "^25.2.2",
42
+ "typescript": "^5.9.3"
26
43
  }
27
44
  }
@@ -1,21 +0,0 @@
1
- name: npm publish
2
-
3
- on:
4
- push:
5
- branches:
6
- - main
7
-
8
- jobs:
9
- publish:
10
- runs-on: ubuntu-latest
11
- steps:
12
- - uses: actions/checkout@v4
13
- - uses: actions/setup-node@v4
14
- with:
15
- node-version: '20'
16
- registry-url: 'https://registry.npmjs.org'
17
-
18
- - run: npm install
19
- - run: npm publish
20
- env:
21
- NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
package/index.ts DELETED
@@ -1,370 +0,0 @@
1
- import type { Plugin } from "@opencode-ai/plugin";
2
- import { existsSync, readFileSync } from "fs";
3
- import { join } from "path";
4
-
5
/** One entry of the fallback chain: a model addressed by provider and model id. */
interface FallbackModel {
  providerID: string;
  modelID: string;
}

/**
 * Fallback mode when all models are exhausted:
 * - "cycle": Reset and retry from the first model (default)
 * - "stop": Stop and show error message
 * - "retry-last": Try the last model once, then reset to first on next prompt
 */
type FallbackMode = "cycle" | "stop" | "retry-last";

/** Fully resolved plugin settings. */
interface PluginConfig {
  fallbackModels: FallbackModel[];
  cooldownMs: number;
  enabled: boolean;
  fallbackMode: FallbackMode;
}

/** Built-in fallback chain, tried in list order when the user config supplies no usable list. */
const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
  { providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
  { providerID: "google", modelID: "gemini-2.5-pro" },
  { providerID: "google", modelID: "gemini-2.5-flash" },
];

/** Settings returned when no config file can be loaded; also the base that user settings override. */
const DEFAULT_CONFIG: PluginConfig = {
  fallbackModels: DEFAULT_FALLBACK_MODELS,
  cooldownMs: 60 * 1000,
  enabled: true,
  fallbackMode: "cycle",
};

/** Accepted `fallbackMode` values; anything else is replaced by the default mode. */
const VALID_FALLBACK_MODES: readonly FallbackMode[] = ["cycle", "stop", "retry-last"];

/** File name looked up in every candidate config directory. */
const CONFIG_FILE_NAME = "rate-limit-fallback.json";

/** Type guard: an object carrying non-empty string `providerID` and `modelID`. */
function isFallbackModel(value: unknown): value is FallbackModel {
  if (value === null || typeof value !== "object") return false;
  const { providerID, modelID } = value as Record<string, unknown>;
  return (
    typeof providerID === "string" &&
    providerID !== "" &&
    typeof modelID === "string" &&
    modelID !== ""
  );
}

/** Type guard for the supported fallback modes. */
function isFallbackMode(value: unknown): value is FallbackMode {
  return typeof value === "string" && (VALID_FALLBACK_MODES as readonly string[]).includes(value);
}

/**
 * Loads the plugin configuration from the first readable config file.
 *
 * Search order: `<directory>/.opencode/`, `<directory>/`, `<home>/.opencode/`,
 * `<home>/.config/opencode/`. A file that cannot be read, is not valid JSON, or
 * does not contain a JSON object is skipped and the next candidate is tried.
 *
 * Individual settings are validated; an invalid value is replaced by its default:
 * - `fallbackModels` must be an array; entries without string `providerID`/`modelID` are dropped.
 * - `cooldownMs` must be a finite, non-negative number.
 * - `fallbackMode` must be one of "cycle", "stop", "retry-last".
 * - `enabled`, when present, is coerced to a boolean.
 *
 * @param directory Project directory whose config files take priority.
 * @returns The merged configuration, or `DEFAULT_CONFIG` when no usable file exists.
 */
function loadConfig(directory: string): PluginConfig {
  // HOME is usually unset on Windows, where USERPROFILE holds the home directory.
  const home = process.env.HOME || process.env.USERPROFILE || "";
  const configPaths = [
    join(directory, ".opencode", CONFIG_FILE_NAME),
    join(directory, CONFIG_FILE_NAME),
  ];
  if (home) {
    // With an empty home these joins would silently resolve relative to the cwd.
    configPaths.push(
      join(home, ".opencode", CONFIG_FILE_NAME),
      join(home, ".config", "opencode", CONFIG_FILE_NAME),
    );
  }

  for (const configPath of configPaths) {
    if (!existsSync(configPath)) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(configPath, "utf-8"));
    } catch {
      // Unreadable or malformed file: fall through to the next candidate path.
      continue;
    }
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) continue;

    const userConfig = parsed as Record<string, unknown>;
    const { fallbackModels, cooldownMs, fallbackMode } = userConfig;
    return {
      ...DEFAULT_CONFIG,
      ...userConfig,
      fallbackModels: Array.isArray(fallbackModels)
        ? fallbackModels.filter(isFallbackModel)
        : DEFAULT_CONFIG.fallbackModels,
      cooldownMs:
        typeof cooldownMs === "number" && Number.isFinite(cooldownMs) && cooldownMs >= 0
          ? cooldownMs
          : DEFAULT_CONFIG.cooldownMs,
      enabled: "enabled" in userConfig ? Boolean(userConfig.enabled) : DEFAULT_CONFIG.enabled,
      fallbackMode: isFallbackMode(fallbackMode) ? fallbackMode : DEFAULT_CONFIG.fallbackMode,
    };
  }

  return DEFAULT_CONFIG;
}
68
-
69
/**
 * Builds the composite "<providerID>/<modelID>" string used as the key for
 * per-model bookkeeping.
 */
function getModelKey(providerID: string, modelID: string): string {
  const separator = "/";
  return "".concat(providerID, separator, modelID);
}
72
-
73
/** Lower-case phrases that mark an error text as a rate-limit response. */
const RATE_LIMIT_PHRASES: readonly string[] = [
  "rate limit",
  "rate_limit",
  "ratelimit",
  "too many requests",
  "quota exceeded",
  "resource exhausted",
  "usage limit",
  "high concurrency", // also covers "high concurrency usage of this api"
  "reduce concurrency",
];

/** Matches "429" only when it is not part of a longer number (e.g. not "14290"). */
const STATUS_429_PATTERN = /(?<!\d)429(?!\d)/;

/**
 * Converts an arbitrary error field to lower-case text that can be searched.
 * Strings are used as-is, null/undefined become "", and any other value is
 * JSON-serialized so structured response bodies can still be inspected.
 */
function toSearchText(value: unknown): string {
  if (typeof value === "string") return value.toLowerCase();
  if (value === null || value === undefined) return "";
  try {
    const serialized: unknown = JSON.stringify(value);
    return typeof serialized === "string" ? serialized.toLowerCase() : "";
  } catch {
    // Circular structures and similar cannot be serialized; treat them as empty.
    return "";
  }
}

/**
 * Decides whether an error object describes a rate-limit failure.
 *
 * An error qualifies when `error.data.statusCode` is 429, or when the response
 * body, the message (`error.data.message`, else `error.message`) or the error
 * name contains one of the known rate-limit phrases or a standalone "429".
 *
 * @param error Error-like value taken from an event payload; may be anything.
 * @returns `true` for a rate-limit error, `false` otherwise (including non-objects).
 */
function isRateLimitError(error: unknown): boolean {
  if (error === null || typeof error !== "object") return false;

  const { name, message, data } = error as { name?: unknown; message?: unknown; data?: unknown };
  const details = (data !== null && typeof data === "object" ? data : {}) as {
    statusCode?: unknown;
    responseBody?: unknown;
    message?: unknown;
  };

  if (details.statusCode === 429) {
    return true;
  }

  const searchTexts = [
    toSearchText(details.responseBody),
    toSearchText(details.message || message),
    toSearchText(name),
  ];

  return searchTexts.some(
    (text) =>
      STATUS_429_PATTERN.test(text) ||
      RATE_LIMIT_PHRASES.some((phrase) => text.includes(phrase)),
  );
}
105
-
106
/**
 * OpenCode plugin that reacts to rate-limit errors by aborting the in-flight
 * request and re-sending the last user message to the next usable model from
 * the configured fallback list.
 *
 * Only `client` and `directory` of the plugin context are used; `directory`
 * is handed to `loadConfig` to locate the config file.
 *
 * @returns An empty hooks object when the plugin is disabled in the config,
 *   otherwise an object with a single `event` hook.
 */
export const RateLimitFallback: Plugin = async ({ client, directory }) => {
  type ToastVariant = "info" | "success" | "warning" | "error";
  type LogLevel = "info" | "warn" | "error";
  interface RetryState {
    attemptedModels: Set<string>;
    lastAttemptTime: number;
  }

  const config = loadConfig(directory);

  if (!config.enabled) {
    return {};
  }

  const SERVICE_NAME = "rate-limit-fallback";
  // Triggers for a session that started being handled less than this long ago are ignored.
  const DUPLICATE_WINDOW_MS = 5000;
  // Attempt history for a user message is discarded after this much inactivity.
  const RETRY_STATE_TTL_MS = 30000;
  // Log level used for each toast variant when the toast falls back to the app log.
  const LOG_LEVEL_BY_VARIANT: Record<ToastVariant, LogLevel> = {
    info: "info",
    success: "info",
    warning: "warn",
    error: "error",
  };

  const rateLimitedModels = new Map<string, number>(); // model key -> time it was marked rate limited
  const retryState = new Map<string, RetryState>(); // "<sessionID>:<messageID>" -> attempt history
  const currentSessionModel = new Map<string, FallbackModel>(); // sessionID -> model last selected by this plugin
  const fallbackInProgress = new Map<string, number>(); // sessionID -> timestamp

  /** Writes one entry to the app log; failures are ignored so logging can never abort a fallback. */
  async function writeLog(level: LogLevel, message: string): Promise<void> {
    try {
      await client.app.log({
        body: { service: SERVICE_NAME, level, message },
      });
    } catch {
      // Best effort only.
    }
  }

  /** Shows a toast; when the toast call fails, the same text is written to the app log instead. */
  async function toast(title: string, message: string, variant: ToastVariant = "info"): Promise<void> {
    try {
      await client.tui.showToast({
        body: { title, message, variant },
      });
    } catch {
      await writeLog(LOG_LEVEL_BY_VARIANT[variant], `${title}: ${message}`);
    }
  }

  /** True while the model is inside its cooldown window; expired marks are removed on lookup. */
  function isModelRateLimited(providerID: string, modelID: string): boolean {
    const key = getModelKey(providerID, modelID);
    const limitedAt = rateLimitedModels.get(key);
    if (!limitedAt) return false;
    if (Date.now() - limitedAt > config.cooldownMs) {
      rateLimitedModels.delete(key);
      return false;
    }
    return true;
  }

  /** Records that the model was rate limited just now. */
  function markModelRateLimited(providerID: string, modelID: string): void {
    rateLimitedModels.set(getModelKey(providerID, modelID), Date.now());
  }

  /**
   * Returns the first fallback model that is neither already attempted nor in
   * cooldown, or `null` when there is none.
   *
   * The list is scanned as a ring starting just after the current model, so the
   * current model itself is considered last. When the current model is not in
   * the list, the scan starts at the first entry.
   */
  function findNextAvailableModel(
    currentProviderID: string,
    currentModelID: string,
    attemptedModels: Set<string>,
  ): FallbackModel | null {
    const models = config.fallbackModels;
    const currentKey = getModelKey(currentProviderID, currentModelID);
    const startIndex = models.findIndex((m) => getModelKey(m.providerID, m.modelID) === currentKey);

    // startIndex === -1 makes offset 1 land on index 0, i.e. a plain front-to-back scan.
    for (let offset = 1; offset <= models.length; offset++) {
      const model = models[(startIndex + offset) % models.length];
      const key = getModelKey(model.providerID, model.modelID);
      if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
        return model;
      }
    }

    return null;
  }

  /** Forgets earlier attempts (except the current model) and searches again from the first entry. */
  function restartFromFirstModel(state: RetryState, providerID: string, modelID: string): FallbackModel | null {
    state.attemptedModels.clear();
    if (providerID && modelID) {
      state.attemptedModels.add(getModelKey(providerID, modelID));
    }
    return findNextAvailableModel("", "", state.attemptedModels);
  }

  /**
   * Aborts the session's current request and re-sends its last user message
   * to the next available fallback model.
   *
   * @param sessionID Session that hit the rate limit.
   * @param currentProviderID Provider of the failing model, or "" when unknown.
   * @param currentModelID Failing model, or "" when unknown; unknown values are
   *   filled from the model this plugin last selected for the session, if any.
   */
  async function handleRateLimitFallback(sessionID: string, currentProviderID: string, currentModelID: string) {
    // Several events can report the same failure; only the first one is handled.
    const lastFallback = fallbackInProgress.get(sessionID);
    if (lastFallback && Date.now() - lastFallback < DUPLICATE_WINDOW_MS) {
      return;
    }
    fallbackInProgress.set(sessionID, Date.now());

    try {
      if (!currentProviderID || !currentModelID) {
        const tracked = currentSessionModel.get(sessionID);
        if (tracked) {
          currentProviderID = tracked.providerID;
          currentModelID = tracked.modelID;
        }
      }

      await client.session.abort({ path: { id: sessionID } });

      await toast("Rate Limit Detected", `Switching from ${currentModelID || 'current model'}...`, "warning");

      const messagesResult = await client.session.messages({ path: { id: sessionID } });
      if (!messagesResult.data) return;

      const lastUserMessage = [...messagesResult.data].reverse().find((m) => m.info.role === "user");
      if (!lastUserMessage) return;

      const stateKey = `${sessionID}:${lastUserMessage.info.id}`;
      let state = retryState.get(stateKey);

      if (!state || Date.now() - state.lastAttemptTime > RETRY_STATE_TTL_MS) {
        state = { attemptedModels: new Set<string>(), lastAttemptTime: Date.now() };
        retryState.set(stateKey, state);
      }

      if (currentProviderID && currentModelID) {
        markModelRateLimited(currentProviderID, currentModelID);
        state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
      }

      let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);

      // Nothing left to try: the configured fallbackMode decides what happens next.
      if (!nextModel && state.attemptedModels.size > 0) {
        if (config.fallbackMode === "cycle") {
          nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
        } else if (config.fallbackMode === "retry-last") {
          const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
          if (lastModel) {
            const isLastModelCurrent =
              currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;

            if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
              // Give the last model in the list one more try.
              nextModel = lastModel;
              await toast("Last Resort", `Trying ${lastModel.modelID} one more time...`, "warning");
            } else {
              // The last model failed as well; start over from the first entry.
              nextModel = restartFromFirstModel(state, currentProviderID, currentModelID);
            }
          }
        }
        // "stop" mode: nextModel stays null and the error toast below is shown.
      }

      if (!nextModel) {
        await toast(
          "No Fallback Available",
          config.fallbackMode === "stop"
            ? "All fallback models exhausted"
            : "All models are rate limited",
          "error",
        );
        retryState.delete(stateKey);
        return;
      }

      state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
      state.lastAttemptTime = Date.now();

      // Only text and file parts are re-sent; every other part type is dropped.
      // NOTE(review): file parts are forwarded with `path`/`mediaType` — confirm these
      // are the field names the prompt endpoint expects for file inputs.
      const parts: Array<
        | { type: "text"; text: unknown }
        | { type: "file"; path: unknown; mediaType: unknown }
      > = [];
      // Parts are read loosely, as the original did, because their SDK type is not visible here.
      for (const part of (lastUserMessage.parts ?? []) as any[]) {
        if (part.type === "text") {
          parts.push({ type: "text", text: part.text });
        } else if (part.type === "file") {
          parts.push({ type: "file", path: part.path, mediaType: part.mediaType });
        }
      }

      if (parts.length === 0) return;

      await toast("Retrying", `Using ${nextModel.providerID}/${nextModel.modelID}`, "info");

      // Remember the selection so later events without model info can be attributed to it.
      currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });

      await client.session.prompt({
        path: { id: sessionID },
        body: {
          parts: parts as any,
          model: { providerID: nextModel.providerID, modelID: nextModel.modelID },
        },
      });

      await toast("Fallback Successful", `Now using ${nextModel.modelID}`, "success");

      retryState.delete(stateKey);
    } catch (err: unknown) {
      // Record the failure instead of dropping it silently.
      const reason = err instanceof Error ? err.message : String(err);
      await writeLog("error", `Fallback failed for session ${sessionID}: ${reason}`);
    } finally {
      // Release the in-progress mark on every exit path so the next failure can be handled.
      fallbackInProgress.delete(sessionID);
    }
  }

  return {
    /** Inspects session/message events and starts a fallback when one reports a rate limit. */
    event: async ({ event }) => {
      if (event.type === "session.error") {
        const { sessionID, error } = (event.properties ?? {}) as any;
        if (sessionID && error && isRateLimitError(error)) {
          await handleRateLimitFallback(sessionID, "", "");
        }
      }

      if (event.type === "message.updated") {
        const info = (event.properties as any)?.info;
        if (info?.sessionID && info.error && isRateLimitError(info.error)) {
          await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
        }
      }

      if (event.type === "session.status") {
        const props = event.properties as any;
        const status = props?.status;

        if (props?.sessionID && status?.type === "retry" && typeof status.message === "string") {
          const message: string = status.message.toLowerCase();
          const isRateLimitRetry = ["usage limit", "rate limit", "high concurrency", "reduce concurrency"]
            .some((phrase) => message.includes(phrase));

          if (isRateLimitRetry) {
            // Fall back on any retry attempt; handleRateLimitFallback de-duplicates.
            await handleRateLimitFallback(props.sessionID, "", "");
          }
        }
      }
    },
  };
};
369
-
370
- export default RateLimitFallback;
@@ -1,10 +0,0 @@
1
- {
2
- "enabled": true,
3
- "cooldownMs": 60000,
4
- "fallbackMode": "cycle",
5
- "fallbackModels": [
6
- { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
7
- { "providerID": "google", "modelID": "gemini-2.5-pro" },
8
- { "providerID": "google", "modelID": "gemini-2.5-flash" }
9
- ]
10
- }