@azumag/opencode-rate-limit-fallback 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +3 -0
- package/dist/index.js +316 -0
- package/package.json +19 -2
- package/.github/workflows/npm-publish.yml +0 -21
- package/index.ts +0 -370
- package/rate-limit-fallback.example.json +0 -10
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "fs";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
// Ordered list of models tried when the active model hits a rate limit.
// Order matters: fallback walks this list top to bottom (and may wrap,
// depending on fallbackMode).
const DEFAULT_FALLBACK_MODELS = [
    { providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
    { providerID: "google", modelID: "gemini-2.5-pro" },
    { providerID: "google", modelID: "gemini-2.5-flash" },
];
// Defaults used when no rate-limit-fallback.json config file is found,
// and as the base that a user config is merged over in loadConfig().
const DEFAULT_CONFIG = {
    fallbackModels: DEFAULT_FALLBACK_MODELS,
    // How long (ms) a rate-limited model stays excluded before being retried.
    cooldownMs: 60 * 1000,
    enabled: true,
    // "cycle" | "stop" | "retry-last" — behavior when all models are exhausted.
    fallbackMode: "cycle",
};
|
|
14
|
+
/**
 * Loads plugin configuration from the first readable config file.
 *
 * Search order: project `.opencode/`, project root, then the user's
 * `~/.opencode/` and `~/.config/opencode/`. A malformed file is skipped
 * (best effort) and the next path is tried; if nothing loads, defaults apply.
 *
 * @param {string} directory - The project/workspace directory.
 * @returns {{fallbackModels: Array, cooldownMs: number, enabled: boolean, fallbackMode: string}}
 */
function loadConfig(directory) {
    // HOME is not set on Windows; USERPROFILE is the equivalent there.
    const homedir = process.env.HOME || process.env.USERPROFILE || "";
    const configPaths = [
        join(directory, ".opencode", "rate-limit-fallback.json"),
        join(directory, "rate-limit-fallback.json"),
        join(homedir, ".opencode", "rate-limit-fallback.json"),
        join(homedir, ".config", "opencode", "rate-limit-fallback.json"),
    ];
    for (const configPath of configPaths) {
        if (existsSync(configPath)) {
            try {
                const content = readFileSync(configPath, "utf-8");
                const userConfig = JSON.parse(content);
                const mode = userConfig.fallbackMode;
                const validModes = ["cycle", "stop", "retry-last"];
                // A non-array or empty fallbackModels would disable fallback
                // entirely (or break model lookup), so fall back to defaults.
                const validModels = Array.isArray(userConfig.fallbackModels) &&
                    userConfig.fallbackModels.length > 0;
                return {
                    ...DEFAULT_CONFIG,
                    ...userConfig,
                    fallbackModels: validModels ? userConfig.fallbackModels : DEFAULT_CONFIG.fallbackModels,
                    fallbackMode: validModes.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
                };
            }
            catch (error) {
                // Config load failed (unreadable or invalid JSON); continue to next path.
            }
        }
    }
    return DEFAULT_CONFIG;
}
|
|
43
|
+
/**
 * Builds the canonical "provider/model" string used as the key for the
 * rate-limit and attempted-model bookkeeping maps/sets.
 *
 * @param {string} providerID - Provider identifier (e.g. "anthropic").
 * @param {string} modelID - Model identifier within that provider.
 * @returns {string} The combined key, e.g. "anthropic/claude-sonnet-4".
 */
function getModelKey(providerID, modelID) {
    return [providerID, modelID].join("/");
}
|
|
46
|
+
/**
 * Heuristically decides whether an error represents a provider rate limit.
 *
 * Two signals are checked:
 *  1. A structured HTTP 429 status on `error.data.statusCode` — definitive.
 *     (Previously this also required `error.name === "APIError"`, which
 *     missed providers that surface a 429 under a different error name.)
 *  2. Known rate-limit phrases in the response body, message, or error name
 *     (case-insensitive substring match).
 *
 * @param {any} error - Error object from a session/message event; may be null.
 * @returns {boolean} True if the error looks like a rate limit.
 */
function isRateLimitError(error) {
    if (!error)
        return false;
    // A 429 status code is a rate limit regardless of the error's name.
    if (error.data?.statusCode === 429) {
        return true;
    }
    const responseBody = (error.data?.responseBody || "").toLowerCase();
    const message = (error.data?.message || error.message || "").toLowerCase();
    const errorName = (error.name || "").toLowerCase();
    const rateLimitIndicators = [
        "rate limit",
        "rate_limit",
        "ratelimit",
        "too many requests",
        "quota exceeded",
        "resource exhausted",
        "usage limit",
        "high concurrency usage of this api",
        "high concurrency",
        "reduce concurrency",
        "429",
    ];
    return rateLimitIndicators.some((indicator) => responseBody.includes(indicator) ||
        message.includes(indicator) ||
        errorName.includes(indicator));
}
|
|
72
|
+
/**
 * OpenCode plugin entry point.
 *
 * Watches session/message events for rate-limit errors. When one is
 * detected it aborts the session, picks the next available model from
 * the configured fallback list, and re-submits the last user prompt
 * against that model.
 *
 * Changes from the previous revision: removed the dead `logOrToast`
 * helper (never called) and the unused `lastKey` local in the
 * "retry-last" branch. Runtime behavior is otherwise unchanged.
 *
 * @param {{client: any, directory: string}} ctx - Plugin context from OpenCode.
 * @returns {Promise<object>} Event-hook object ({} when the plugin is disabled).
 */
export const RateLimitFallback = async ({ client, directory }) => {
    const config = loadConfig(directory);
    if (!config.enabled) {
        return {};
    }
    // "provider/model" -> timestamp when the model was last rate limited.
    const rateLimitedModels = new Map();
    // "sessionID:messageID" -> { attemptedModels: Set<string>, lastAttemptTime: number }.
    const retryState = new Map();
    // sessionID -> { providerID, modelID } the session was switched to.
    const currentSessionModel = new Map();
    // sessionID -> timestamp; debounces duplicate fallback handling.
    const fallbackInProgress = new Map();
    /**
     * Shows a TUI toast; if the TUI API is unavailable, falls back to the
     * app log (mapping toast variants onto log levels).
     */
    async function toast(title, message, variant = "info") {
        try {
            await client.tui.showToast({
                body: { title, message, variant },
            });
        }
        catch {
            // The log API accepts "info"/"warn"/"error", not toast variants.
            const variantMap = {
                info: "info",
                success: "info",
                warning: "warn",
                error: "error",
            };
            await client.app.log({
                body: {
                    service: "rate-limit-fallback",
                    level: variantMap[variant],
                    message: `${title}: ${message}`,
                },
            });
        }
    }
    /**
     * Returns true while a model is inside its cooldown window; expired
     * entries are pruned lazily on lookup.
     */
    function isModelRateLimited(providerID, modelID) {
        const key = getModelKey(providerID, modelID);
        const limitedAt = rateLimitedModels.get(key);
        if (!limitedAt)
            return false;
        if (Date.now() - limitedAt > config.cooldownMs) {
            rateLimitedModels.delete(key);
            return false;
        }
        return true;
    }
    /** Records the current time as the model's rate-limit timestamp. */
    function markModelRateLimited(providerID, modelID) {
        const key = getModelKey(providerID, modelID);
        rateLimitedModels.set(key, Date.now());
    }
    /**
     * Picks the next usable fallback model: not already attempted for this
     * prompt and not currently in cooldown. Scans forward from the current
     * model's position in the list, wrapping back to the start. If the
     * current model is not in the list, scans the whole list once.
     *
     * @returns {{providerID: string, modelID: string}|null} Next model, or null.
     */
    function findNextAvailableModel(currentProviderID, currentModelID, attemptedModels) {
        const currentKey = getModelKey(currentProviderID, currentModelID);
        const startIndex = config.fallbackModels.findIndex(m => getModelKey(m.providerID, m.modelID) === currentKey);
        // If current model is not in the fallback list, search from the beginning
        if (startIndex === -1) {
            for (let i = 0; i < config.fallbackModels.length; i++) {
                const model = config.fallbackModels[i];
                const key = getModelKey(model.providerID, model.modelID);
                if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
                    return model;
                }
            }
            return null;
        }
        // Search for the next model after current position
        for (let i = startIndex + 1; i < config.fallbackModels.length; i++) {
            const model = config.fallbackModels[i];
            const key = getModelKey(model.providerID, model.modelID);
            if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
                return model;
            }
        }
        // Search from the beginning to current position (wrap around)
        for (let i = 0; i <= startIndex && i < config.fallbackModels.length; i++) {
            const model = config.fallbackModels[i];
            const key = getModelKey(model.providerID, model.modelID);
            if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
                return model;
            }
        }
        return null;
    }
    /**
     * Core fallback routine: aborts the session, selects the next model
     * per fallbackMode ("cycle" | "stop" | "retry-last"), and re-prompts
     * with the last user message. Best-effort: failures are swallowed so a
     * broken fallback never crashes the host; the debounce flag is always
     * cleared.
     *
     * @param {string} sessionID - Session to recover.
     * @param {string} currentProviderID - Provider that was rate limited ("" if unknown).
     * @param {string} currentModelID - Model that was rate limited ("" if unknown).
     */
    async function handleRateLimitFallback(sessionID, currentProviderID, currentModelID) {
        try {
            // Prevent duplicate fallback processing within 5 seconds
            const lastFallback = fallbackInProgress.get(sessionID);
            if (lastFallback && Date.now() - lastFallback < 5000) {
                return;
            }
            fallbackInProgress.set(sessionID, Date.now());
            // If no model info provided, try to get from tracked session model
            if (!currentProviderID || !currentModelID) {
                const tracked = currentSessionModel.get(sessionID);
                if (tracked) {
                    currentProviderID = tracked.providerID;
                    currentModelID = tracked.modelID;
                }
            }
            await client.session.abort({ path: { id: sessionID } });
            await toast("Rate Limit Detected", `Switching from ${currentModelID || 'current model'}...`, "warning");
            const messagesResult = await client.session.messages({ path: { id: sessionID } });
            if (!messagesResult.data)
                return;
            const messages = messagesResult.data;
            const lastUserMessage = [...messages].reverse().find(m => m.info.role === "user");
            if (!lastUserMessage)
                return;
            // Retry state is scoped to the specific user message; stale state
            // (older than 30s) is discarded so a new prompt starts fresh.
            const stateKey = `${sessionID}:${lastUserMessage.info.id}`;
            let state = retryState.get(stateKey);
            if (!state || Date.now() - state.lastAttemptTime > 30000) {
                state = { attemptedModels: new Set(), lastAttemptTime: Date.now() };
                retryState.set(stateKey, state);
            }
            if (currentProviderID && currentModelID) {
                markModelRateLimited(currentProviderID, currentModelID);
                state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
            }
            let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
            // Handle when no model is found based on fallbackMode
            if (!nextModel && state.attemptedModels.size > 0) {
                if (config.fallbackMode === "cycle") {
                    // Reset and retry from the first model
                    state.attemptedModels.clear();
                    if (currentProviderID && currentModelID) {
                        state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
                    }
                    nextModel = findNextAvailableModel("", "", state.attemptedModels);
                }
                else if (config.fallbackMode === "retry-last") {
                    // Try the last model in the list once, then reset on next prompt
                    const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
                    if (lastModel) {
                        const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
                        if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
                            // Use the last model for one more try
                            nextModel = lastModel;
                            await toast("Last Resort", `Trying ${lastModel.modelID} one more time...`, "warning");
                        }
                        else {
                            // Last model also failed, reset for next prompt
                            state.attemptedModels.clear();
                            if (currentProviderID && currentModelID) {
                                state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
                            }
                            nextModel = findNextAvailableModel("", "", state.attemptedModels);
                        }
                    }
                }
                // "stop" mode: nextModel remains null, will show error below
            }
            if (!nextModel) {
                await toast("No Fallback Available", config.fallbackMode === "stop"
                    ? "All fallback models exhausted"
                    : "All models are rate limited", "error");
                retryState.delete(stateKey);
                fallbackInProgress.delete(sessionID);
                return;
            }
            state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
            state.lastAttemptTime = Date.now();
            // Only text and file parts can be re-submitted; other part types
            // (tool calls, etc.) are dropped from the retried prompt.
            const parts = lastUserMessage.parts
                .filter((p) => p.type === "text" || p.type === "file")
                .map((p) => {
                    if (p.type === "text")
                        return { type: "text", text: p.text };
                    if (p.type === "file")
                        return { type: "file", path: p.path, mediaType: p.mediaType };
                    return null;
                })
                .filter(Boolean);
            if (parts.length === 0)
                return;
            await toast("Retrying", `Using ${nextModel.providerID}/${nextModel.modelID}`, "info");
            // Track the new model for this session
            currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
            await client.session.prompt({
                path: { id: sessionID },
                body: {
                    parts: parts,
                    model: { providerID: nextModel.providerID, modelID: nextModel.modelID },
                },
            });
            await toast("Fallback Successful", `Now using ${nextModel.modelID}`, "success");
            retryState.delete(stateKey);
            // Clear fallback flag to allow next fallback if needed
            fallbackInProgress.delete(sessionID);
        }
        catch (err) {
            // Fallback failed; clear the debounce flag so a later rate-limit
            // event can attempt recovery again. Deliberately best-effort:
            // errors here must not propagate into the host.
            fallbackInProgress.delete(sessionID);
        }
    }
    return {
        event: async ({ event }) => {
            // Structured session-level errors carry the error object directly.
            if (event.type === "session.error") {
                const { sessionID, error } = event.properties;
                if (sessionID && error && isRateLimitError(error)) {
                    await handleRateLimitFallback(sessionID, "", "");
                }
            }
            // Message updates include provider/model info when available.
            if (event.type === "message.updated") {
                const info = event.properties?.info;
                if (info?.error && isRateLimitError(info.error)) {
                    await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
                }
            }
            // Retry statuses only expose a human-readable message, so match
            // rate-limit phrases in its text.
            if (event.type === "session.status") {
                const props = event.properties;
                const status = props?.status;
                if (status?.type === "retry" && status?.message) {
                    const message = status.message.toLowerCase();
                    const isRateLimitRetry = message.includes("usage limit") ||
                        message.includes("rate limit") ||
                        message.includes("high concurrency") ||
                        message.includes("reduce concurrency");
                    if (isRateLimitRetry) {
                        // Try fallback on any attempt, handleRateLimitFallback will manage state
                        await handleRateLimitFallback(props.sessionID, "", "");
                    }
                }
            }
        },
    };
};
export default RateLimitFallback;
|
package/package.json
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@azumag/opencode-rate-limit-fallback",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.8",
|
|
4
4
|
"description": "OpenCode plugin that automatically switches to fallback models when rate limited",
|
|
5
|
-
"main": "index.ts",
|
|
6
5
|
"type": "module",
|
|
6
|
+
"scripts": {
|
|
7
|
+
"build": "tsc",
|
|
8
|
+
"prepublishOnly": "npm run build"
|
|
9
|
+
},
|
|
7
10
|
"keywords": [
|
|
8
11
|
"opencode",
|
|
9
12
|
"plugin",
|
|
@@ -21,7 +24,21 @@
|
|
|
21
24
|
"url": "https://github.com/azumag/opencode-rate-limit-fallback/issues"
|
|
22
25
|
},
|
|
23
26
|
"homepage": "https://github.com/azumag/opencode-rate-limit-fallback#readme",
|
|
27
|
+
"files": [
|
|
28
|
+
"dist"
|
|
29
|
+
],
|
|
30
|
+
"exports": {
|
|
31
|
+
".": {
|
|
32
|
+
"import": "./dist/index.js",
|
|
33
|
+
"types": "./dist/index.d.ts"
|
|
34
|
+
}
|
|
35
|
+
},
|
|
24
36
|
"dependencies": {
|
|
25
37
|
"@opencode-ai/plugin": "latest"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@tsconfig/node22": "^22.0.5",
|
|
41
|
+
"@types/node": "^25.2.2",
|
|
42
|
+
"typescript": "^5.9.3"
|
|
26
43
|
}
|
|
27
44
|
}
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
name: npm publish
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
branches:
|
|
6
|
-
- main
|
|
7
|
-
|
|
8
|
-
jobs:
|
|
9
|
-
publish:
|
|
10
|
-
runs-on: ubuntu-latest
|
|
11
|
-
steps:
|
|
12
|
-
- uses: actions/checkout@v4
|
|
13
|
-
- uses: actions/setup-node@v4
|
|
14
|
-
with:
|
|
15
|
-
node-version: '20'
|
|
16
|
-
registry-url: 'https://registry.npmjs.org'
|
|
17
|
-
|
|
18
|
-
- run: npm install
|
|
19
|
-
- run: npm publish
|
|
20
|
-
env:
|
|
21
|
-
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
package/index.ts
DELETED
|
@@ -1,370 +0,0 @@
|
|
|
1
|
-
import type { Plugin } from "@opencode-ai/plugin";
|
|
2
|
-
import { existsSync, readFileSync } from "fs";
|
|
3
|
-
import { join } from "path";
|
|
4
|
-
|
|
5
|
-
interface FallbackModel {
|
|
6
|
-
providerID: string;
|
|
7
|
-
modelID: string;
|
|
8
|
-
}
|
|
9
|
-
|
|
10
|
-
/**
|
|
11
|
-
* Fallback mode when all models are exhausted:
|
|
12
|
-
* - "cycle": Reset and retry from the first model (default)
|
|
13
|
-
* - "stop": Stop and show error message
|
|
14
|
-
* - "retry-last": Try the last model once, then reset to first on next prompt
|
|
15
|
-
*/
|
|
16
|
-
type FallbackMode = "cycle" | "stop" | "retry-last";
|
|
17
|
-
|
|
18
|
-
interface PluginConfig {
|
|
19
|
-
fallbackModels: FallbackModel[];
|
|
20
|
-
cooldownMs: number;
|
|
21
|
-
enabled: boolean;
|
|
22
|
-
fallbackMode: FallbackMode;
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
|
|
26
|
-
{ providerID: "anthropic", modelID: "claude-sonnet-4-20250514" },
|
|
27
|
-
{ providerID: "google", modelID: "gemini-2.5-pro" },
|
|
28
|
-
{ providerID: "google", modelID: "gemini-2.5-flash" },
|
|
29
|
-
];
|
|
30
|
-
|
|
31
|
-
const DEFAULT_CONFIG: PluginConfig = {
|
|
32
|
-
fallbackModels: DEFAULT_FALLBACK_MODELS,
|
|
33
|
-
cooldownMs: 60 * 1000,
|
|
34
|
-
enabled: true,
|
|
35
|
-
fallbackMode: "cycle",
|
|
36
|
-
};
|
|
37
|
-
|
|
38
|
-
function loadConfig(directory: string): PluginConfig {
|
|
39
|
-
const homedir = process.env.HOME || "";
|
|
40
|
-
const configPaths = [
|
|
41
|
-
join(directory, ".opencode", "rate-limit-fallback.json"),
|
|
42
|
-
join(directory, "rate-limit-fallback.json"),
|
|
43
|
-
join(homedir, ".opencode", "rate-limit-fallback.json"),
|
|
44
|
-
join(homedir, ".config", "opencode", "rate-limit-fallback.json"),
|
|
45
|
-
];
|
|
46
|
-
|
|
47
|
-
for (const configPath of configPaths) {
|
|
48
|
-
if (existsSync(configPath)) {
|
|
49
|
-
try {
|
|
50
|
-
const content = readFileSync(configPath, "utf-8");
|
|
51
|
-
const userConfig = JSON.parse(content);
|
|
52
|
-
const mode = userConfig.fallbackMode;
|
|
53
|
-
const validModes: FallbackMode[] = ["cycle", "stop", "retry-last"];
|
|
54
|
-
return {
|
|
55
|
-
...DEFAULT_CONFIG,
|
|
56
|
-
...userConfig,
|
|
57
|
-
fallbackModels: userConfig.fallbackModels || DEFAULT_CONFIG.fallbackModels,
|
|
58
|
-
fallbackMode: validModes.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
|
|
59
|
-
};
|
|
60
|
-
} catch (error) {
|
|
61
|
-
// Config load failed, continue to next path
|
|
62
|
-
}
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
return DEFAULT_CONFIG;
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
function getModelKey(providerID: string, modelID: string): string {
|
|
70
|
-
return `${providerID}/${modelID}`;
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
function isRateLimitError(error: any): boolean {
|
|
74
|
-
if (!error) return false;
|
|
75
|
-
|
|
76
|
-
if (error.name === "APIError" && error.data?.statusCode === 429) {
|
|
77
|
-
return true;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
const responseBody = (error.data?.responseBody || "").toLowerCase();
|
|
81
|
-
const message = (error.data?.message || error.message || "").toLowerCase();
|
|
82
|
-
const errorName = (error.name || "").toLowerCase();
|
|
83
|
-
|
|
84
|
-
const rateLimitIndicators = [
|
|
85
|
-
"rate limit",
|
|
86
|
-
"rate_limit",
|
|
87
|
-
"ratelimit",
|
|
88
|
-
"too many requests",
|
|
89
|
-
"quota exceeded",
|
|
90
|
-
"resource exhausted",
|
|
91
|
-
"usage limit",
|
|
92
|
-
"high concurrency usage of this api",
|
|
93
|
-
"high concurrency",
|
|
94
|
-
"reduce concurrency",
|
|
95
|
-
"429",
|
|
96
|
-
];
|
|
97
|
-
|
|
98
|
-
return rateLimitIndicators.some(
|
|
99
|
-
(indicator) =>
|
|
100
|
-
responseBody.includes(indicator) ||
|
|
101
|
-
message.includes(indicator) ||
|
|
102
|
-
errorName.includes(indicator)
|
|
103
|
-
);
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
export const RateLimitFallback: Plugin = async ({ client, directory }) => {
|
|
107
|
-
const config = loadConfig(directory);
|
|
108
|
-
|
|
109
|
-
if (!config.enabled) {
|
|
110
|
-
return {};
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
const rateLimitedModels = new Map<string, number>();
|
|
114
|
-
const retryState = new Map<string, { attemptedModels: Set<string>; lastAttemptTime: number }>();
|
|
115
|
-
const currentSessionModel = new Map<string, { providerID: string; modelID: string }>();
|
|
116
|
-
const fallbackInProgress = new Map<string, number>(); // sessionID -> timestamp
|
|
117
|
-
|
|
118
|
-
async function logOrToast(message: string, variant: "info" | "success" | "warning" | "error" = "info") {
|
|
119
|
-
try {
|
|
120
|
-
await client.tui.showToast({
|
|
121
|
-
body: { message, variant },
|
|
122
|
-
});
|
|
123
|
-
} catch {
|
|
124
|
-
await client.app.log({
|
|
125
|
-
body: {
|
|
126
|
-
service: "rate-limit-fallback",
|
|
127
|
-
level: variant,
|
|
128
|
-
message,
|
|
129
|
-
},
|
|
130
|
-
});
|
|
131
|
-
}
|
|
132
|
-
}
|
|
133
|
-
|
|
134
|
-
async function toast(title: string, message: string, variant: "info" | "success" | "warning" | "error" = "info") {
|
|
135
|
-
try {
|
|
136
|
-
await client.tui.showToast({
|
|
137
|
-
body: { title, message, variant },
|
|
138
|
-
});
|
|
139
|
-
} catch {
|
|
140
|
-
await client.app.log({
|
|
141
|
-
body: {
|
|
142
|
-
service: "rate-limit-fallback",
|
|
143
|
-
level: variant,
|
|
144
|
-
message: `${title}: ${message}`,
|
|
145
|
-
},
|
|
146
|
-
});
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
function isModelRateLimited(providerID: string, modelID: string): boolean {
|
|
151
|
-
const key = getModelKey(providerID, modelID);
|
|
152
|
-
const limitedAt = rateLimitedModels.get(key);
|
|
153
|
-
if (!limitedAt) return false;
|
|
154
|
-
if (Date.now() - limitedAt > config.cooldownMs) {
|
|
155
|
-
rateLimitedModels.delete(key);
|
|
156
|
-
return false;
|
|
157
|
-
}
|
|
158
|
-
return true;
|
|
159
|
-
}
|
|
160
|
-
|
|
161
|
-
function markModelRateLimited(providerID: string, modelID: string): void {
|
|
162
|
-
const key = getModelKey(providerID, modelID);
|
|
163
|
-
rateLimitedModels.set(key, Date.now());
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
function findNextAvailableModel(currentProviderID: string, currentModelID: string, attemptedModels: Set<string>): FallbackModel | null {
|
|
167
|
-
const currentKey = getModelKey(currentProviderID, currentModelID);
|
|
168
|
-
let startIndex = config.fallbackModels.findIndex(m => getModelKey(m.providerID, m.modelID) === currentKey);
|
|
169
|
-
|
|
170
|
-
// If current model is not in the fallback list, search from the beginning
|
|
171
|
-
if (startIndex === -1) {
|
|
172
|
-
// Only search through all models once (first loop handles this)
|
|
173
|
-
for (let i = 0; i < config.fallbackModels.length; i++) {
|
|
174
|
-
const model = config.fallbackModels[i];
|
|
175
|
-
const key = getModelKey(model.providerID, model.modelID);
|
|
176
|
-
if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
|
|
177
|
-
return model;
|
|
178
|
-
}
|
|
179
|
-
}
|
|
180
|
-
return null;
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
// Search for the next model after current position
|
|
184
|
-
for (let i = startIndex + 1; i < config.fallbackModels.length; i++) {
|
|
185
|
-
const model = config.fallbackModels[i];
|
|
186
|
-
const key = getModelKey(model.providerID, model.modelID);
|
|
187
|
-
if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
|
|
188
|
-
return model;
|
|
189
|
-
}
|
|
190
|
-
}
|
|
191
|
-
|
|
192
|
-
// Search from the beginning to current position (wrap around)
|
|
193
|
-
for (let i = 0; i <= startIndex && i < config.fallbackModels.length; i++) {
|
|
194
|
-
const model = config.fallbackModels[i];
|
|
195
|
-
const key = getModelKey(model.providerID, model.modelID);
|
|
196
|
-
if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
|
|
197
|
-
return model;
|
|
198
|
-
}
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
return null;
|
|
202
|
-
}
|
|
203
|
-
|
|
204
|
-
async function handleRateLimitFallback(sessionID: string, currentProviderID: string, currentModelID: string) {
|
|
205
|
-
try {
|
|
206
|
-
// Prevent duplicate fallback processing within 5 seconds
|
|
207
|
-
const lastFallback = fallbackInProgress.get(sessionID);
|
|
208
|
-
if (lastFallback && Date.now() - lastFallback < 5000) {
|
|
209
|
-
return;
|
|
210
|
-
}
|
|
211
|
-
fallbackInProgress.set(sessionID, Date.now());
|
|
212
|
-
|
|
213
|
-
// If no model info provided, try to get from tracked session model
|
|
214
|
-
if (!currentProviderID || !currentModelID) {
|
|
215
|
-
const tracked = currentSessionModel.get(sessionID);
|
|
216
|
-
if (tracked) {
|
|
217
|
-
currentProviderID = tracked.providerID;
|
|
218
|
-
currentModelID = tracked.modelID;
|
|
219
|
-
}
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
await client.session.abort({ path: { id: sessionID } });
|
|
223
|
-
|
|
224
|
-
await toast("Rate Limit Detected", `Switching from ${currentModelID || 'current model'}...`, "warning");
|
|
225
|
-
|
|
226
|
-
const messagesResult = await client.session.messages({ path: { id: sessionID } });
|
|
227
|
-
if (!messagesResult.data) return;
|
|
228
|
-
|
|
229
|
-
const messages = messagesResult.data;
|
|
230
|
-
const lastUserMessage = [...messages].reverse().find(m => m.info.role === "user");
|
|
231
|
-
if (!lastUserMessage) return;
|
|
232
|
-
|
|
233
|
-
const stateKey = `${sessionID}:${lastUserMessage.info.id}`;
|
|
234
|
-
let state = retryState.get(stateKey);
|
|
235
|
-
|
|
236
|
-
if (!state || Date.now() - state.lastAttemptTime > 30000) {
|
|
237
|
-
state = { attemptedModels: new Set<string>(), lastAttemptTime: Date.now() };
|
|
238
|
-
retryState.set(stateKey, state);
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
if (currentProviderID && currentModelID) {
|
|
242
|
-
markModelRateLimited(currentProviderID, currentModelID);
|
|
243
|
-
state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
|
|
247
|
-
|
|
248
|
-
// Handle when no model is found based on fallbackMode
|
|
249
|
-
if (!nextModel && state.attemptedModels.size > 0) {
|
|
250
|
-
if (config.fallbackMode === "cycle") {
|
|
251
|
-
// Reset and retry from the first model
|
|
252
|
-
state.attemptedModels.clear();
|
|
253
|
-
if (currentProviderID && currentModelID) {
|
|
254
|
-
state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
|
|
255
|
-
}
|
|
256
|
-
nextModel = findNextAvailableModel("", "", state.attemptedModels);
|
|
257
|
-
} else if (config.fallbackMode === "retry-last") {
|
|
258
|
-
// Try the last model in the list once, then reset on next prompt
|
|
259
|
-
const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
|
|
260
|
-
if (lastModel) {
|
|
261
|
-
const lastKey = getModelKey(lastModel.providerID, lastModel.modelID);
|
|
262
|
-
const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
|
|
263
|
-
|
|
264
|
-
if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
|
|
265
|
-
// Use the last model for one more try
|
|
266
|
-
nextModel = lastModel;
|
|
267
|
-
await toast("Last Resort", `Trying ${lastModel.modelID} one more time...`, "warning");
|
|
268
|
-
} else {
|
|
269
|
-
// Last model also failed, reset for next prompt
|
|
270
|
-
state.attemptedModels.clear();
|
|
271
|
-
if (currentProviderID && currentModelID) {
|
|
272
|
-
state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
|
|
273
|
-
}
|
|
274
|
-
nextModel = findNextAvailableModel("", "", state.attemptedModels);
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
// "stop" mode: nextModel remains null, will show error below
|
|
279
|
-
}
|
|
280
|
-
|
|
281
|
-
if (!nextModel) {
|
|
282
|
-
await toast(
|
|
283
|
-
"No Fallback Available",
|
|
284
|
-
config.fallbackMode === "stop"
|
|
285
|
-
? "All fallback models exhausted"
|
|
286
|
-
: "All models are rate limited",
|
|
287
|
-
"error"
|
|
288
|
-
);
|
|
289
|
-
retryState.delete(stateKey);
|
|
290
|
-
fallbackInProgress.delete(sessionID);
|
|
291
|
-
return;
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
|
|
295
|
-
state.lastAttemptTime = Date.now();
|
|
296
|
-
|
|
297
|
-
const parts = lastUserMessage.parts
|
|
298
|
-
.filter((p: any) => p.type === "text" || p.type === "file")
|
|
299
|
-
.map((p: any) => {
|
|
300
|
-
if (p.type === "text") return { type: "text" as const, text: p.text };
|
|
301
|
-
if (p.type === "file") return { type: "file" as const, path: p.path, mediaType: p.mediaType };
|
|
302
|
-
return null;
|
|
303
|
-
})
|
|
304
|
-
.filter(Boolean);
|
|
305
|
-
|
|
306
|
-
if (parts.length === 0) return;
|
|
307
|
-
|
|
308
|
-
await toast("Retrying", `Using ${nextModel.providerID}/${nextModel.modelID}`, "info");
|
|
309
|
-
|
|
310
|
-
// Track the new model for this session
|
|
311
|
-
currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
|
|
312
|
-
|
|
313
|
-
await client.session.prompt({
|
|
314
|
-
path: { id: sessionID },
|
|
315
|
-
body: {
|
|
316
|
-
parts: parts as any,
|
|
317
|
-
model: { providerID: nextModel.providerID, modelID: nextModel.modelID },
|
|
318
|
-
},
|
|
319
|
-
});
|
|
320
|
-
|
|
321
|
-
await toast("Fallback Successful", `Now using ${nextModel.modelID}`, "success");
|
|
322
|
-
|
|
323
|
-
retryState.delete(stateKey);
|
|
324
|
-
// Clear fallback flag to allow next fallback if needed
|
|
325
|
-
fallbackInProgress.delete(sessionID);
|
|
326
|
-
} catch (err) {
|
|
327
|
-
// Fallback failed, clear the flag
|
|
328
|
-
fallbackInProgress.delete(sessionID);
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
return {
|
|
333
|
-
event: async ({ event }) => {
|
|
334
|
-
if (event.type === "session.error") {
|
|
335
|
-
const { sessionID, error } = event.properties as any;
|
|
336
|
-
if (sessionID && error && isRateLimitError(error)) {
|
|
337
|
-
await handleRateLimitFallback(sessionID, "", "");
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
|
|
341
|
-
if (event.type === "message.updated") {
|
|
342
|
-
const info = (event.properties as any)?.info;
|
|
343
|
-
if (info?.error && isRateLimitError(info.error)) {
|
|
344
|
-
await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
|
|
345
|
-
}
|
|
346
|
-
}
|
|
347
|
-
|
|
348
|
-
if (event.type === "session.status") {
|
|
349
|
-
const props = event.properties as any;
|
|
350
|
-
const status = props?.status;
|
|
351
|
-
|
|
352
|
-
if (status?.type === "retry" && status?.message) {
|
|
353
|
-
const message = status.message.toLowerCase();
|
|
354
|
-
const isRateLimitRetry =
|
|
355
|
-
message.includes("usage limit") ||
|
|
356
|
-
message.includes("rate limit") ||
|
|
357
|
-
message.includes("high concurrency") ||
|
|
358
|
-
message.includes("reduce concurrency");
|
|
359
|
-
|
|
360
|
-
if (isRateLimitRetry) {
|
|
361
|
-
// Try fallback on any attempt, handleRateLimitFallback will manage state
|
|
362
|
-
await handleRateLimitFallback(props.sessionID, "", "");
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
},
|
|
367
|
-
};
|
|
368
|
-
};
|
|
369
|
-
|
|
370
|
-
export default RateLimitFallback;
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"enabled": true,
|
|
3
|
-
"cooldownMs": 60000,
|
|
4
|
-
"fallbackMode": "cycle",
|
|
5
|
-
"fallbackModels": [
|
|
6
|
-
{ "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
|
|
7
|
-
{ "providerID": "google", "modelID": "gemini-2.5-pro" },
|
|
8
|
-
{ "providerID": "google", "modelID": "gemini-2.5-flash" }
|
|
9
|
-
]
|
|
10
|
-
}
|