@azumag/opencode-rate-limit-fallback 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(npm publish:*)",
5
+ "Bash(npm whoami:*)",
6
+ "Bash(npm config:*)",
7
+ "WebSearch",
8
+ "WebFetch(domain:github.blog)",
9
+ "Bash(npm login:*)",
10
+ "Bash(npm token create:*)",
11
+ "Bash(npm view:*)"
12
+ ]
13
+ }
14
+ }
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 azumag
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,87 @@
1
+ # opencode-rate-limit-fallback
2
+
3
+ OpenCode plugin that automatically switches to fallback models when rate limited.
4
+
5
+ ## Features
6
+
7
+ - Detects rate limit errors (429, "usage limit", "quota exceeded", etc.)
8
+ - Automatically aborts the current request and retries with a fallback model
9
+ - Configurable fallback model list with priority order
10
+ - Cooldown period to prevent immediate retry on rate-limited models
11
+ - Toast notifications for user feedback
12
+
13
+ ## Installation
14
+
15
+ Copy `index.ts` to your OpenCode plugins directory:
16
+
17
+ ```bash
18
+ mkdir -p ~/.config/opencode/plugins
19
+ curl -o ~/.config/opencode/plugins/rate-limit-fallback.ts \
20
+ https://raw.githubusercontent.com/azumag/opencode-rate-limit-fallback/main/index.ts
21
+ ```
22
+
23
+ Or, if you have already downloaded `index.ts`, copy it into place manually:
24
+
25
+ ```bash
26
+ cp index.ts ~/.config/opencode/plugins/rate-limit-fallback.ts
27
+ ```
28
+
29
+ Dependencies (`@opencode-ai/plugin`) will be automatically installed by OpenCode on startup.
30
+
31
+ Restart OpenCode to load the plugin.
32
+
33
+ ## Configuration
34
+
35
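+ Create a configuration file at one of these locations. If more than one exists, only the first readable file is used; project-level files are checked before home-directory files (`<project>/.opencode/`, then `<project>/`, then `~/.opencode/`, then `~/.config/opencode/`):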
36
+
37
+ - `~/.opencode/rate-limit-fallback.json` (recommended)
38
+ - `~/.config/opencode/rate-limit-fallback.json`
39
+ - `<project>/.opencode/rate-limit-fallback.json`
40
+ - `<project>/rate-limit-fallback.json`
41
+
42
+ ### Example Configuration
43
+
44
+ ```json
45
+ {
46
+ "enabled": true,
47
+ "cooldownMs": 60000,
48
+ "fallbackModels": [
49
+ { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
50
+ { "providerID": "google", "modelID": "gemini-2.5-pro" },
51
+ { "providerID": "google", "modelID": "gemini-2.5-flash" }
52
+ ]
53
+ }
54
+ ```
55
+
56
+ ### Configuration Options
57
+
58
+ | Option | Type | Default | Description |
59
+ |--------|------|---------|-------------|
60
+ | `enabled` | boolean | `true` | Enable/disable the plugin |
61
+ | `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
62
+ | `fallbackModels` | array | See below | List of fallback models in priority order |
63
+
64
+ ### Default Fallback Models
65
+
66
+ If no configuration is provided, the following models are used:
67
+
68
+ 1. `anthropic/claude-sonnet-4-20250514`
69
+ 2. `google/gemini-2.5-pro`
70
+ 3. `google/gemini-2.5-flash`
71
+
72
+ ## How It Works
73
+
74
+ 1. **Detection**: The plugin listens for rate limit errors via:
75
+ - `session.error` events
76
+ - `message.updated` events with errors
77
+ - `session.status` events with `type: "retry"`
78
+
79
+ 2. **Abort**: When a rate limit is detected, the current session is aborted to stop OpenCode's internal retry mechanism.
80
+
81
+ 3. **Fallback**: The plugin selects the next available model from the fallback list and resends the last user message.
82
+
83
+ 4. **Cooldown**: Rate-limited models are tracked and skipped for the configured cooldown period.
84
+
85
+ ## License
86
+
87
+ MIT
package/index.ts ADDED
@@ -0,0 +1,267 @@
1
+ import type { Plugin } from "@opencode-ai/plugin";
2
+ import { existsSync, readFileSync } from "fs";
3
+ import { join } from "path";
4
+
5
/** A provider/model pair that can be selected as a fallback target. */
interface FallbackModel {
  providerID: string;
  modelID: string;
}

/** Plugin settings after defaults have been merged in. */
interface PluginConfig {
  /** Fallback candidates, highest priority first. */
  fallbackModels: FallbackModel[];
  /** How long (ms) a model that hit a rate limit is skipped. */
  cooldownMs: number;
  /** When false the plugin registers no hooks at all. */
  enabled: boolean;
}

/** Fallback candidates used when the user configuration does not supply a list. */
const DEFAULT_FALLBACK_MODELS: FallbackModel[] = [
  {
    providerID: "anthropic",
    modelID: "claude-sonnet-4-20250514",
  },
  {
    providerID: "google",
    modelID: "gemini-2.5-pro",
  },
  {
    providerID: "google",
    modelID: "gemini-2.5-flash",
  },
];

/** Settings applied when no configuration file is found. */
const DEFAULT_CONFIG: PluginConfig = {
  fallbackModels: DEFAULT_FALLBACK_MODELS,
  cooldownMs: 60_000, // one minute
  enabled: true,
};
27
+
28
/**
 * Loads the plugin configuration from the first usable config file.
 *
 * Candidate files are probed in this order:
 *   1. `<directory>/.opencode/rate-limit-fallback.json`
 *   2. `<directory>/rate-limit-fallback.json`
 *   3. `<home>/.opencode/rate-limit-fallback.json`
 *   4. `<home>/.config/opencode/rate-limit-fallback.json`
 *
 * A candidate that is missing, unreadable, not valid JSON, or not a JSON
 * object is skipped. Within an accepted file, a field of the wrong type falls
 * back to its default instead of being copied into the config, because a
 * non-numeric `cooldownMs` or a non-array `fallbackModels` would otherwise
 * break cooldown checks and model selection later on.
 *
 * @param directory - Project directory to search for project-level config.
 * @returns The merged configuration, or `DEFAULT_CONFIG` when no usable file exists.
 */
function loadConfig(directory: string): PluginConfig {
  const configFileName = "rate-limit-fallback.json";

  // HOME is normally unset on Windows, where USERPROFILE plays the same role.
  const homedir = process.env.HOME || process.env.USERPROFILE || "";

  const configPaths = [
    join(directory, ".opencode", configFileName),
    join(directory, configFileName),
  ];
  // Without a home directory, join("", ...) would yield paths relative to the
  // current working directory, so the home-level candidates are skipped.
  if (homedir) {
    configPaths.push(
      join(homedir, ".opencode", configFileName),
      join(homedir, ".config", "opencode", configFileName),
    );
  }

  // Accepts only entries that carry both identifiers as non-empty strings.
  const isModelEntry = (value: unknown): value is PluginConfig["fallbackModels"][number] => {
    if (typeof value !== "object" || value === null) return false;
    const { providerID, modelID } = value as Record<string, unknown>;
    return (
      typeof providerID === "string" &&
      providerID !== "" &&
      typeof modelID === "string" &&
      modelID !== ""
    );
  };

  for (const configPath of configPaths) {
    if (!existsSync(configPath)) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(configPath, "utf-8"));
    } catch {
      // Unreadable or malformed file: try the next candidate.
      continue;
    }

    // Only a JSON object can carry settings; null, arrays and scalars are skipped.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;

    const userConfig = parsed as Record<string, unknown>;
    const { enabled, cooldownMs, fallbackModels } = userConfig;

    return {
      ...DEFAULT_CONFIG,
      ...userConfig,
      enabled: typeof enabled === "boolean" ? enabled : DEFAULT_CONFIG.enabled,
      cooldownMs:
        typeof cooldownMs === "number" && Number.isFinite(cooldownMs) && cooldownMs >= 0
          ? cooldownMs
          : DEFAULT_CONFIG.cooldownMs,
      // An explicit list (even an empty one) is respected; invalid entries are dropped.
      fallbackModels: Array.isArray(fallbackModels)
        ? fallbackModels.filter(isModelEntry)
        : DEFAULT_CONFIG.fallbackModels,
    };
  }

  return DEFAULT_CONFIG;
}
55
+
56
/**
 * Builds the `provider/model` identifier used to key a model in maps and sets.
 *
 * @param providerID - Provider identifier, e.g. `google`.
 * @param modelID - Model identifier within that provider.
 * @returns The two identifiers joined by a single slash.
 */
function getModelKey(providerID: string, modelID: string): string {
  const separator = "/";
  return `${providerID}${separator}${modelID}`;
}
59
+
60
/**
 * Heuristically decides whether an error value describes a rate-limit or
 * quota failure.
 *
 * An `APIError` whose `data.statusCode` is 429 matches immediately. Otherwise
 * the textual fields (`data.responseBody`, `data.message` or `message`, and
 * `name`) are searched case-insensitively for well-known rate-limit phrases
 * or a standalone `429`. An error delivered as a plain string is searched too.
 *
 * Fields that are not strings are ignored rather than coerced, so a
 * structured response body can never make this function throw.
 *
 * @param error - Arbitrary error payload; may be null or undefined.
 * @returns `true` when the payload looks like a rate-limit error.
 */
function isRateLimitError(error: any): boolean {
  if (!error) return false;

  if (error.name === "APIError" && error.data?.statusCode === 429) {
    return true;
  }

  const asText = (value: unknown): string =>
    typeof value === "string" ? value.toLowerCase() : "";

  const candidates = [
    asText(error),
    asText(error.data?.responseBody),
    asText(error.data?.message) || asText(error.message),
    asText(error.name),
  ].filter((text) => text !== "");

  const rateLimitPhrases = [
    "rate limit",
    "rate_limit",
    "ratelimit",
    "too many requests",
    "quota exceeded",
    "resource exhausted",
    "usage limit",
  ];

  // Match 429 only when it is not part of a longer number, so values such as
  // "14290 tokens" are not mistaken for an HTTP status code.
  const statusCodePattern = /(?<!\d)429(?!\d)/;

  return candidates.some(
    (text) =>
      statusCodePattern.test(text) ||
      rateLimitPhrases.some((phrase) => text.includes(phrase))
  );
}
89
+
90
/**
 * OpenCode plugin that reacts to rate-limit errors by aborting the running
 * request and resending the last user message to the next model in the
 * configured fallback list.
 *
 * State is kept in memory per plugin instance:
 * - `rateLimitedModels` remembers when each model last hit a rate limit so it
 *   can be skipped for `config.cooldownMs`.
 * - `retryState` remembers which models were already tried for a given
 *   session/user-message pair.
 *
 * Returns an empty hook object when the plugin is disabled in its config.
 */
export const RateLimitFallback: Plugin = async ({ client, directory }) => {
  const config = loadConfig(directory);

  if (!config.enabled) {
    return {};
  }

  // A retry record older than this is discarded and started afresh.
  const RETRY_STATE_TTL_MS = 30_000;

  const rateLimitedModels = new Map<string, number>();
  const retryState = new Map<string, { attemptedModels: Set<string>; lastAttemptTime: number }>();

  /** True while the model is still inside its cooldown window; expired entries are pruned. */
  function isModelRateLimited(providerID: string, modelID: string): boolean {
    const key = getModelKey(providerID, modelID);
    const limitedAt = rateLimitedModels.get(key);
    if (limitedAt === undefined) return false;
    if (Date.now() - limitedAt > config.cooldownMs) {
      rateLimitedModels.delete(key);
      return false;
    }
    return true;
  }

  /** Records that the model hit a rate limit just now. */
  function markModelRateLimited(providerID: string, modelID: string): void {
    rateLimitedModels.set(getModelKey(providerID, modelID), Date.now());
  }

  /**
   * Picks the next usable fallback model.
   *
   * The scan starts right after the current model's position in the list and
   * wraps around to the beginning; when the current model is not in the list
   * the scan simply starts at index 0. Models already attempted for this
   * message or still cooling down are skipped.
   */
  function findNextAvailableModel(
    currentProviderID: string,
    currentModelID: string,
    attemptedModels: Set<string>
  ): FallbackModel | null {
    const models = config.fallbackModels;
    const currentKey = getModelKey(currentProviderID, currentModelID);
    const startIndex = models.findIndex((m) => getModelKey(m.providerID, m.modelID) === currentKey);

    for (let offset = 1; offset <= models.length; offset++) {
      const model = models[(startIndex + offset) % models.length];
      const key = getModelKey(model.providerID, model.modelID);
      if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
        return model;
      }
    }

    return null;
  }

  /**
   * Aborts the session's running request and resends the last user message
   * with the next available fallback model.
   *
   * `currentProviderID` / `currentModelID` may be empty strings when the
   * triggering event does not say which model failed.
   */
  async function handleRateLimitFallback(sessionID: string, currentProviderID: string, currentModelID: string) {
    try {
      // Abort first so the host stops retrying the rate-limited request.
      await client.session.abort({ path: { id: sessionID } });

      await client.tui.showToast({
        body: {
          title: "Rate Limit Detected",
          message: "Switching to fallback model...",
          variant: "warning",
          duration: 3000,
        },
      });

      const messagesResult = await client.session.messages({ path: { id: sessionID } });
      if (!messagesResult.data) return;

      const messages = messagesResult.data;
      let lastUserIndex = -1;
      for (let i = messages.length - 1; i >= 0; i--) {
        if (messages[i].info.role === "user") {
          lastUserIndex = i;
          break;
        }
      }
      if (lastUserIndex === -1) return;
      const lastUserMessage = messages[lastUserIndex];

      let failedProviderID = currentProviderID;
      let failedModelID = currentModelID;
      if (!failedProviderID || !failedModelID) {
        // The event did not name the failing model. Look at replies that came
        // after the last user message: any such reply belongs to the request
        // that just failed.
        // NOTE(review): assumes assistant message info exposes providerID /
        // modelID the same way the message.updated payload does — confirm.
        for (let i = messages.length - 1; i > lastUserIndex; i--) {
          const info = messages[i].info as any;
          if (info?.role === "assistant" && info.providerID && info.modelID) {
            failedProviderID = info.providerID;
            failedModelID = info.modelID;
            break;
          }
        }
      }

      const stateKey = `${sessionID}:${lastUserMessage.info.id}`;
      let state = retryState.get(stateKey);

      if (!state || Date.now() - state.lastAttemptTime > RETRY_STATE_TTL_MS) {
        state = { attemptedModels: new Set<string>(), lastAttemptTime: Date.now() };
        retryState.set(stateKey, state);
      }

      if (failedProviderID && failedModelID) {
        markModelRateLimited(failedProviderID, failedModelID);
        state.attemptedModels.add(getModelKey(failedProviderID, failedModelID));
      }

      const nextModel = findNextAvailableModel(failedProviderID || "", failedModelID || "", state.attemptedModels);

      if (!nextModel) {
        await client.tui.showToast({
          body: {
            title: "No Fallback Available",
            message: "All models are rate limited",
            variant: "error",
            duration: 5000,
          },
        });
        retryState.delete(stateKey);
        return;
      }

      state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
      state.lastAttemptTime = Date.now();

      // Only text and file parts are resent; every other part type is dropped.
      const parts = lastUserMessage.parts.flatMap((p: any): Array<Record<string, unknown>> => {
        if (p.type === "text") return [{ type: "text", text: p.text }];
        if (p.type === "file") {
          // NOTE(review): the original forwarded only `path` / `mediaType`.
          // The SDK's file part appears to use `mime` / `url` / `filename`
          // instead — confirm. Both sets are forwarded; undefined fields are
          // omitted when the request body is serialized.
          return [
            {
              type: "file",
              path: p.path,
              mediaType: p.mediaType,
              mime: p.mime,
              url: p.url,
              filename: p.filename,
            },
          ];
        }
        return [];
      });

      if (parts.length === 0) {
        // Nothing can be resent, so the retry record is of no further use.
        retryState.delete(stateKey);
        return;
      }

      await client.tui.showToast({
        body: {
          title: "Retrying",
          message: `Using ${nextModel.providerID}/${nextModel.modelID}`,
          variant: "info",
          duration: 3000,
        },
      });

      const promptResult = await client.session.prompt({
        path: { id: sessionID },
        body: {
          parts: parts as any,
          model: { providerID: nextModel.providerID, modelID: nextModel.modelID },
        },
      });

      // NOTE(review): presumably the client reports failures through an
      // `error` field rather than throwing (the messages call above is read
      // through `.data`) — confirm. When the field is absent this is a no-op.
      if ((promptResult as any)?.error) {
        await client.tui.showToast({
          body: {
            title: "Fallback Failed",
            message: `Request to ${nextModel.providerID}/${nextModel.modelID} failed`,
            variant: "error",
            duration: 5000,
          },
        });
        return;
      }

      await client.tui.showToast({
        body: {
          title: "Fallback Successful",
          message: `Now using ${nextModel.modelID}`,
          variant: "success",
          duration: 3000,
        },
      });

      retryState.delete(stateKey);
    } catch (err) {
      // Surface the failure instead of swallowing it. The toast itself is
      // best-effort and must never throw out of the event handler.
      try {
        await client.tui.showToast({
          body: {
            title: "Fallback Failed",
            message: err instanceof Error ? err.message : String(err),
            variant: "error",
            duration: 5000,
          },
        });
      } catch {
        // Nothing more can be done here.
      }
    }
  }

  return {
    event: async ({ event }) => {
      if (event.type === "session.error") {
        const { sessionID, error } = event.properties as any;
        if (sessionID && error && isRateLimitError(error)) {
          await handleRateLimitFallback(sessionID, "", "");
        }
      }

      if (event.type === "message.updated") {
        const info = (event.properties as any)?.info;
        // A session id is required, matching the session.error branch.
        if (info?.sessionID && info.error && isRateLimitError(info.error)) {
          await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
        }
      }

      if (event.type === "session.status") {
        const props = event.properties as any;
        const status = props?.status;

        if (props?.sessionID && status?.type === "retry" && typeof status.message === "string") {
          const message = status.message.toLowerCase();
          const looksRateLimited = message.includes("usage limit") || message.includes("rate limit");
          // Act on the first retry attempt only; later attempts are ignored.
          if (looksRateLimited && status.attempt === 1) {
            await handleRateLimitFallback(props.sessionID, "", "");
          }
        }
      }
    },
  };
};
266
+
267
+ export default RateLimitFallback;
package/package.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "name": "@azumag/opencode-rate-limit-fallback",
3
+ "version": "1.0.0",
4
+ "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
5
+ "main": "index.ts",
6
+ "type": "module",
7
+ "keywords": [
8
+ "opencode",
9
+ "plugin",
10
+ "rate-limit",
11
+ "fallback",
12
+ "ai"
13
+ ],
14
+ "author": "azumag",
15
+ "license": "MIT",
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "git+https://github.com/azumag/opencode-rate-limit-fallback.git"
19
+ },
20
+ "bugs": {
21
+ "url": "https://github.com/azumag/opencode-rate-limit-fallback/issues"
22
+ },
23
+ "homepage": "https://github.com/azumag/opencode-rate-limit-fallback#readme",
24
+ "dependencies": {
25
+ "@opencode-ai/plugin": "latest"
26
+ }
27
+ }
@@ -0,0 +1,9 @@
1
+ {
2
+ "enabled": true,
3
+ "cooldownMs": 60000,
4
+ "fallbackModels": [
5
+ { "providerID": "anthropic", "modelID": "claude-sonnet-4-20250514" },
6
+ { "providerID": "google", "modelID": "gemini-2.5-pro" },
7
+ { "providerID": "google", "modelID": "gemini-2.5-flash" }
8
+ ]
9
+ }