@azumag/opencode-rate-limit-fallback 1.68.0 → 1.70.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -10,6 +10,7 @@ OpenCode plugin that automatically switches to fallback models when rate limited
10
10
  - Automatically aborts the current request and retries with a fallback model
11
11
  - Configurable fallback model list with priority order
12
12
  - Three fallback modes: `cycle`, `stop`, and `retry-last`
13
+ - **Headless mode support** (`opencode run`): disable fallback or abort on rate limit
13
14
  - Session model tracking for sequential fallback across multiple rate limits
14
15
  - Cooldown period to prevent immediate retry on rate-limited models
15
16
  - **Exponential backoff with configurable retry policies**
@@ -117,6 +118,7 @@ Create a configuration file at one of these locations:
117
118
  | `enabled` | boolean | `true` | Enable/disable the plugin |
118
119
  | `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
119
120
  | `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
121
+ | `headlessOnRateLimit` | string | `undefined` | Headless mode behavior on rate limit (see below) |
120
122
  | `fallbackModels` | array | See below | List of fallback models in priority order |
121
123
  | `maxSubagentDepth` | number | `10` | Maximum nesting depth for subagent hierarchies |
122
124
  | `enableSubagentFallback` | boolean | `true` | Enable/disable fallback for subagent sessions |
@@ -236,6 +238,26 @@ my-repo/
236
238
 
237
239
  > **Note**: If you're using git worktrees and want different configurations per worktree, create config files in the worktree directories (locations 1-2). Otherwise, a single project-level or global config is sufficient.
238
240
 
241
+ ### Headless Mode (`opencode run`)
242
+
243
+ When running in headless mode (no TUI), model fallback is always disabled, because headless sessions should use only their configured model.
244
+
245
+ You can control what happens when a rate limit is detected in headless mode using the `headlessOnRateLimit` option:
246
+
247
+ | Value | Description |
248
+ |-------|-------------|
249
+ | *(not set)* | Default behavior — do nothing, let the server's retry loop handle it |
250
+ | `"ignore"` | Same as default — do nothing |
251
+ | `"abort"` | Abort the session immediately to terminate the prompt |
252
+
253
+ The `"abort"` option is useful when you want `opencode run` to fail fast on rate limits rather than waiting for the server's retry loop, which may retry indefinitely.
254
+
255
+ ```json
256
+ {
257
+ "headlessOnRateLimit": "abort"
258
+ }
259
+ ```
260
+
239
261
  ### Fallback Modes
240
262
 
241
263
  | Mode | Description |
package/dist/index.js CHANGED
@@ -158,8 +158,63 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
158
158
  if (!config.enabled) {
159
159
  return {};
160
160
  }
161
- // Disable fallback in headless mode — headless sessions should use their configured model only
161
+ // Headless mode — no model fallback, but optionally abort on rate limit
162
162
  if (isHeadless) {
163
+ if (config.headlessOnRateLimit === "abort") {
164
+ logger.info("Headless mode — will abort session on rate limit");
165
+ // Minimal setup: only error pattern detection + abort
166
+ const errorPatternRegistry = new ErrorPatternRegistry(logger);
167
+ if (config.errorPatterns?.custom) {
168
+ errorPatternRegistry.registerMany(config.errorPatterns.custom);
169
+ }
170
+ // Track sessions already aborted to avoid duplicate abort calls
171
+ const abortedSessions = new Set();
172
+ const abortSession = async (sessionID, source) => {
173
+ if (abortedSessions.has(sessionID))
174
+ return;
175
+ abortedSessions.add(sessionID);
176
+ logger.info(`Rate limit detected (${source}) — aborting session ${sessionID}`);
177
+ try {
178
+ await client.session.abort({ path: { id: sessionID } });
179
+ }
180
+ catch (err) {
181
+ logger.warn(`Failed to abort session ${sessionID}`, {
182
+ error: err instanceof Error ? err.message : String(err),
183
+ });
184
+ }
185
+ };
186
+ return {
187
+ event: async ({ event }) => {
188
+ if (isSessionErrorEvent(event)) {
189
+ const { sessionID, error } = event.properties;
190
+ if (sessionID && error && errorPatternRegistry.isRateLimitError(error)) {
191
+ await abortSession(sessionID, "session.error");
192
+ }
193
+ }
194
+ if (isMessageUpdatedEvent(event)) {
195
+ const info = event.properties.info;
196
+ if (info?.error && errorPatternRegistry.isRateLimitError(info.error)) {
197
+ await abortSession(info.sessionID, "message.updated");
198
+ }
199
+ }
200
+ if (isSessionStatusEvent(event)) {
201
+ const props = event.properties;
202
+ const status = props?.status;
203
+ if (status?.type === "retry" && status?.message) {
204
+ const message = status.message.toLowerCase();
205
+ const isRateLimitRetry = message.includes("usage limit") ||
206
+ message.includes("usage exceeded") ||
207
+ message.includes("rate limit") ||
208
+ message.includes("high concurrency") ||
209
+ message.includes("reduce concurrency");
210
+ if (isRateLimitRetry) {
211
+ await abortSession(props.sessionID, "session.status retry");
212
+ }
213
+ }
214
+ }
215
+ },
216
+ };
217
+ }
163
218
  logger.info("Headless mode detected — model fallback disabled");
164
219
  return {};
165
220
  }
@@ -228,6 +283,12 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
228
283
  }, CLEANUP_INTERVAL_MS);
229
284
  return {
230
285
  event: async ({ event }) => {
286
+ // Debug: log any event whose serialized payload mentions "exceeded", "free usage", or "credits", to identify how "Free usage exceeded" arrives
287
+ const rawEvt = event;
288
+ const evtJson = JSON.stringify(rawEvt, null, 0);
289
+ if (evtJson.toLowerCase().includes("exceeded") || evtJson.toLowerCase().includes("free usage") || evtJson.toLowerCase().includes("credits")) {
290
+ logger.info("DEBUG rate-limit-related event", { type: rawEvt.type, properties: rawEvt.properties });
291
+ }
231
292
  // Handle session.error events
232
293
  if (isSessionErrorEvent(event)) {
233
294
  const { sessionID, error } = event.properties;
@@ -275,6 +336,7 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
275
336
  if (status?.type === "retry" && status?.message) {
276
337
  const message = status.message.toLowerCase();
277
338
  const isRateLimitRetry = message.includes("usage limit") ||
339
+ message.includes("usage exceeded") ||
278
340
  message.includes("rate limit") ||
279
341
  message.includes("high concurrency") ||
280
342
  message.includes("reduce concurrency");
@@ -42,6 +42,7 @@ export class ErrorPatternRegistry {
42
42
  'ratelimit',
43
43
  'too many requests',
44
44
  'quota exceeded',
45
+ 'usage exceeded',
45
46
  ],
46
47
  priority: 90,
47
48
  });
@@ -43,9 +43,10 @@ export declare class FallbackHandler {
43
43
  modelID: string;
44
44
  } | null;
45
45
  /**
46
- * Queue prompt asynchronously (non-blocking) to schedule fallback.
47
- * The server's retry loop finishes naturally; it then picks up the queued prompt.
48
- * We do NOT call abort its AbortController signal persists and kills the new stream.
46
+ * Abort current session, wait for server to settle, then queue fallback prompt.
47
+ * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
48
+ * The delay allows the server to reset session state / AbortController before
49
+ * the new promptAsync creates a fresh stream.
49
50
  */
50
51
  retryWithModel(targetSessionID: string, model: FallbackModel, parts: MessagePart[], hierarchy: SessionHierarchy | null): Promise<void>;
51
52
  /**
@@ -92,9 +92,10 @@ export class FallbackHandler {
92
92
  return tracked ? { providerID: tracked.providerID, modelID: tracked.modelID } : null;
93
93
  }
94
94
  /**
95
- * Queue prompt asynchronously (non-blocking) to schedule fallback.
96
- * The server's retry loop finishes naturally; it then picks up the queued prompt.
97
- * We do NOT call abort — its AbortController signal persists and kills the new stream.
95
+ * Abort current session, wait for server to settle, then queue fallback prompt.
96
+ * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
97
+ * The delay allows the server to reset session state / AbortController before
98
+ * the new promptAsync creates a fresh stream.
98
99
  */
99
100
  async retryWithModel(targetSessionID, model, parts, hierarchy) {
100
101
  // Record model usage for dynamic prioritization
@@ -130,9 +131,21 @@ export class FallbackHandler {
130
131
  const modelKey = getModelKey(model.providerID, model.modelID);
131
132
  this.modelRequestStartTimes.set(modelKey, Date.now());
132
133
  }
133
- // Convert internal MessagePart to SDK-compatible format
134
+ // 1. Abort: stop the current retry loop
135
+ try {
136
+ await this.client.session.abort({ path: { id: targetSessionID } });
137
+ this.logger.info("Aborted session before fallback", { sessionID: targetSessionID });
138
+ }
139
+ catch (err) {
140
+ this.logger.warn("Failed to abort session before fallback", {
141
+ sessionID: targetSessionID,
142
+ error: err instanceof Error ? err.message : String(err),
143
+ });
144
+ }
145
+ // 2. Delay: let the server reset session state / AbortController
146
+ await new Promise(resolve => setTimeout(resolve, 500));
147
+ // 3. promptAsync: queue the fallback prompt (returns immediately)
134
148
  const sdkParts = convertPartsToSDKFormat(parts);
135
- // 1. promptAsync: queue the new prompt (returns immediately, non-blocking)
136
149
  await this.client.session.promptAsync({
137
150
  path: { id: targetSessionID },
138
151
  body: {
@@ -140,10 +153,6 @@ export class FallbackHandler {
140
153
  model: { providerID: model.providerID, modelID: model.modelID },
141
154
  },
142
155
  });
143
- // Do NOT call abort after promptAsync.
144
- // The AbortController signal persists and kills the newly queued stream too,
145
- // causing "interrupted" in TUI mode and server disposal in headless mode.
146
- // Let the server's retry loop finish naturally; it will pick up the queued prompt.
147
156
  await safeShowToast(this.client, {
148
157
  body: {
149
158
  title: "Fallback Queued",
@@ -17,6 +17,12 @@ export interface FallbackModel {
17
17
  * - "retry-last": Try the last model once, then reset to first on next prompt
18
18
  */
19
19
  export type FallbackMode = "cycle" | "stop" | "retry-last";
20
+ /**
21
+ * Headless mode behavior on rate limit:
22
+ * - "ignore": Do nothing, let server handle retries (default)
23
+ * - "abort": Abort the session to terminate the prompt immediately
24
+ */
25
+ export type HeadlessOnRateLimit = "ignore" | "abort";
20
26
  /**
21
27
  * Retry strategy type
22
28
  * - "immediate": Retry immediately without delay
@@ -234,6 +240,7 @@ export interface PluginConfig {
234
240
  cooldownMs: number;
235
241
  enabled: boolean;
236
242
  fallbackMode: FallbackMode;
243
+ headlessOnRateLimit?: HeadlessOnRateLimit;
237
244
  maxSubagentDepth?: number;
238
245
  enableSubagentFallback?: boolean;
239
246
  retryPolicy?: RetryPolicy;
@@ -547,6 +554,10 @@ export declare const DEFAULT_CIRCUIT_BREAKER_CONFIG: CircuitBreakerConfig;
547
554
  * Valid fallback modes
548
555
  */
549
556
  export declare const VALID_FALLBACK_MODES: FallbackMode[];
557
+ /**
558
+ * Valid headless on rate limit options
559
+ */
560
+ export declare const VALID_HEADLESS_ON_RATE_LIMIT: HeadlessOnRateLimit[];
550
561
  /**
551
562
  * Valid retry strategies
552
563
  */
@@ -46,6 +46,10 @@ export const DEFAULT_CIRCUIT_BREAKER_CONFIG = {
46
46
  * Valid fallback modes
47
47
  */
48
48
  export const VALID_FALLBACK_MODES = ["cycle", "stop", "retry-last"];
49
+ /**
50
+ * Valid headless on rate limit options
51
+ */
52
+ export const VALID_HEADLESS_ON_RATE_LIMIT = ["ignore", "abort"];
49
53
  /**
50
54
  * Valid retry strategies
51
55
  */
@@ -3,7 +3,7 @@
3
3
  */
4
4
  import { existsSync, readFileSync } from "fs";
5
5
  import { join, resolve, normalize, relative } from "path";
6
- import { DEFAULT_FALLBACK_MODELS, VALID_FALLBACK_MODES, VALID_RESET_INTERVALS, DEFAULT_RETRY_POLICY, VALID_RETRY_STRATEGIES, DEFAULT_CIRCUIT_BREAKER_CONFIG, } from '../types/index.js';
6
+ import { DEFAULT_FALLBACK_MODELS, VALID_FALLBACK_MODES, VALID_HEADLESS_ON_RATE_LIMIT, VALID_RESET_INTERVALS, DEFAULT_RETRY_POLICY, VALID_RETRY_STRATEGIES, DEFAULT_CIRCUIT_BREAKER_CONFIG, } from '../types/index.js';
7
7
  import { DEFAULT_HEALTH_TRACKER_CONFIG, DEFAULT_COOLDOWN_MS, DEFAULT_FALLBACK_MODE, DEFAULT_LOG_CONFIG, DEFAULT_METRICS_CONFIG, DEFAULT_CONFIG_RELOAD_CONFIG, DEFAULT_DYNAMIC_PRIORITIZATION_CONFIG, DEFAULT_ERROR_PATTERNS_CONFIG, DEFAULT_PATTERN_LEARNING_CONFIG, } from '../config/defaults.js';
8
8
  /**
9
9
  * Default plugin configuration
@@ -53,6 +53,7 @@ function validatePathSafety(path, allowedDirs) {
53
53
  */
54
54
  export function validateConfig(config) {
55
55
  const mode = config.fallbackMode;
56
+ const headlessOnRateLimit = config.headlessOnRateLimit;
56
57
  const resetInterval = config.metrics?.resetInterval;
57
58
  const strategy = config.retryPolicy?.strategy;
58
59
  return {
@@ -60,6 +61,7 @@ export function validateConfig(config) {
60
61
  ...config,
61
62
  fallbackModels: Array.isArray(config.fallbackModels) ? config.fallbackModels : DEFAULT_CONFIG.fallbackModels,
62
63
  fallbackMode: mode && VALID_FALLBACK_MODES.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
64
+ headlessOnRateLimit: headlessOnRateLimit && VALID_HEADLESS_ON_RATE_LIMIT.includes(headlessOnRateLimit) ? headlessOnRateLimit : undefined,
63
65
  retryPolicy: config.retryPolicy ? {
64
66
  ...DEFAULT_CONFIG.retryPolicy,
65
67
  ...config.retryPolicy,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@azumag/opencode-rate-limit-fallback",
3
- "version": "1.68.0",
3
+ "version": "1.70.0",
4
4
  "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",