npm - @azumag/opencode-rate-limit-fallback - Versions diffs - 1.68.0 → 1.70.0 - Mend

@azumag/opencode-rate-limit-fallback 1.68.0 → 1.70.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +22 -0
package/dist/index.js +63 -1
package/dist/src/errors/PatternRegistry.js +1 -0
package/dist/src/fallback/FallbackHandler.d.ts +4 -3
package/dist/src/fallback/FallbackHandler.js +18 -9
package/dist/src/types/index.d.ts +11 -0
package/dist/src/types/index.js +4 -0
package/dist/src/utils/config.js +3 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -10,6 +10,7 @@ OpenCode plugin that automatically switches to fallback models when rate limited
 - Automatically aborts the current request and retries with a fallback model
 - Configurable fallback model list with priority order
 - Three fallback modes: `cycle`, `stop`, and `retry-last`
+- **Headless mode support** (`opencode run`): disable fallback or abort on rate limit
 - Session model tracking for sequential fallback across multiple rate limits
 - Cooldown period to prevent immediate retry on rate-limited models
 - **Exponential backoff with configurable retry policies**
@@ -117,6 +118,7 @@ Create a configuration file at one of these locations:
   | `enabled` | boolean | `true` | Enable/disable the plugin |
   | `cooldownMs` | number | `60000` | Cooldown period (ms) before retrying a rate-limited model |
   | `fallbackMode` | string | `"cycle"` | Behavior when all models are exhausted (see below) |
+  | `headlessOnRateLimit` | string | `undefined` | Headless mode behavior on rate limit (see below) |
   | `fallbackModels` | array | See below | List of fallback models in priority order |
   | `maxSubagentDepth` | number | `10` | Maximum nesting depth for subagent hierarchies |
    | `enableSubagentFallback` | boolean | `true` | Enable/disable fallback for subagent sessions |
@@ -236,6 +238,26 @@ my-repo/
 > **Note**: If you're using git worktrees and want different configurations per worktree, create config files in the worktree directories (locations 1-2). Otherwise, a single project-level or global config is sufficient.
+### Headless Mode (`opencode run`)
+When running in headless mode (no TUI), model fallback is disabled by default because headless sessions should use their configured model only.
+You can control what happens when a rate limit is detected in headless mode using the `headlessOnRateLimit` option:
+| Value | Description |
+|-------|-------------|
+| *(not set)* | Default behavior — do nothing, let the server's retry loop handle it |
+| `"ignore"` | Same as default — do nothing |
+| `"abort"` | Abort the session immediately to terminate the prompt |
+The `"abort"` option is useful when you want `opencode run` to fail fast on rate limits rather than waiting for the server's retry loop, which may retry indefinitely.
+```json
+{
+  "headlessOnRateLimit": "abort"
+}
+```
 ### Fallback Modes
 | Mode | Description |

package/dist/index.js CHANGED Viewed

@@ -158,8 +158,63 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
     if (!config.enabled) {
         return {};
     }
-    // Disable fallback in headless mode — headless sessions should use their configured model only
+    // Headless mode — no model fallback, but optionally abort on rate limit
     if (isHeadless) {
+        if (config.headlessOnRateLimit === "abort") {
+            logger.info("Headless mode — will abort session on rate limit");
+            // Minimal setup: only error pattern detection + abort
+            const errorPatternRegistry = new ErrorPatternRegistry(logger);
+            if (config.errorPatterns?.custom) {
+                errorPatternRegistry.registerMany(config.errorPatterns.custom);
+            }
+            // Track sessions already aborted to avoid duplicate abort calls
+            const abortedSessions = new Set();
+            const abortSession = async (sessionID, source) => {
+                if (abortedSessions.has(sessionID))
+                    return;
+                abortedSessions.add(sessionID);
+                logger.info(`Rate limit detected (${source}) — aborting session ${sessionID}`);
+                try {
+                    await client.session.abort({ path: { id: sessionID } });
+                }
+                catch (err) {
+                    logger.warn(`Failed to abort session ${sessionID}`, {
+                        error: err instanceof Error ? err.message : String(err),
+                    });
+                }
+            };
+            return {
+                event: async ({ event }) => {
+                    if (isSessionErrorEvent(event)) {
+                        const { sessionID, error } = event.properties;
+                        if (sessionID && error && errorPatternRegistry.isRateLimitError(error)) {
+                            await abortSession(sessionID, "session.error");
+                        }
+                    }
+                    if (isMessageUpdatedEvent(event)) {
+                        const info = event.properties.info;
+                        if (info?.error && errorPatternRegistry.isRateLimitError(info.error)) {
+                            await abortSession(info.sessionID, "message.updated");
+                        }
+                    }
+                    if (isSessionStatusEvent(event)) {
+                        const props = event.properties;
+                        const status = props?.status;
+                        if (status?.type === "retry" && status?.message) {
+                            const message = status.message.toLowerCase();
+                            const isRateLimitRetry = message.includes("usage limit") ||
+                                message.includes("usage exceeded") ||
+                                message.includes("rate limit") ||
+                                message.includes("high concurrency") ||
+                                message.includes("reduce concurrency");
+                            if (isRateLimitRetry) {
+                                await abortSession(props.sessionID, "session.status retry");
+                            }
+                        }
+                    }
+                },
+            };
+        }
         logger.info("Headless mode detected — model fallback disabled");
         return {};
     }
@@ -228,6 +283,12 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
     }, CLEANUP_INTERVAL_MS);
     return {
         event: async ({ event }) => {
+            // Debug: log all events to identify how "Free usage exceeded" arrives
+            const rawEvt = event;
+            const evtJson = JSON.stringify(rawEvt, null, 0);
+            if (evtJson.toLowerCase().includes("exceeded") || evtJson.toLowerCase().includes("free usage") || evtJson.toLowerCase().includes("credits")) {
+                logger.info("DEBUG rate-limit-related event", { type: rawEvt.type, properties: rawEvt.properties });
+            }
             // Handle session.error events
             if (isSessionErrorEvent(event)) {
                 const { sessionID, error } = event.properties;
@@ -275,6 +336,7 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
                 if (status?.type === "retry" && status?.message) {
                     const message = status.message.toLowerCase();
                     const isRateLimitRetry = message.includes("usage limit") ||
+                        message.includes("usage exceeded") ||
                         message.includes("rate limit") ||
                         message.includes("high concurrency") ||
                         message.includes("reduce concurrency");

package/dist/src/errors/PatternRegistry.js CHANGED Viewed

@@ -42,6 +42,7 @@ export class ErrorPatternRegistry {
                 'ratelimit',
                 'too many requests',
                 'quota exceeded',
+                'usage exceeded',
             ],
             priority: 90,
         });

package/dist/src/fallback/FallbackHandler.d.ts CHANGED Viewed

@@ -43,9 +43,10 @@ export declare class FallbackHandler {
         modelID: string;
     } | null;
     /**
-     * Queue prompt asynchronously (non-blocking) to schedule fallback.
-     * The server's retry loop finishes naturally; it then picks up the queued prompt.
-     * We do NOT call abort — its AbortController signal persists and kills the new stream.
+     * Abort current session, wait for server to settle, then queue fallback prompt.
+     * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
+     * The delay allows the server to reset session state / AbortController before
+     * the new promptAsync creates a fresh stream.
      */
     retryWithModel(targetSessionID: string, model: FallbackModel, parts: MessagePart[], hierarchy: SessionHierarchy | null): Promise<void>;
     /**

package/dist/src/fallback/FallbackHandler.js CHANGED Viewed

@@ -92,9 +92,10 @@ export class FallbackHandler {
         return tracked ? { providerID: tracked.providerID, modelID: tracked.modelID } : null;
     }
     /**
-     * Queue prompt asynchronously (non-blocking) to schedule fallback.
-     * The server's retry loop finishes naturally; it then picks up the queued prompt.
-     * We do NOT call abort — its AbortController signal persists and kills the new stream.
+     * Abort current session, wait for server to settle, then queue fallback prompt.
+     * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
+     * The delay allows the server to reset session state / AbortController before
+     * the new promptAsync creates a fresh stream.
      */
     async retryWithModel(targetSessionID, model, parts, hierarchy) {
         // Record model usage for dynamic prioritization
@@ -130,9 +131,21 @@ export class FallbackHandler {
             const modelKey = getModelKey(model.providerID, model.modelID);
             this.modelRequestStartTimes.set(modelKey, Date.now());
         }
-        // Convert internal MessagePart to SDK-compatible format
+        // 1. Abort: stop the current retry loop
+        try {
+            await this.client.session.abort({ path: { id: targetSessionID } });
+            this.logger.info("Aborted session before fallback", { sessionID: targetSessionID });
+        }
+        catch (err) {
+            this.logger.warn("Failed to abort session before fallback", {
+                sessionID: targetSessionID,
+                error: err instanceof Error ? err.message : String(err),
+            });
+        }
+        // 2. Delay: let the server reset session state / AbortController
+        await new Promise(resolve => setTimeout(resolve, 500));
+        // 3. promptAsync: queue the fallback prompt (returns immediately)
         const sdkParts = convertPartsToSDKFormat(parts);
-        // 1. promptAsync: queue the new prompt (returns immediately, non-blocking)
         await this.client.session.promptAsync({
             path: { id: targetSessionID },
             body: {
@@ -140,10 +153,6 @@ export class FallbackHandler {
                 model: { providerID: model.providerID, modelID: model.modelID },
             },
         });
-        // Do NOT call abort after promptAsync.
-        // The AbortController signal persists and kills the newly queued stream too,
-        // causing "interrupted" in TUI mode and server disposal in headless mode.
-        // Let the server's retry loop finish naturally; it will pick up the queued prompt.
         await safeShowToast(this.client, {
             body: {
                 title: "Fallback Queued",

package/dist/src/types/index.d.ts CHANGED Viewed

@@ -17,6 +17,12 @@ export interface FallbackModel {
  * - "retry-last": Try the last model once, then reset to first on next prompt
  */
 export type FallbackMode = "cycle" | "stop" | "retry-last";
+/**
+ * Headless mode behavior on rate limit:
+ * - "ignore": Do nothing, let server handle retries (default)
+ * - "abort": Abort the session to terminate the prompt immediately
+ */
+export type HeadlessOnRateLimit = "ignore" | "abort";
 /**
  * Retry strategy type
  * - "immediate": Retry immediately without delay
@@ -234,6 +240,7 @@ export interface PluginConfig {
     cooldownMs: number;
     enabled: boolean;
     fallbackMode: FallbackMode;
+    headlessOnRateLimit?: HeadlessOnRateLimit;
     maxSubagentDepth?: number;
     enableSubagentFallback?: boolean;
     retryPolicy?: RetryPolicy;
@@ -547,6 +554,10 @@ export declare const DEFAULT_CIRCUIT_BREAKER_CONFIG: CircuitBreakerConfig;
  * Valid fallback modes
  */
 export declare const VALID_FALLBACK_MODES: FallbackMode[];
+/**
+ * Valid headless on rate limit options
+ */
+export declare const VALID_HEADLESS_ON_RATE_LIMIT: HeadlessOnRateLimit[];
 /**
  * Valid retry strategies
  */

package/dist/src/types/index.js CHANGED Viewed

@@ -46,6 +46,10 @@ export const DEFAULT_CIRCUIT_BREAKER_CONFIG = {
  * Valid fallback modes
  */
 export const VALID_FALLBACK_MODES = ["cycle", "stop", "retry-last"];
+/**
+ * Valid headless on rate limit options
+ */
+export const VALID_HEADLESS_ON_RATE_LIMIT = ["ignore", "abort"];
 /**
  * Valid retry strategies
  */

package/dist/src/utils/config.js CHANGED Viewed

@@ -3,7 +3,7 @@
  */
 import { existsSync, readFileSync } from "fs";
 import { join, resolve, normalize, relative } from "path";
-import { DEFAULT_FALLBACK_MODELS, VALID_FALLBACK_MODES, VALID_RESET_INTERVALS, DEFAULT_RETRY_POLICY, VALID_RETRY_STRATEGIES, DEFAULT_CIRCUIT_BREAKER_CONFIG, } from '../types/index.js';
+import { DEFAULT_FALLBACK_MODELS, VALID_FALLBACK_MODES, VALID_HEADLESS_ON_RATE_LIMIT, VALID_RESET_INTERVALS, DEFAULT_RETRY_POLICY, VALID_RETRY_STRATEGIES, DEFAULT_CIRCUIT_BREAKER_CONFIG, } from '../types/index.js';
 import { DEFAULT_HEALTH_TRACKER_CONFIG, DEFAULT_COOLDOWN_MS, DEFAULT_FALLBACK_MODE, DEFAULT_LOG_CONFIG, DEFAULT_METRICS_CONFIG, DEFAULT_CONFIG_RELOAD_CONFIG, DEFAULT_DYNAMIC_PRIORITIZATION_CONFIG, DEFAULT_ERROR_PATTERNS_CONFIG, DEFAULT_PATTERN_LEARNING_CONFIG, } from '../config/defaults.js';
 /**
  * Default plugin configuration
@@ -53,6 +53,7 @@ function validatePathSafety(path, allowedDirs) {
  */
 export function validateConfig(config) {
     const mode = config.fallbackMode;
+    const headlessOnRateLimit = config.headlessOnRateLimit;
     const resetInterval = config.metrics?.resetInterval;
     const strategy = config.retryPolicy?.strategy;
     return {
@@ -60,6 +61,7 @@ export function validateConfig(config) {
         ...config,
         fallbackModels: Array.isArray(config.fallbackModels) ? config.fallbackModels : DEFAULT_CONFIG.fallbackModels,
         fallbackMode: mode && VALID_FALLBACK_MODES.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
+        headlessOnRateLimit: headlessOnRateLimit && VALID_HEADLESS_ON_RATE_LIMIT.includes(headlessOnRateLimit) ? headlessOnRateLimit : undefined,
         retryPolicy: config.retryPolicy ? {
             ...DEFAULT_CONFIG.retryPolicy,
             ...config.retryPolicy,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@azumag/opencode-rate-limit-fallback",
-  "version": "1.68.0",
+  "version": "1.70.0",
   "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",