npm - @azumag/opencode-rate-limit-fallback - Versions diffs - 1.69.0 → 1.70.0 - Mend

@azumag/opencode-rate-limit-fallback 1.69.0 → 1.70.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.js +8 -0
package/dist/src/errors/PatternRegistry.js +1 -0
package/dist/src/fallback/FallbackHandler.d.ts +4 -3
package/dist/src/fallback/FallbackHandler.js +18 -9
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -203,6 +203,7 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
                         if (status?.type === "retry" && status?.message) {
                             const message = status.message.toLowerCase();
                             const isRateLimitRetry = message.includes("usage limit") ||
+                                message.includes("usage exceeded") ||
                                 message.includes("rate limit") ||
                                 message.includes("high concurrency") ||
                                 message.includes("reduce concurrency");
@@ -282,6 +283,12 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
     }, CLEANUP_INTERVAL_MS);
     return {
         event: async ({ event }) => {
+            // Debug: log all events to identify how "Free usage exceeded" arrives
+            const rawEvt = event;
+            const evtJson = JSON.stringify(rawEvt, null, 0);
+            if (evtJson.toLowerCase().includes("exceeded") || evtJson.toLowerCase().includes("free usage") || evtJson.toLowerCase().includes("credits")) {
+                logger.info("DEBUG rate-limit-related event", { type: rawEvt.type, properties: rawEvt.properties });
+            }
             // Handle session.error events
             if (isSessionErrorEvent(event)) {
                 const { sessionID, error } = event.properties;
@@ -329,6 +336,7 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
                 if (status?.type === "retry" && status?.message) {
                     const message = status.message.toLowerCase();
                     const isRateLimitRetry = message.includes("usage limit") ||
+                        message.includes("usage exceeded") ||
                         message.includes("rate limit") ||
                         message.includes("high concurrency") ||
                         message.includes("reduce concurrency");

package/dist/src/errors/PatternRegistry.js CHANGED Viewed

@@ -42,6 +42,7 @@ export class ErrorPatternRegistry {
                 'ratelimit',
                 'too many requests',
                 'quota exceeded',
+                'usage exceeded',
             ],
             priority: 90,
         });

package/dist/src/fallback/FallbackHandler.d.ts CHANGED Viewed

@@ -43,9 +43,10 @@ export declare class FallbackHandler {
         modelID: string;
     } | null;
     /**
-     * Queue prompt asynchronously (non-blocking) to schedule fallback.
-     * The server's retry loop finishes naturally; it then picks up the queued prompt.
-     * We do NOT call abort — its AbortController signal persists and kills the new stream.
+     * Abort current session, wait for server to settle, then queue fallback prompt.
+     * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
+     * The delay allows the server to reset session state / AbortController before
+     * the new promptAsync creates a fresh stream.
      */
     retryWithModel(targetSessionID: string, model: FallbackModel, parts: MessagePart[], hierarchy: SessionHierarchy | null): Promise<void>;
     /**

package/dist/src/fallback/FallbackHandler.js CHANGED Viewed

@@ -92,9 +92,10 @@ export class FallbackHandler {
         return tracked ? { providerID: tracked.providerID, modelID: tracked.modelID } : null;
     }
     /**
-     * Queue prompt asynchronously (non-blocking) to schedule fallback.
-     * The server's retry loop finishes naturally; it then picks up the queued prompt.
-     * We do NOT call abort — its AbortController signal persists and kills the new stream.
+     * Abort current session, wait for server to settle, then queue fallback prompt.
+     * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
+     * The delay allows the server to reset session state / AbortController before
+     * the new promptAsync creates a fresh stream.
      */
     async retryWithModel(targetSessionID, model, parts, hierarchy) {
         // Record model usage for dynamic prioritization
@@ -130,9 +131,21 @@ export class FallbackHandler {
             const modelKey = getModelKey(model.providerID, model.modelID);
             this.modelRequestStartTimes.set(modelKey, Date.now());
         }
-        // Convert internal MessagePart to SDK-compatible format
+        // 1. Abort: stop the current retry loop
+        try {
+            await this.client.session.abort({ path: { id: targetSessionID } });
+            this.logger.info("Aborted session before fallback", { sessionID: targetSessionID });
+        }
+        catch (err) {
+            this.logger.warn("Failed to abort session before fallback", {
+                sessionID: targetSessionID,
+                error: err instanceof Error ? err.message : String(err),
+            });
+        }
+        // 2. Delay: let the server reset session state / AbortController
+        await new Promise(resolve => setTimeout(resolve, 500));
+        // 3. promptAsync: queue the fallback prompt (returns immediately)
         const sdkParts = convertPartsToSDKFormat(parts);
-        // 1. promptAsync: queue the new prompt (returns immediately, non-blocking)
         await this.client.session.promptAsync({
             path: { id: targetSessionID },
             body: {
@@ -140,10 +153,6 @@ export class FallbackHandler {
                 model: { providerID: model.providerID, modelID: model.modelID },
             },
         });
-        // Do NOT call abort after promptAsync.
-        // The AbortController signal persists and kills the newly queued stream too,
-        // causing "interrupted" in TUI mode and server disposal in headless mode.
-        // Let the server's retry loop finish naturally; it will pick up the queued prompt.
         await safeShowToast(this.client, {
             body: {
                 title: "Fallback Queued",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@azumag/opencode-rate-limit-fallback",
-  "version": "1.69.0",
+  "version": "1.70.0",
   "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",