@azumag/opencode-rate-limit-fallback 1.69.0 → 1.70.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -203,6 +203,7 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
203
203
  if (status?.type === "retry" && status?.message) {
204
204
  const message = status.message.toLowerCase();
205
205
  const isRateLimitRetry = message.includes("usage limit") ||
206
+ message.includes("usage exceeded") ||
206
207
  message.includes("rate limit") ||
207
208
  message.includes("high concurrency") ||
208
209
  message.includes("reduce concurrency");
@@ -282,6 +283,12 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
282
283
  }, CLEANUP_INTERVAL_MS);
283
284
  return {
284
285
  event: async ({ event }) => {
286
+ // Debug: log all events to identify how "Free usage exceeded" arrives
287
+ const rawEvt = event;
288
+ const evtJson = JSON.stringify(rawEvt, null, 0);
289
+ if (evtJson.toLowerCase().includes("exceeded") || evtJson.toLowerCase().includes("free usage") || evtJson.toLowerCase().includes("credits")) {
290
+ logger.info("DEBUG rate-limit-related event", { type: rawEvt.type, properties: rawEvt.properties });
291
+ }
285
292
  // Handle session.error events
286
293
  if (isSessionErrorEvent(event)) {
287
294
  const { sessionID, error } = event.properties;
@@ -329,6 +336,7 @@ export const RateLimitFallback = async ({ client, directory, worktree }) => {
329
336
  if (status?.type === "retry" && status?.message) {
330
337
  const message = status.message.toLowerCase();
331
338
  const isRateLimitRetry = message.includes("usage limit") ||
339
+ message.includes("usage exceeded") ||
332
340
  message.includes("rate limit") ||
333
341
  message.includes("high concurrency") ||
334
342
  message.includes("reduce concurrency");
@@ -42,6 +42,7 @@ export class ErrorPatternRegistry {
42
42
  'ratelimit',
43
43
  'too many requests',
44
44
  'quota exceeded',
45
+ 'usage exceeded',
45
46
  ],
46
47
  priority: 90,
47
48
  });
@@ -43,9 +43,10 @@ export declare class FallbackHandler {
43
43
  modelID: string;
44
44
  } | null;
45
45
  /**
46
- * Queue prompt asynchronously (non-blocking) to schedule fallback.
47
- * The server's retry loop finishes naturally; it then picks up the queued prompt.
48
- * We do NOT call abort: its AbortController signal persists and kills the new stream.
46
+ * Abort current session, wait for server to settle, then queue fallback prompt.
47
+ * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
48
+ * The delay allows the server to reset session state / AbortController before
49
+ * the new promptAsync creates a fresh stream.
49
50
  */
50
51
  retryWithModel(targetSessionID: string, model: FallbackModel, parts: MessagePart[], hierarchy: SessionHierarchy | null): Promise<void>;
51
52
  /**
@@ -92,9 +92,10 @@ export class FallbackHandler {
92
92
  return tracked ? { providerID: tracked.providerID, modelID: tracked.modelID } : null;
93
93
  }
94
94
  /**
95
- * Queue prompt asynchronously (non-blocking) to schedule fallback.
96
- * The server's retry loop finishes naturally; it then picks up the queued prompt.
97
- * We do NOT call abort: its AbortController signal persists and kills the new stream.
95
+ * Abort current session, wait for server to settle, then queue fallback prompt.
96
+ * Abort stops the retry loop for permanent errors (e.g. "Free usage exceeded").
97
+ * The delay allows the server to reset session state / AbortController before
98
+ * the new promptAsync creates a fresh stream.
98
99
  */
99
100
  async retryWithModel(targetSessionID, model, parts, hierarchy) {
100
101
  // Record model usage for dynamic prioritization
@@ -130,9 +131,21 @@ export class FallbackHandler {
130
131
  const modelKey = getModelKey(model.providerID, model.modelID);
131
132
  this.modelRequestStartTimes.set(modelKey, Date.now());
132
133
  }
133
- // Convert internal MessagePart to SDK-compatible format
134
+ // 1. Abort: stop the current retry loop
135
+ try {
136
+ await this.client.session.abort({ path: { id: targetSessionID } });
137
+ this.logger.info("Aborted session before fallback", { sessionID: targetSessionID });
138
+ }
139
+ catch (err) {
140
+ this.logger.warn("Failed to abort session before fallback", {
141
+ sessionID: targetSessionID,
142
+ error: err instanceof Error ? err.message : String(err),
143
+ });
144
+ }
145
+ // 2. Delay: let the server reset session state / AbortController
146
+ await new Promise(resolve => setTimeout(resolve, 500));
147
+ // 3. promptAsync: queue the fallback prompt (returns immediately)
134
148
  const sdkParts = convertPartsToSDKFormat(parts);
135
- // 1. promptAsync: queue the new prompt (returns immediately, non-blocking)
136
149
  await this.client.session.promptAsync({
137
150
  path: { id: targetSessionID },
138
151
  body: {
@@ -140,10 +153,6 @@ export class FallbackHandler {
140
153
  model: { providerID: model.providerID, modelID: model.modelID },
141
154
  },
142
155
  });
143
- // Do NOT call abort after promptAsync.
144
- // The AbortController signal persists and kills the newly queued stream too,
145
- // causing "interrupted" in TUI mode and server disposal in headless mode.
146
- // Let the server's retry loop finish naturally; it will pick up the queued prompt.
147
156
  await safeShowToast(this.client, {
148
157
  body: {
149
158
  title: "Fallback Queued",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@azumag/opencode-rate-limit-fallback",
3
- "version": "1.69.0",
3
+ "version": "1.70.0",
4
4
  "description": "OpenCode plugin that automatically switches to fallback models when rate limited",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",