@azumag/opencode-rate-limit-fallback 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +49 -8
- package/package.json +1 -1
package/index.ts
CHANGED
|
@@ -76,7 +76,9 @@ function isRateLimitError(error: any): boolean {
|
|
|
76
76
|
"quota exceeded",
|
|
77
77
|
"resource exhausted",
|
|
78
78
|
"usage limit",
|
|
79
|
-
"
|
|
79
|
+
"high concurrency usage of this api",
|
|
80
|
+
"high concurrency",
|
|
81
|
+
"reduce concurrency",
|
|
80
82
|
"429",
|
|
81
83
|
];
|
|
82
84
|
|
|
@@ -97,6 +99,8 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
|
|
|
97
99
|
|
|
98
100
|
const rateLimitedModels = new Map<string, number>();
|
|
99
101
|
const retryState = new Map<string, { attemptedModels: Set<string>; lastAttemptTime: number }>();
|
|
102
|
+
const currentSessionModel = new Map<string, { providerID: string; modelID: string }>();
|
|
103
|
+
const fallbackInProgress = new Map<string, number>(); // sessionID -> timestamp
|
|
100
104
|
|
|
101
105
|
function isModelRateLimited(providerID: string, modelID: string): boolean {
|
|
102
106
|
const key = getModelKey(providerID, modelID);
|
|
@@ -140,12 +144,28 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
|
|
|
140
144
|
|
|
141
145
|
async function handleRateLimitFallback(sessionID: string, currentProviderID: string, currentModelID: string) {
|
|
142
146
|
try {
|
|
147
|
+
// Prevent duplicate fallback processing within 5 seconds
|
|
148
|
+
const lastFallback = fallbackInProgress.get(sessionID);
|
|
149
|
+
if (lastFallback && Date.now() - lastFallback < 5000) {
|
|
150
|
+
return;
|
|
151
|
+
}
|
|
152
|
+
fallbackInProgress.set(sessionID, Date.now());
|
|
153
|
+
|
|
154
|
+
// If no model info provided, try to get from tracked session model
|
|
155
|
+
if (!currentProviderID || !currentModelID) {
|
|
156
|
+
const tracked = currentSessionModel.get(sessionID);
|
|
157
|
+
if (tracked) {
|
|
158
|
+
currentProviderID = tracked.providerID;
|
|
159
|
+
currentModelID = tracked.modelID;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
143
163
|
await client.session.abort({ path: { id: sessionID } });
|
|
144
164
|
|
|
145
165
|
await client.tui.showToast({
|
|
146
166
|
body: {
|
|
147
167
|
title: "Rate Limit Detected",
|
|
148
|
-
message:
|
|
168
|
+
message: `Switching from ${currentModelID || 'current model'}...`,
|
|
149
169
|
variant: "warning",
|
|
150
170
|
duration: 3000,
|
|
151
171
|
},
|
|
@@ -171,7 +191,17 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
|
|
|
171
191
|
state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
|
|
172
192
|
}
|
|
173
193
|
|
|
174
|
-
|
|
194
|
+
let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
|
|
195
|
+
|
|
196
|
+
// If no model found and we've attempted models, reset and try again from the beginning
|
|
197
|
+
if (!nextModel && state.attemptedModels.size > 0) {
|
|
198
|
+
state.attemptedModels.clear();
|
|
199
|
+
// Keep the current model marked as attempted to avoid immediate retry
|
|
200
|
+
if (currentProviderID && currentModelID) {
|
|
201
|
+
state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
|
|
202
|
+
}
|
|
203
|
+
nextModel = findNextAvailableModel("", "", state.attemptedModels);
|
|
204
|
+
}
|
|
175
205
|
|
|
176
206
|
if (!nextModel) {
|
|
177
207
|
await client.tui.showToast({
|
|
@@ -209,6 +239,9 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
|
|
|
209
239
|
},
|
|
210
240
|
});
|
|
211
241
|
|
|
242
|
+
// Track the new model for this session
|
|
243
|
+
currentSessionModel.set(sessionID, { providerID: nextModel.providerID, modelID: nextModel.modelID });
|
|
244
|
+
|
|
212
245
|
await client.session.prompt({
|
|
213
246
|
path: { id: sessionID },
|
|
214
247
|
body: {
|
|
@@ -227,8 +260,11 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
|
|
|
227
260
|
});
|
|
228
261
|
|
|
229
262
|
retryState.delete(stateKey);
|
|
263
|
+
// Clear fallback flag to allow next fallback if needed
|
|
264
|
+
fallbackInProgress.delete(sessionID);
|
|
230
265
|
} catch (err) {
|
|
231
|
-
// Fallback failed
|
|
266
|
+
// Fallback failed, clear the flag
|
|
267
|
+
fallbackInProgress.delete(sessionID);
|
|
232
268
|
}
|
|
233
269
|
}
|
|
234
270
|
|
|
@@ -254,10 +290,15 @@ export const RateLimitFallback: Plugin = async ({ client, directory }) => {
|
|
|
254
290
|
|
|
255
291
|
if (status?.type === "retry" && status?.message) {
|
|
256
292
|
const message = status.message.toLowerCase();
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
293
|
+
const isRateLimitRetry =
|
|
294
|
+
message.includes("usage limit") ||
|
|
295
|
+
message.includes("rate limit") ||
|
|
296
|
+
message.includes("high concurrency") ||
|
|
297
|
+
message.includes("reduce concurrency");
|
|
298
|
+
|
|
299
|
+
if (isRateLimitRetry) {
|
|
300
|
+
// Try fallback on any attempt, handleRateLimitFallback will manage state
|
|
301
|
+
await handleRateLimitFallback(props.sessionID, "", "");
|
|
261
302
|
}
|
|
262
303
|
}
|
|
263
304
|
}
|