@azumag/opencode-rate-limit-fallback 1.19.2 → 1.21.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +132 -1
- package/dist/index.d.ts +8 -1
- package/dist/index.js +59 -968
- package/dist/logger.d.ts +0 -1
- package/dist/logger.js +0 -1
- package/dist/src/fallback/FallbackHandler.d.ts +73 -0
- package/dist/src/fallback/FallbackHandler.js +341 -0
- package/dist/src/fallback/ModelSelector.d.ts +37 -0
- package/dist/src/fallback/ModelSelector.js +134 -0
- package/dist/src/metrics/MetricsManager.d.ts +81 -0
- package/dist/src/metrics/MetricsManager.js +377 -0
- package/dist/src/metrics/types.d.ts +11 -0
- package/dist/src/metrics/types.js +11 -0
- package/dist/src/session/SubagentTracker.d.ts +36 -0
- package/dist/src/session/SubagentTracker.js +114 -0
- package/dist/src/types/index.d.ts +262 -0
- package/dist/src/types/index.js +46 -0
- package/dist/src/utils/config.d.ts +16 -0
- package/dist/src/utils/config.js +78 -0
- package/dist/src/utils/errorDetection.d.ts +7 -0
- package/dist/src/utils/errorDetection.js +34 -0
- package/dist/src/utils/helpers.d.ts +34 -0
- package/dist/src/utils/helpers.js +95 -0
- package/package.json +3 -2
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/logger.d.ts.map +0 -1
- package/dist/logger.js.map +0 -1
package/dist/index.js
CHANGED
|
@@ -1,7 +1,23 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Rate Limit Fallback Plugin - Main entry point
|
|
3
|
+
*
|
|
4
|
+
* This plugin automatically switches to fallback models when rate limited
|
|
5
|
+
*/
|
|
6
|
+
import { existsSync, readFileSync } from "fs";
|
|
2
7
|
import { join } from "path";
|
|
3
8
|
import { createLogger } from "./logger.js";
|
|
4
|
-
|
|
9
|
+
import { MetricsManager } from "./src/metrics/MetricsManager.js";
|
|
10
|
+
import { FallbackHandler } from "./src/fallback/FallbackHandler.js";
|
|
11
|
+
import { loadConfig } from "./src/utils/config.js";
|
|
12
|
+
import { isRateLimitError } from "./src/utils/errorDetection.js";
|
|
13
|
+
import { initSubagentTracker, registerSubagent, getRootSession, getHierarchy, cleanupStaleEntries as clearHierarchyEntries, clearAll as clearAllHierarchies, } from "./src/session/SubagentTracker.js";
|
|
14
|
+
import { CLEANUP_INTERVAL_MS } from "./src/types/index.js";
|
|
15
|
+
// ============================================================================
|
|
16
|
+
// Event Type Guards
|
|
17
|
+
// ============================================================================
|
|
18
|
+
/**
|
|
19
|
+
* Check if event is a session error event
|
|
20
|
+
*/
|
|
5
21
|
function isSessionErrorEvent(event) {
|
|
6
22
|
return event.type === "session.error" &&
|
|
7
23
|
typeof event.properties === "object" &&
|
|
@@ -9,18 +25,26 @@ function isSessionErrorEvent(event) {
|
|
|
9
25
|
"sessionID" in event.properties &&
|
|
10
26
|
"error" in event.properties;
|
|
11
27
|
}
|
|
28
|
+
/**
|
|
29
|
+
* Check if event is a message updated event
|
|
30
|
+
*/
|
|
12
31
|
function isMessageUpdatedEvent(event) {
|
|
13
32
|
return event.type === "message.updated" &&
|
|
14
33
|
typeof event.properties === "object" &&
|
|
15
34
|
event.properties !== null &&
|
|
16
35
|
"info" in event.properties;
|
|
17
36
|
}
|
|
37
|
+
/**
|
|
38
|
+
* Check if event is a session status event
|
|
39
|
+
*/
|
|
18
40
|
function isSessionStatusEvent(event) {
|
|
19
41
|
return event.type === "session.status" &&
|
|
20
42
|
typeof event.properties === "object" &&
|
|
21
43
|
event.properties !== null;
|
|
22
44
|
}
|
|
23
|
-
|
|
45
|
+
/**
|
|
46
|
+
* Check if event is a subagent session created event
|
|
47
|
+
*/
|
|
24
48
|
function isSubagentSessionCreatedEvent(event) {
|
|
25
49
|
return event.type === "subagent.session.created" &&
|
|
26
50
|
typeof event.properties === "object" &&
|
|
@@ -28,474 +52,9 @@ function isSubagentSessionCreatedEvent(event) {
|
|
|
28
52
|
"sessionID" in event.properties &&
|
|
29
53
|
"parentSessionID" in event.properties;
|
|
30
54
|
}
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
{ providerID: "google", modelID: "gemini-2.5-flash" },
|
|
35
|
-
];
|
|
36
|
-
const VALID_FALLBACK_MODES = ["cycle", "stop", "retry-last"];
|
|
37
|
-
const VALID_RESET_INTERVALS = ["hourly", "daily", "weekly"];
|
|
38
|
-
const RESET_INTERVAL_MS = {
|
|
39
|
-
hourly: 60 * 60 * 1000,
|
|
40
|
-
daily: 24 * 60 * 60 * 1000,
|
|
41
|
-
weekly: 7 * 24 * 60 * 60 * 1000,
|
|
42
|
-
};
|
|
43
|
-
// Metrics management
|
|
44
|
-
class MetricsManager {
|
|
45
|
-
metrics;
|
|
46
|
-
config;
|
|
47
|
-
logger;
|
|
48
|
-
resetTimer = null;
|
|
49
|
-
constructor(config, logger) {
|
|
50
|
-
this.config = config;
|
|
51
|
-
this.logger = logger;
|
|
52
|
-
this.metrics = {
|
|
53
|
-
rateLimits: new Map(),
|
|
54
|
-
fallbacks: {
|
|
55
|
-
total: 0,
|
|
56
|
-
successful: 0,
|
|
57
|
-
failed: 0,
|
|
58
|
-
averageDuration: 0,
|
|
59
|
-
byTargetModel: new Map(),
|
|
60
|
-
},
|
|
61
|
-
modelPerformance: new Map(),
|
|
62
|
-
startedAt: Date.now(),
|
|
63
|
-
generatedAt: Date.now(),
|
|
64
|
-
};
|
|
65
|
-
if (this.config.enabled) {
|
|
66
|
-
this.startResetTimer();
|
|
67
|
-
}
|
|
68
|
-
}
|
|
69
|
-
startResetTimer() {
|
|
70
|
-
if (this.resetTimer) {
|
|
71
|
-
clearInterval(this.resetTimer);
|
|
72
|
-
}
|
|
73
|
-
const intervalMs = RESET_INTERVAL_MS[this.config.resetInterval];
|
|
74
|
-
this.resetTimer = setInterval(() => {
|
|
75
|
-
this.reset();
|
|
76
|
-
}, intervalMs);
|
|
77
|
-
}
|
|
78
|
-
reset() {
|
|
79
|
-
this.metrics = {
|
|
80
|
-
rateLimits: new Map(),
|
|
81
|
-
fallbacks: {
|
|
82
|
-
total: 0,
|
|
83
|
-
successful: 0,
|
|
84
|
-
failed: 0,
|
|
85
|
-
averageDuration: 0,
|
|
86
|
-
byTargetModel: new Map(),
|
|
87
|
-
},
|
|
88
|
-
modelPerformance: new Map(),
|
|
89
|
-
startedAt: Date.now(),
|
|
90
|
-
generatedAt: Date.now(),
|
|
91
|
-
};
|
|
92
|
-
this.logger.debug("Metrics reset");
|
|
93
|
-
}
|
|
94
|
-
recordRateLimit(providerID, modelID) {
|
|
95
|
-
if (!this.config.enabled)
|
|
96
|
-
return;
|
|
97
|
-
const key = getModelKey(providerID, modelID);
|
|
98
|
-
const now = Date.now();
|
|
99
|
-
const existing = this.metrics.rateLimits.get(key);
|
|
100
|
-
if (existing) {
|
|
101
|
-
const intervalMs = now - existing.lastOccurrence;
|
|
102
|
-
existing.count++;
|
|
103
|
-
existing.lastOccurrence = now;
|
|
104
|
-
existing.averageInterval = existing.averageInterval
|
|
105
|
-
? (existing.averageInterval + intervalMs) / 2
|
|
106
|
-
: intervalMs;
|
|
107
|
-
this.metrics.rateLimits.set(key, existing);
|
|
108
|
-
}
|
|
109
|
-
else {
|
|
110
|
-
this.metrics.rateLimits.set(key, {
|
|
111
|
-
count: 1,
|
|
112
|
-
firstOccurrence: now,
|
|
113
|
-
lastOccurrence: now,
|
|
114
|
-
});
|
|
115
|
-
}
|
|
116
|
-
}
|
|
117
|
-
recordFallbackStart() {
|
|
118
|
-
if (!this.config.enabled)
|
|
119
|
-
return 0;
|
|
120
|
-
return Date.now();
|
|
121
|
-
}
|
|
122
|
-
recordFallbackSuccess(targetProviderID, targetModelID, startTime) {
|
|
123
|
-
if (!this.config.enabled)
|
|
124
|
-
return;
|
|
125
|
-
const duration = Date.now() - startTime;
|
|
126
|
-
const key = getModelKey(targetProviderID, targetModelID);
|
|
127
|
-
this.metrics.fallbacks.total++;
|
|
128
|
-
this.metrics.fallbacks.successful++;
|
|
129
|
-
// Update average duration
|
|
130
|
-
const totalDuration = this.metrics.fallbacks.averageDuration * (this.metrics.fallbacks.successful - 1);
|
|
131
|
-
this.metrics.fallbacks.averageDuration = (totalDuration + duration) / this.metrics.fallbacks.successful;
|
|
132
|
-
// Update target model metrics
|
|
133
|
-
const targetMetrics = this.metrics.fallbacks.byTargetModel.get(key) || {
|
|
134
|
-
usedAsFallback: 0,
|
|
135
|
-
successful: 0,
|
|
136
|
-
failed: 0,
|
|
137
|
-
};
|
|
138
|
-
targetMetrics.usedAsFallback++;
|
|
139
|
-
targetMetrics.successful++;
|
|
140
|
-
this.metrics.fallbacks.byTargetModel.set(key, targetMetrics);
|
|
141
|
-
}
|
|
142
|
-
recordFallbackFailure() {
|
|
143
|
-
if (!this.config.enabled)
|
|
144
|
-
return;
|
|
145
|
-
this.metrics.fallbacks.total++;
|
|
146
|
-
this.metrics.fallbacks.failed++;
|
|
147
|
-
}
|
|
148
|
-
recordModelRequest(providerID, modelID) {
|
|
149
|
-
if (!this.config.enabled)
|
|
150
|
-
return;
|
|
151
|
-
const key = getModelKey(providerID, modelID);
|
|
152
|
-
const existing = this.metrics.modelPerformance.get(key) || {
|
|
153
|
-
requests: 0,
|
|
154
|
-
successes: 0,
|
|
155
|
-
failures: 0,
|
|
156
|
-
};
|
|
157
|
-
existing.requests++;
|
|
158
|
-
this.metrics.modelPerformance.set(key, existing);
|
|
159
|
-
}
|
|
160
|
-
recordModelSuccess(providerID, modelID, responseTime) {
|
|
161
|
-
if (!this.config.enabled)
|
|
162
|
-
return;
|
|
163
|
-
const key = getModelKey(providerID, modelID);
|
|
164
|
-
const existing = this.metrics.modelPerformance.get(key) || {
|
|
165
|
-
requests: 0,
|
|
166
|
-
successes: 0,
|
|
167
|
-
failures: 0,
|
|
168
|
-
};
|
|
169
|
-
existing.successes++;
|
|
170
|
-
// Update average response time
|
|
171
|
-
const totalTime = (existing.averageResponseTime || 0) * (existing.successes - 1);
|
|
172
|
-
existing.averageResponseTime = (totalTime + responseTime) / existing.successes;
|
|
173
|
-
this.metrics.modelPerformance.set(key, existing);
|
|
174
|
-
}
|
|
175
|
-
recordModelFailure(providerID, modelID) {
|
|
176
|
-
if (!this.config.enabled)
|
|
177
|
-
return;
|
|
178
|
-
const key = getModelKey(providerID, modelID);
|
|
179
|
-
const existing = this.metrics.modelPerformance.get(key) || {
|
|
180
|
-
requests: 0,
|
|
181
|
-
successes: 0,
|
|
182
|
-
failures: 0,
|
|
183
|
-
};
|
|
184
|
-
existing.failures++;
|
|
185
|
-
this.metrics.modelPerformance.set(key, existing);
|
|
186
|
-
}
|
|
187
|
-
getMetrics() {
|
|
188
|
-
this.metrics.generatedAt = Date.now();
|
|
189
|
-
return { ...this.metrics };
|
|
190
|
-
}
|
|
191
|
-
export(format = "json") {
|
|
192
|
-
const metrics = this.getMetrics();
|
|
193
|
-
switch (format) {
|
|
194
|
-
case "pretty":
|
|
195
|
-
return this.exportPretty(metrics);
|
|
196
|
-
case "csv":
|
|
197
|
-
return this.exportCSV(metrics);
|
|
198
|
-
case "json":
|
|
199
|
-
default:
|
|
200
|
-
return JSON.stringify(this.toPlainObject(metrics), null, 2);
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
toPlainObject(metrics) {
|
|
204
|
-
return {
|
|
205
|
-
rateLimits: Object.fromEntries(Array.from(metrics.rateLimits.entries()).map(([k, v]) => [k, v])),
|
|
206
|
-
fallbacks: {
|
|
207
|
-
...metrics.fallbacks,
|
|
208
|
-
byTargetModel: Object.fromEntries(Array.from(metrics.fallbacks.byTargetModel.entries()).map(([k, v]) => [k, v])),
|
|
209
|
-
},
|
|
210
|
-
modelPerformance: Object.fromEntries(Array.from(metrics.modelPerformance.entries()).map(([k, v]) => [k, v])),
|
|
211
|
-
startedAt: metrics.startedAt,
|
|
212
|
-
generatedAt: metrics.generatedAt,
|
|
213
|
-
};
|
|
214
|
-
}
|
|
215
|
-
exportPretty(metrics) {
|
|
216
|
-
const lines = [];
|
|
217
|
-
lines.push("=".repeat(60));
|
|
218
|
-
lines.push("Rate Limit Fallback Metrics");
|
|
219
|
-
lines.push("=".repeat(60));
|
|
220
|
-
lines.push(`Started: ${new Date(metrics.startedAt).toISOString()}`);
|
|
221
|
-
lines.push(`Generated: ${new Date(metrics.generatedAt).toISOString()}`);
|
|
222
|
-
lines.push("");
|
|
223
|
-
// Rate Limits
|
|
224
|
-
lines.push("Rate Limits:");
|
|
225
|
-
lines.push("-".repeat(40));
|
|
226
|
-
if (metrics.rateLimits.size === 0) {
|
|
227
|
-
lines.push(" No rate limits recorded");
|
|
228
|
-
}
|
|
229
|
-
else {
|
|
230
|
-
for (const [model, data] of metrics.rateLimits.entries()) {
|
|
231
|
-
lines.push(` ${model}:`);
|
|
232
|
-
lines.push(` Count: ${data.count}`);
|
|
233
|
-
lines.push(` First: ${new Date(data.firstOccurrence).toISOString()}`);
|
|
234
|
-
lines.push(` Last: ${new Date(data.lastOccurrence).toISOString()}`);
|
|
235
|
-
if (data.averageInterval) {
|
|
236
|
-
lines.push(` Avg Interval: ${(data.averageInterval / 1000).toFixed(2)}s`);
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
}
|
|
240
|
-
lines.push("");
|
|
241
|
-
// Fallbacks
|
|
242
|
-
lines.push("Fallbacks:");
|
|
243
|
-
lines.push("-".repeat(40));
|
|
244
|
-
lines.push(` Total: ${metrics.fallbacks.total}`);
|
|
245
|
-
lines.push(` Successful: ${metrics.fallbacks.successful}`);
|
|
246
|
-
lines.push(` Failed: ${metrics.fallbacks.failed}`);
|
|
247
|
-
if (metrics.fallbacks.averageDuration > 0) {
|
|
248
|
-
lines.push(` Avg Duration: ${(metrics.fallbacks.averageDuration / 1000).toFixed(2)}s`);
|
|
249
|
-
}
|
|
250
|
-
if (metrics.fallbacks.byTargetModel.size > 0) {
|
|
251
|
-
lines.push("");
|
|
252
|
-
lines.push(" By Target Model:");
|
|
253
|
-
for (const [model, data] of metrics.fallbacks.byTargetModel.entries()) {
|
|
254
|
-
lines.push(` ${model}:`);
|
|
255
|
-
lines.push(` Used: ${data.usedAsFallback}`);
|
|
256
|
-
lines.push(` Success: ${data.successful}`);
|
|
257
|
-
lines.push(` Failed: ${data.failed}`);
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
lines.push("");
|
|
261
|
-
// Model Performance
|
|
262
|
-
lines.push("Model Performance:");
|
|
263
|
-
lines.push("-".repeat(40));
|
|
264
|
-
if (metrics.modelPerformance.size === 0) {
|
|
265
|
-
lines.push(" No performance data recorded");
|
|
266
|
-
}
|
|
267
|
-
else {
|
|
268
|
-
for (const [model, data] of metrics.modelPerformance.entries()) {
|
|
269
|
-
lines.push(` ${model}:`);
|
|
270
|
-
lines.push(` Requests: ${data.requests}`);
|
|
271
|
-
lines.push(` Successes: ${data.successes}`);
|
|
272
|
-
lines.push(` Failures: ${data.failures}`);
|
|
273
|
-
if (data.averageResponseTime) {
|
|
274
|
-
lines.push(` Avg Response: ${(data.averageResponseTime / 1000).toFixed(2)}s`);
|
|
275
|
-
}
|
|
276
|
-
if (data.requests > 0) {
|
|
277
|
-
const successRate = ((data.successes / data.requests) * 100).toFixed(1);
|
|
278
|
-
lines.push(` Success Rate: ${successRate}%`);
|
|
279
|
-
}
|
|
280
|
-
}
|
|
281
|
-
}
|
|
282
|
-
return lines.join("\n");
|
|
283
|
-
}
|
|
284
|
-
exportCSV(metrics) {
|
|
285
|
-
const lines = [];
|
|
286
|
-
// Rate Limits CSV
|
|
287
|
-
lines.push("=== RATE_LIMITS ===");
|
|
288
|
-
lines.push("model,count,first_occurrence,last_occurrence,avg_interval_ms");
|
|
289
|
-
for (const [model, data] of metrics.rateLimits.entries()) {
|
|
290
|
-
lines.push([
|
|
291
|
-
model,
|
|
292
|
-
data.count,
|
|
293
|
-
data.firstOccurrence,
|
|
294
|
-
data.lastOccurrence,
|
|
295
|
-
data.averageInterval || 0,
|
|
296
|
-
].join(","));
|
|
297
|
-
}
|
|
298
|
-
lines.push("");
|
|
299
|
-
// Fallbacks Summary CSV
|
|
300
|
-
lines.push("=== FALLBACKS_SUMMARY ===");
|
|
301
|
-
lines.push(`total,successful,failed,avg_duration_ms`);
|
|
302
|
-
lines.push([
|
|
303
|
-
metrics.fallbacks.total,
|
|
304
|
-
metrics.fallbacks.successful,
|
|
305
|
-
metrics.fallbacks.failed,
|
|
306
|
-
metrics.fallbacks.averageDuration || 0,
|
|
307
|
-
].join(","));
|
|
308
|
-
lines.push("");
|
|
309
|
-
// Fallbacks by Model CSV
|
|
310
|
-
lines.push("=== FALLBACKS_BY_MODEL ===");
|
|
311
|
-
lines.push("model,used_as_fallback,successful,failed");
|
|
312
|
-
for (const [model, data] of metrics.fallbacks.byTargetModel.entries()) {
|
|
313
|
-
lines.push([
|
|
314
|
-
model,
|
|
315
|
-
data.usedAsFallback,
|
|
316
|
-
data.successful,
|
|
317
|
-
data.failed,
|
|
318
|
-
].join(","));
|
|
319
|
-
}
|
|
320
|
-
lines.push("");
|
|
321
|
-
// Model Performance CSV
|
|
322
|
-
lines.push("=== MODEL_PERFORMANCE ===");
|
|
323
|
-
lines.push("model,requests,successes,failures,avg_response_time_ms,success_rate");
|
|
324
|
-
for (const [model, data] of metrics.modelPerformance.entries()) {
|
|
325
|
-
const successRate = data.requests > 0 ? ((data.successes / data.requests) * 100).toFixed(1) : "0";
|
|
326
|
-
lines.push([
|
|
327
|
-
model,
|
|
328
|
-
data.requests,
|
|
329
|
-
data.successes,
|
|
330
|
-
data.failures,
|
|
331
|
-
data.averageResponseTime || 0,
|
|
332
|
-
successRate,
|
|
333
|
-
].join(","));
|
|
334
|
-
}
|
|
335
|
-
return lines.join("\n");
|
|
336
|
-
}
|
|
337
|
-
async report() {
|
|
338
|
-
if (!this.config.enabled)
|
|
339
|
-
return;
|
|
340
|
-
const output = this.export(this.config.output.format);
|
|
341
|
-
// Console output
|
|
342
|
-
if (this.config.output.console) {
|
|
343
|
-
console.log(output);
|
|
344
|
-
}
|
|
345
|
-
// File output
|
|
346
|
-
if (this.config.output.file) {
|
|
347
|
-
try {
|
|
348
|
-
writeFileSync(this.config.output.file, output, "utf-8");
|
|
349
|
-
this.logger.debug(`Metrics exported to ${this.config.output.file}`);
|
|
350
|
-
}
|
|
351
|
-
catch (error) {
|
|
352
|
-
this.logger.warn(`Failed to write metrics to file: ${this.config.output.file}`, { error });
|
|
353
|
-
}
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
destroy() {
|
|
357
|
-
if (this.resetTimer) {
|
|
358
|
-
clearInterval(this.resetTimer);
|
|
359
|
-
this.resetTimer = null;
|
|
360
|
-
}
|
|
361
|
-
}
|
|
362
|
-
}
|
|
363
|
-
const DEFAULT_CONFIG = {
|
|
364
|
-
fallbackModels: DEFAULT_FALLBACK_MODELS,
|
|
365
|
-
cooldownMs: 60 * 1000,
|
|
366
|
-
enabled: true,
|
|
367
|
-
fallbackMode: "cycle",
|
|
368
|
-
log: {
|
|
369
|
-
level: "warn",
|
|
370
|
-
format: "simple",
|
|
371
|
-
enableTimestamp: true,
|
|
372
|
-
},
|
|
373
|
-
metrics: {
|
|
374
|
-
enabled: false,
|
|
375
|
-
output: {
|
|
376
|
-
console: true,
|
|
377
|
-
format: "pretty",
|
|
378
|
-
},
|
|
379
|
-
resetInterval: "daily",
|
|
380
|
-
},
|
|
381
|
-
};
|
|
382
|
-
function loadConfig(directory) {
|
|
383
|
-
const homedir = process.env.HOME || "";
|
|
384
|
-
const configPaths = [
|
|
385
|
-
join(directory, ".opencode", "rate-limit-fallback.json"),
|
|
386
|
-
join(directory, "rate-limit-fallback.json"),
|
|
387
|
-
join(homedir, ".opencode", "rate-limit-fallback.json"),
|
|
388
|
-
join(homedir, ".config", "opencode", "rate-limit-fallback.json"),
|
|
389
|
-
];
|
|
390
|
-
for (const configPath of configPaths) {
|
|
391
|
-
if (existsSync(configPath)) {
|
|
392
|
-
try {
|
|
393
|
-
const content = readFileSync(configPath, "utf-8");
|
|
394
|
-
const userConfig = JSON.parse(content);
|
|
395
|
-
const mode = userConfig.fallbackMode;
|
|
396
|
-
const resetInterval = userConfig.metrics?.resetInterval;
|
|
397
|
-
return {
|
|
398
|
-
...DEFAULT_CONFIG,
|
|
399
|
-
...userConfig,
|
|
400
|
-
fallbackModels: userConfig.fallbackModels || DEFAULT_CONFIG.fallbackModels,
|
|
401
|
-
fallbackMode: VALID_FALLBACK_MODES.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
|
|
402
|
-
log: userConfig.log ? { ...DEFAULT_CONFIG.log, ...userConfig.log } : DEFAULT_CONFIG.log,
|
|
403
|
-
metrics: userConfig.metrics ? {
|
|
404
|
-
...DEFAULT_CONFIG.metrics,
|
|
405
|
-
...userConfig.metrics,
|
|
406
|
-
output: userConfig.metrics.output ? {
|
|
407
|
-
...DEFAULT_CONFIG.metrics.output,
|
|
408
|
-
...userConfig.metrics.output,
|
|
409
|
-
} : DEFAULT_CONFIG.metrics.output,
|
|
410
|
-
resetInterval: VALID_RESET_INTERVALS.includes(resetInterval) ? resetInterval : DEFAULT_CONFIG.metrics.resetInterval,
|
|
411
|
-
} : DEFAULT_CONFIG.metrics,
|
|
412
|
-
};
|
|
413
|
-
}
|
|
414
|
-
catch (error) {
|
|
415
|
-
// Silently ignore config load errors - will be logged after logger is initialized
|
|
416
|
-
}
|
|
417
|
-
}
|
|
418
|
-
}
|
|
419
|
-
return DEFAULT_CONFIG;
|
|
420
|
-
}
|
|
421
|
-
function getModelKey(providerID, modelID) {
|
|
422
|
-
return `${providerID}/${modelID}`;
|
|
423
|
-
}
|
|
424
|
-
function getStateKey(sessionID, messageID) {
|
|
425
|
-
return `${sessionID}:${messageID}`;
|
|
426
|
-
}
|
|
427
|
-
function isRateLimitError(error) {
|
|
428
|
-
if (!error || typeof error !== "object")
|
|
429
|
-
return false;
|
|
430
|
-
// More type-safe error object structure
|
|
431
|
-
const err = error;
|
|
432
|
-
// Check for 429 status code in APIError (strict check)
|
|
433
|
-
if (err.name === "APIError" && err.data?.statusCode === 429) {
|
|
434
|
-
return true;
|
|
435
|
-
}
|
|
436
|
-
// Type-safe access to error fields
|
|
437
|
-
const responseBody = String(err.data?.responseBody || "").toLowerCase();
|
|
438
|
-
const message = String(err.data?.message || err.message || "").toLowerCase();
|
|
439
|
-
// Strict rate limit indicators only - avoid false positives
|
|
440
|
-
const strictRateLimitIndicators = [
|
|
441
|
-
"rate limit",
|
|
442
|
-
"rate_limit",
|
|
443
|
-
"ratelimit",
|
|
444
|
-
"too many requests",
|
|
445
|
-
"quota exceeded",
|
|
446
|
-
];
|
|
447
|
-
// Check for 429 in text (explicit HTTP status code)
|
|
448
|
-
if (responseBody.includes("429") || message.includes("429")) {
|
|
449
|
-
return true;
|
|
450
|
-
}
|
|
451
|
-
// Check for strict rate limit keywords
|
|
452
|
-
return strictRateLimitIndicators.some((indicator) => responseBody.includes(indicator) ||
|
|
453
|
-
message.includes(indicator));
|
|
454
|
-
}
|
|
455
|
-
// Constants for deduplication and state management
|
|
456
|
-
const DEDUP_WINDOW_MS = 5000;
|
|
457
|
-
const STATE_TIMEOUT_MS = 30000;
|
|
458
|
-
const CLEANUP_INTERVAL_MS = 300000; // 5 minutes
|
|
459
|
-
const SESSION_ENTRY_TTL_MS = 3600000; // 1 hour
|
|
460
|
-
/**
|
|
461
|
-
* Extract toast message properties with fallback values
|
|
462
|
-
*/
|
|
463
|
-
function getToastMessage(toast) {
|
|
464
|
-
const title = toast?.body?.title || toast?.title || "Toast";
|
|
465
|
-
const message = toast?.body?.message || toast?.message || "";
|
|
466
|
-
const variant = toast?.body?.variant || toast?.variant || "info";
|
|
467
|
-
return { title, message, variant };
|
|
468
|
-
}
|
|
469
|
-
/**
|
|
470
|
-
* Safely show toast, falling back to console logging if TUI is missing or fails
|
|
471
|
-
*/
|
|
472
|
-
const safeShowToast = async (client, toast) => {
|
|
473
|
-
const { title, message, variant } = getToastMessage(toast);
|
|
474
|
-
const logToConsole = () => {
|
|
475
|
-
if (variant === "error") {
|
|
476
|
-
console.error(`[RateLimitFallback] ${title}: ${message}`);
|
|
477
|
-
}
|
|
478
|
-
else if (variant === "warning") {
|
|
479
|
-
console.warn(`[RateLimitFallback] ${title}: ${message}`);
|
|
480
|
-
}
|
|
481
|
-
else {
|
|
482
|
-
console.log(`[RateLimitFallback] ${title}: ${message}`);
|
|
483
|
-
}
|
|
484
|
-
};
|
|
485
|
-
try {
|
|
486
|
-
if (client.tui) {
|
|
487
|
-
await client.tui.showToast(toast);
|
|
488
|
-
}
|
|
489
|
-
else {
|
|
490
|
-
// TUI doesn't exist - log to console
|
|
491
|
-
logToConsole();
|
|
492
|
-
}
|
|
493
|
-
}
|
|
494
|
-
catch {
|
|
495
|
-
// TUI exists but failed to show toast - log to console
|
|
496
|
-
logToConsole();
|
|
497
|
-
}
|
|
498
|
-
};
|
|
55
|
+
// ============================================================================
|
|
56
|
+
// Main Plugin Export
|
|
57
|
+
// ============================================================================
|
|
499
58
|
export const RateLimitFallback = async ({ client, directory }) => {
|
|
500
59
|
const config = loadConfig(directory);
|
|
501
60
|
// Detect headless mode (no TUI)
|
|
@@ -528,507 +87,45 @@ export const RateLimitFallback = async ({ client, directory }) => {
|
|
|
528
87
|
if (!config.enabled) {
|
|
529
88
|
return {};
|
|
530
89
|
}
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
const
|
|
534
|
-
|
|
535
|
-
const
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
const
|
|
540
|
-
//
|
|
541
|
-
const metricsManager = new MetricsManager(config.metrics ?? { ...DEFAULT_CONFIG.metrics }, logger);
|
|
542
|
-
// Track model requests for performance metrics
|
|
543
|
-
const modelRequestStartTimes = new Map(); // modelKey -> startTime
|
|
544
|
-
// Helper functions for session hierarchy management
|
|
545
|
-
function getOrCreateHierarchy(rootSessionID) {
|
|
546
|
-
let hierarchy = sessionHierarchies.get(rootSessionID);
|
|
547
|
-
if (!hierarchy) {
|
|
548
|
-
hierarchy = {
|
|
549
|
-
rootSessionID,
|
|
550
|
-
subagents: new Map(),
|
|
551
|
-
sharedFallbackState: "none",
|
|
552
|
-
sharedConfig: config,
|
|
553
|
-
createdAt: Date.now(),
|
|
554
|
-
lastActivity: Date.now(),
|
|
555
|
-
};
|
|
556
|
-
sessionHierarchies.set(rootSessionID, hierarchy);
|
|
557
|
-
sessionToRootMap.set(rootSessionID, rootSessionID);
|
|
558
|
-
}
|
|
559
|
-
return hierarchy;
|
|
560
|
-
}
|
|
561
|
-
function registerSubagent(sessionID, parentSessionID) {
|
|
562
|
-
// Validate parent session exists
|
|
563
|
-
// Parent session must either be registered in sessionToRootMap or be a new root session
|
|
564
|
-
const parentRootSessionID = sessionToRootMap.get(parentSessionID);
|
|
565
|
-
// Determine root session - if parent doesn't exist, treat it as a new root
|
|
566
|
-
const rootSessionID = parentRootSessionID || parentSessionID;
|
|
567
|
-
// If parent is not a subagent but we're treating it as a root, create a hierarchy for it
|
|
568
|
-
// This allows sessions to become roots when their first subagent is registered
|
|
569
|
-
const hierarchy = getOrCreateHierarchy(rootSessionID);
|
|
570
|
-
const parentSubagent = hierarchy.subagents.get(parentSessionID);
|
|
571
|
-
const depth = parentSubagent ? parentSubagent.depth + 1 : 1;
|
|
572
|
-
// Enforce max depth
|
|
573
|
-
if (depth > maxSubagentDepth) {
|
|
574
|
-
return false;
|
|
575
|
-
}
|
|
576
|
-
const subagent = {
|
|
577
|
-
sessionID,
|
|
578
|
-
parentSessionID,
|
|
579
|
-
depth,
|
|
580
|
-
fallbackState: "none",
|
|
581
|
-
createdAt: Date.now(),
|
|
582
|
-
lastActivity: Date.now(),
|
|
583
|
-
};
|
|
584
|
-
hierarchy.subagents.set(sessionID, subagent);
|
|
585
|
-
sessionToRootMap.set(sessionID, rootSessionID);
|
|
586
|
-
hierarchy.lastActivity = Date.now();
|
|
587
|
-
return true;
|
|
588
|
-
}
|
|
589
|
-
function getRootSession(sessionID) {
|
|
590
|
-
return sessionToRootMap.get(sessionID) || null;
|
|
591
|
-
}
|
|
592
|
-
function getHierarchy(sessionID) {
|
|
593
|
-
const rootSessionID = getRootSession(sessionID);
|
|
594
|
-
return rootSessionID ? sessionHierarchies.get(rootSessionID) || null : null;
|
|
595
|
-
}
|
|
596
|
-
// Cleanup stale session model entries (every 5 minutes)
|
|
90
|
+
// Initialize components
|
|
91
|
+
initSubagentTracker(config);
|
|
92
|
+
const metricsManager = new MetricsManager(config.metrics ?? { enabled: false, output: { console: true, format: "pretty" }, resetInterval: "daily" }, logger);
|
|
93
|
+
// Create hierarchy resolver to avoid circular dependency
|
|
94
|
+
const hierarchyResolver = {
|
|
95
|
+
getRootSession: getRootSession,
|
|
96
|
+
getHierarchy: getHierarchy,
|
|
97
|
+
};
|
|
98
|
+
const fallbackHandler = new FallbackHandler(config, client, logger, metricsManager, hierarchyResolver);
|
|
99
|
+
// Cleanup stale entries periodically
|
|
597
100
|
const cleanupInterval = setInterval(() => {
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
// Remove entries older than 1 hour
|
|
601
|
-
if (now - entry.lastUpdated > SESSION_ENTRY_TTL_MS) {
|
|
602
|
-
currentSessionModel.delete(sessionID);
|
|
603
|
-
}
|
|
604
|
-
}
|
|
605
|
-
// Clean up stale session hierarchies
|
|
606
|
-
for (const [rootSessionID, hierarchy] of sessionHierarchies.entries()) {
|
|
607
|
-
if (now - hierarchy.lastActivity > SESSION_ENTRY_TTL_MS) {
|
|
608
|
-
// Clean up all subagents in this hierarchy
|
|
609
|
-
for (const subagentID of hierarchy.subagents.keys()) {
|
|
610
|
-
sessionToRootMap.delete(subagentID);
|
|
611
|
-
}
|
|
612
|
-
sessionHierarchies.delete(rootSessionID);
|
|
613
|
-
sessionToRootMap.delete(rootSessionID);
|
|
614
|
-
}
|
|
615
|
-
}
|
|
616
|
-
// Clean up stale retry state entries to prevent memory leaks
|
|
617
|
-
for (const [stateKey, state] of retryState.entries()) {
|
|
618
|
-
if (now - state.lastAttemptTime > STATE_TIMEOUT_MS) {
|
|
619
|
-
retryState.delete(stateKey);
|
|
620
|
-
}
|
|
621
|
-
}
|
|
622
|
-
// Clean up stale fallback messages
|
|
623
|
-
for (const [fallbackKey, fallbackInfo] of fallbackMessages.entries()) {
|
|
624
|
-
if (now - fallbackInfo.timestamp > SESSION_ENTRY_TTL_MS) {
|
|
625
|
-
fallbackInProgress.delete(fallbackKey);
|
|
626
|
-
fallbackMessages.delete(fallbackKey);
|
|
627
|
-
}
|
|
628
|
-
}
|
|
101
|
+
clearHierarchyEntries();
|
|
102
|
+
fallbackHandler.cleanupStaleEntries();
|
|
629
103
|
}, CLEANUP_INTERVAL_MS);
|
|
630
|
-
function isModelRateLimited(providerID, modelID) {
|
|
631
|
-
const key = getModelKey(providerID, modelID);
|
|
632
|
-
const limitedAt = rateLimitedModels.get(key);
|
|
633
|
-
if (!limitedAt)
|
|
634
|
-
return false;
|
|
635
|
-
if (Date.now() - limitedAt > config.cooldownMs) {
|
|
636
|
-
rateLimitedModels.delete(key);
|
|
637
|
-
return false;
|
|
638
|
-
}
|
|
639
|
-
return true;
|
|
640
|
-
}
|
|
641
|
-
function markModelRateLimited(providerID, modelID) {
|
|
642
|
-
const key = getModelKey(providerID, modelID);
|
|
643
|
-
rateLimitedModels.set(key, Date.now());
|
|
644
|
-
}
|
|
645
|
-
function findNextAvailableModel(currentProviderID, currentModelID, attemptedModels) {
|
|
646
|
-
const currentKey = getModelKey(currentProviderID, currentModelID);
|
|
647
|
-
const startIndex = config.fallbackModels.findIndex(m => getModelKey(m.providerID, m.modelID) === currentKey);
|
|
648
|
-
// If current model is not in the fallback list (startIndex is -1), start from 0
|
|
649
|
-
const searchStartIndex = Math.max(0, startIndex);
|
|
650
|
-
for (let i = searchStartIndex + 1; i < config.fallbackModels.length; i++) {
|
|
651
|
-
const model = config.fallbackModels[i];
|
|
652
|
-
const key = getModelKey(model.providerID, model.modelID);
|
|
653
|
-
if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
|
|
654
|
-
return model;
|
|
655
|
-
}
|
|
656
|
-
}
|
|
657
|
-
for (let i = 0; i <= searchStartIndex && i < config.fallbackModels.length; i++) {
|
|
658
|
-
const model = config.fallbackModels[i];
|
|
659
|
-
const key = getModelKey(model.providerID, model.modelID);
|
|
660
|
-
if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
|
|
661
|
-
return model;
|
|
662
|
-
}
|
|
663
|
-
}
|
|
664
|
-
return null;
|
|
665
|
-
}
|
|
666
|
-
/**
|
|
667
|
-
* Check and mark fallback in progress for deduplication.
|
|
668
|
-
* Uses message scope (sessionID:messageID) for better tracking.
|
|
669
|
-
* Returns true if processing should continue, false if it should be skipped.
|
|
670
|
-
*/
|
|
671
|
-
function checkAndMarkFallbackInProgress(sessionID, messageID) {
|
|
672
|
-
const key = getStateKey(sessionID, messageID);
|
|
673
|
-
const lastFallback = fallbackInProgress.get(key);
|
|
674
|
-
if (lastFallback && Date.now() - lastFallback < DEDUP_WINDOW_MS) {
|
|
675
|
-
return false; // Skip - already processing
|
|
676
|
-
}
|
|
677
|
-
fallbackInProgress.set(key, Date.now());
|
|
678
|
-
return true; // Continue processing
|
|
679
|
-
}
|
|
680
|
-
/**
 * Decide which session a fallback should be run against, applying
 * message-scoped deduplication on the way.
 *
 * When the session has both a root session and a hierarchy, the root session
 * is the target: the hierarchy (and the matching subagent entry, if there is
 * one) is flagged "in_progress" and its lastActivity is refreshed. Otherwise
 * the session itself is the target and no hierarchy is returned.
 *
 * @param sessionID - Session that reported the rate limit.
 * @param messageID - Message used to scope deduplication.
 * @returns {{ targetSessionID: *, hierarchy: (object|null) } | null} The
 *   resolution, or null when the deduplication guard says this fallback is
 *   already being processed.
 */
function resolveTargetSessionWithDedup(sessionID, messageID) {
    const hierarchy = getHierarchy(sessionID);
    const rootSessionID = getRootSession(sessionID);
    const belongsToHierarchy = Boolean(rootSessionID && hierarchy);

    if (!belongsToHierarchy) {
        // Standalone session: deduplicate on the session's own ID.
        return checkAndMarkFallbackInProgress(sessionID, messageID)
            ? { targetSessionID: sessionID, hierarchy: null }
            : null;
    }

    // Hierarchy member: deduplicate on the root so siblings share one fallback.
    if (!checkAndMarkFallbackInProgress(rootSessionID, messageID)) {
        return null;
    }

    hierarchy.sharedFallbackState = "in_progress";
    hierarchy.lastActivity = Date.now();

    const subagentEntry = hierarchy.subagents.get(sessionID);
    if (subagentEntry) {
        subagentEntry.fallbackState = "in_progress";
        subagentEntry.lastActivity = Date.now();
    }

    return { targetSessionID: rootSessionID, hierarchy };
}
|
|
713
|
-
/**
 * Look up the retry bookkeeping for one message of one session, creating it
 * when it does not exist yet or when its last attempt is older than
 * STATE_TIMEOUT_MS.
 *
 * @param sessionID - Session the message belongs to.
 * @param messageID - Message being retried.
 * @returns {{ attemptedModels: Set, lastAttemptTime: number }} The live state
 *   object stored in retryState (mutations by the caller are visible to later
 *   lookups).
 */
function getOrCreateRetryState(sessionID, messageID) {
    const key = getStateKey(sessionID, messageID);
    const existing = retryState.get(key);

    // Reuse the stored entry unless it has timed out.
    if (existing && !(Date.now() - existing.lastAttemptTime > STATE_TIMEOUT_MS)) {
        return existing;
    }

    const fresh = { attemptedModels: new Set(), lastAttemptTime: Date.now() };
    retryState.set(key, fresh);
    return fresh;
}
|
|
725
|
-
/**
 * Pick the model to retry with after the current one was rate limited.
 *
 * The current model (when both IDs are known) is marked rate limited and added
 * to state.attemptedModels before searching. If the search finds nothing and
 * at least one model has been attempted, config.fallbackMode decides what
 * happens next:
 *   - "cycle":      forget the attempts (keeping only the current model) and
 *                   search again from the start of the list.
 *   - "retry-last": use the last configured model once more, unless it is the
 *                   current model or is itself rate limited; in that case
 *                   behave like "cycle".
 *   - anything else (e.g. "stop"): give up.
 *
 * @param currentProviderID - Provider of the model that was rate limited (may be empty).
 * @param currentModelID - ID of the model that was rate limited (may be empty).
 * @param state - Retry state whose attemptedModels set is read and updated.
 * @returns {Promise<object|null>} The selected model, or the empty search
 *   result when nothing is available.
 */
async function selectFallbackModel(currentProviderID, currentModelID, state) {
    const attempted = state.attemptedModels;
    const hasCurrentModel = Boolean(currentProviderID && currentModelID);

    // Shared by "cycle" and the exhausted "retry-last" path: start over, but
    // never hand back the model that has just failed.
    const restartFromFirstModel = () => {
        attempted.clear();
        if (hasCurrentModel) {
            attempted.add(getModelKey(currentProviderID, currentModelID));
        }
        return findNextAvailableModel("", "", attempted);
    };

    if (hasCurrentModel) {
        markModelRateLimited(currentProviderID, currentModelID);
        attempted.add(getModelKey(currentProviderID, currentModelID));
    }

    const candidate = findNextAvailableModel(currentProviderID || "", currentModelID || "", attempted);
    if (candidate || !(attempted.size > 0)) {
        return candidate;
    }

    switch (config.fallbackMode) {
        case "cycle":
            return restartFromFirstModel();
        case "retry-last": {
            const finalModel = config.fallbackModels[config.fallbackModels.length - 1];
            if (!finalModel) {
                return candidate;
            }
            const alreadyOnFinalModel = currentProviderID === finalModel.providerID && currentModelID === finalModel.modelID;
            if (alreadyOnFinalModel || isModelRateLimited(finalModel.providerID, finalModel.modelID)) {
                // The last model has failed as well: reset for the next prompt.
                return restartFromFirstModel();
            }
            await safeShowToast(client, {
                body: {
                    title: "Last Resort",
                    message: `Trying ${finalModel.modelID} one more time...`,
                    variant: "warning",
                    duration: 3000,
                },
            });
            return finalModel;
        }
        default:
            // "stop" mode: nothing left to try; the caller reports the error.
            return candidate;
    }
}
|
|
777
|
-
/**
 * Extract the re-sendable parts (text and file) from a user message.
 *
 * Parts of any other type are dropped. The function never throws on malformed
 * input: a message without a parts array yields an empty list, and null
 * entries or file parts without a location are skipped.
 *
 * @param message - Message object; expected to carry a `parts` array.
 * @returns {Array<{type: "text", text: string} | {type: "file", path: string, mediaType: (string|undefined)}>}
 *   Normalized parts in their original order. `mediaType` is left undefined
 *   when the source part does not provide one, so that consumers can apply
 *   their own default.
 */
function extractMessageParts(message) {
    const rawParts = message?.parts;
    if (!Array.isArray(rawParts)) {
        return [];
    }

    const extracted = [];
    for (const part of rawParts) {
        if (part?.type === "text") {
            // Avoid sending the literal string "undefined" for a missing text.
            extracted.push({ type: "text", text: String(part.text ?? "") });
        }
        else if (part?.type === "file") {
            // NOTE(review): SDK file parts appear to expose `url`/`mime` rather
            // than `path`/`mediaType` — confirm against the SDK types. Both
            // spellings are accepted here; `path`/`mediaType` take precedence.
            const location = part.path ?? part.url;
            if (location == null) {
                continue; // A file part with no location cannot be re-sent.
            }
            const mediaType = part.mediaType ?? part.mime;
            extracted.push({
                type: "file",
                path: String(location),
                // Keep "missing" as undefined: String(undefined) would become
                // the truthy text "undefined" and defeat downstream defaults.
                mediaType: mediaType == null ? undefined : String(mediaType),
            });
        }
    }
    return extracted;
}
|
|
797
|
-
/**
 * Translate internal message parts into the shape passed to the SDK prompt call.
 *
 * Text parts keep their text. Every other part is emitted as a file part whose
 * `url` is the part's `path` and whose `mime` is the part's `mediaType`,
 * defaulting to "application/octet-stream" when that is falsy.
 *
 * @param {Array<object>} parts - Internal parts ({ type, text } or { type, path, mediaType }).
 * @returns {Array<object>} A new array with one SDK-shaped part per input part.
 */
function convertPartsToSDKFormat(parts) {
    const toSDKPart = (part) => {
        const isText = part.type === "text";
        return isText
            ? { type: "text", text: part.text }
            // The local file path doubles as the URL for file inputs.
            : { type: "file", url: part.path, mime: part.mediaType || "application/octet-stream" };
    };
    return parts.map((part) => toSDKPart(part));
}
|
|
814
|
-
/**
 * Push a newly selected model down to every subagent of a hierarchy.
 *
 * Nothing happens unless targetSessionID is the hierarchy's root session. When
 * it is, the hierarchy and each subagent are flagged "completed" with a
 * refreshed lastActivity, and each subagent's entry in currentSessionModel is
 * replaced with the given provider/model.
 *
 * @param hierarchy - Session hierarchy ({ rootSessionID, subagents: Map, ... }).
 * @param targetSessionID - Session the fallback was executed against.
 * @param providerID - Provider of the model now in use.
 * @param modelID - ID of the model now in use.
 */
function propagateModelToSubagents(hierarchy, targetSessionID, providerID, modelID) {
    if (hierarchy.rootSessionID !== targetSessionID) {
        return;
    }

    hierarchy.sharedFallbackState = "completed";
    hierarchy.lastActivity = Date.now();

    hierarchy.subagents.forEach((subagentEntry, subagentSessionID) => {
        // Keep per-session model tracking in step with the root's new model.
        currentSessionModel.set(subagentSessionID, {
            providerID,
            modelID,
            lastUpdated: Date.now(),
        });
        subagentEntry.fallbackState = "completed";
        subagentEntry.lastActivity = Date.now();
    });
}
|
|
833
|
-
/**
 * Re-send a prompt to a session using a different model.
 *
 * Before the prompt is sent, the model is recorded as the session's current
 * model, propagated through the hierarchy when one is supplied, counted in the
 * metrics, and given a request start time. After the prompt call resolves, a
 * "Fallback Successful" toast is shown.
 *
 * @param targetSessionID - Session to prompt.
 * @param model - Model to use ({ providerID, modelID }).
 * @param parts - Internal message parts to re-send.
 * @param hierarchy - Session hierarchy to propagate the model through, or a
 *   falsy value when there is none.
 * @returns {Promise<void>} Rejects if the prompt call rejects.
 */
async function retryWithModel(targetSessionID, model, parts, hierarchy) {
    const { providerID, modelID } = model;

    // Remember which model this session is now running on.
    currentSessionModel.set(targetSessionID, { providerID, modelID, lastUpdated: Date.now() });

    // A root session shares its model with all of its subagents.
    if (hierarchy) {
        propagateModelToSubagents(hierarchy, targetSessionID, providerID, modelID);
    }

    // Metrics: count the request and start the response-time clock.
    metricsManager.recordModelRequest(providerID, modelID);
    modelRequestStartTimes.set(getModelKey(providerID, modelID), Date.now());

    const promptRequest = {
        path: { id: targetSessionID },
        body: {
            parts: convertPartsToSDKFormat(parts),
            model: { providerID, modelID },
        },
    };
    await client.session.prompt(promptRequest);

    const successToast = {
        body: {
            title: "Fallback Successful",
            message: `Now using ${modelID}`,
            variant: "success",
            duration: 3000,
        },
    };
    await safeShowToast(client, successToast);
}
|
|
869
|
-
/**
 * React to a rate-limit signal for a session: abort the running request and
 * re-send the last user message with the next available fallback model.
 *
 * Steps, in order:
 *   1. Work out the target session (the root session when there is one) and,
 *      when the caller did not supply the model, fall back to the model
 *      tracked for that session.
 *   2. Record the rate limit, abort the target session (abort failures are
 *      logged and ignored) and notify the user.
 *   3. Load the session's messages and take the most recent user message;
 *      stop quietly when there is none.
 *   4. Apply message-scoped deduplication, select the next model and retry.
 *
 * Every exit that abandons the fallback after deduplication has been marked
 * releases the state it created. In particular, a retry that throws removes
 * the pending fallbackMessages entry and records a fallback failure, so a
 * failed retry does not linger as an in-flight fallback in the bookkeeping.
 * The deduplication mark is intentionally left in place on that path so
 * duplicate events for the same failure stay suppressed for the dedup window.
 *
 * Errors are never rethrown; they are logged at debug level with only the
 * error name and message.
 *
 * @param sessionID - Session that reported the rate limit.
 * @param currentProviderID - Provider of the rate-limited model ("" when unknown).
 * @param currentModelID - ID of the rate-limited model ("" when unknown).
 * @returns {Promise<void>}
 */
async function handleRateLimitFallback(sessionID, currentProviderID, currentModelID) {
    // Tracked outside the try block so the catch handler can undo a fallback
    // that was registered but whose retry failed.
    let pendingFallbackKey = null;
    let fallbackStarted = false;
    try {
        const rootSessionID = getRootSession(sessionID);
        const targetSessionID = rootSessionID || sessionID;

        // If no model info was provided, use the model tracked for the session.
        let providerID = currentProviderID;
        let modelID = currentModelID;
        if (!providerID || !modelID) {
            const tracked = currentSessionModel.get(targetSessionID);
            if (tracked) {
                providerID = tracked.providerID;
                modelID = tracked.modelID;
            }
        }

        if (providerID && modelID) {
            metricsManager.recordRateLimit(providerID, modelID);
        }

        // Best effort: a failed abort must not prevent the fallback itself.
        try {
            await client.session.abort({ path: { id: targetSessionID } });
        }
        catch (abortError) {
            logger.debug(`Failed to abort session ${targetSessionID}`, { error: abortError });
        }

        await safeShowToast(client, {
            body: {
                title: "Rate Limit Detected",
                message: `Switching from ${modelID || 'current model'}...`,
                variant: "warning",
                duration: 3000,
            },
        });

        const messagesResult = await client.session.messages({ path: { id: targetSessionID } });
        if (!messagesResult.data) {
            return;
        }
        const messages = messagesResult.data;
        const lastUserMessage = [...messages].reverse().find((m) => m.info.role === "user");
        if (!lastUserMessage) {
            return;
        }
        const messageID = lastUserMessage.info.id;

        // Deduplicate per message; null means another handler already owns it.
        const resolution = resolveTargetSessionWithDedup(sessionID, messageID);
        if (!resolution) {
            return;
        }

        const state = getOrCreateRetryState(sessionID, messageID);
        const stateKey = getStateKey(sessionID, messageID);
        const fallbackKey = getStateKey(resolution.targetSessionID, messageID);

        const nextModel = await selectFallbackModel(providerID, modelID, state);
        if (!nextModel) {
            await safeShowToast(client, {
                body: {
                    title: "No Fallback Available",
                    message: config.fallbackMode === "stop"
                        ? "All fallback models exhausted"
                        : "All models are rate limited",
                    variant: "error",
                    duration: 5000,
                },
            });
            retryState.delete(stateKey);
            fallbackInProgress.delete(fallbackKey);
            return;
        }

        state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
        state.lastAttemptTime = Date.now();

        const parts = extractMessageParts(lastUserMessage);
        if (parts.length === 0) {
            // Nothing to re-send. Drop the retry state as well: it already
            // lists nextModel as attempted although no attempt was made.
            retryState.delete(stateKey);
            fallbackInProgress.delete(fallbackKey);
            return;
        }

        await safeShowToast(client, {
            body: {
                title: "Retrying",
                message: `Using ${nextModel.providerID}/${nextModel.modelID}`,
                variant: "info",
                duration: 3000,
            },
        });

        metricsManager.recordFallbackStart();

        // Register the fallback under the ORIGINAL user message ID: the
        // re-sent prompt gets a new ID, so this is what ties the outcome back
        // to the in-progress mark.
        fallbackMessages.set(fallbackKey, {
            sessionID: resolution.targetSessionID,
            messageID,
            timestamp: Date.now(),
        });
        pendingFallbackKey = fallbackKey;
        fallbackStarted = true;

        await retryWithModel(resolution.targetSessionID, nextModel, parts, resolution.hierarchy);

        // The retry was dispatched; the per-message retry state is done.
        fallbackStarted = false;
        retryState.delete(stateKey);
    }
    catch (err) {
        if (fallbackStarted) {
            // The retry itself failed: withdraw the pending registration and
            // balance the recorded fallback start with a failure.
            fallbackMessages.delete(pendingFallbackKey);
            metricsManager.recordFallbackFailure();
        }
        // Log only limited error info.
        const errorMessage = err instanceof Error ? err.message : String(err);
        const errorName = err instanceof Error ? err.name : undefined;
        logger.debug(`Fallback error for session ${sessionID}`, {
            error: errorMessage,
            name: errorName,
        });
    }
}
|
|
979
104
|
return {
|
|
980
105
|
event: async ({ event }) => {
|
|
106
|
+
// Handle session.error events
|
|
981
107
|
if (isSessionErrorEvent(event)) {
|
|
982
108
|
const { sessionID, error } = event.properties;
|
|
983
109
|
if (sessionID && error && isRateLimitError(error)) {
|
|
984
|
-
await handleRateLimitFallback(sessionID, "", "");
|
|
110
|
+
await fallbackHandler.handleRateLimitFallback(sessionID, "", "");
|
|
985
111
|
}
|
|
986
112
|
}
|
|
113
|
+
// Handle message.updated events
|
|
987
114
|
if (isMessageUpdatedEvent(event)) {
|
|
988
115
|
const info = event.properties.info;
|
|
989
116
|
if (info?.error && isRateLimitError(info.error)) {
|
|
990
|
-
await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
|
|
117
|
+
await fallbackHandler.handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
|
|
991
118
|
}
|
|
992
|
-
else if (info?.status === "completed" && !info?.error) {
|
|
993
|
-
//
|
|
994
|
-
|
|
995
|
-
const fallbackInfo = fallbackMessages.get(fallbackKey);
|
|
996
|
-
if (fallbackInfo) {
|
|
997
|
-
// Clear fallback in progress for this message
|
|
998
|
-
fallbackInProgress.delete(fallbackKey);
|
|
999
|
-
fallbackMessages.delete(fallbackKey);
|
|
1000
|
-
logger.debug(`Fallback completed for message ${info.id}`, { sessionID: info.sessionID });
|
|
1001
|
-
// Record fallback success metric
|
|
1002
|
-
const tracked = currentSessionModel.get(info.sessionID);
|
|
1003
|
-
if (tracked) {
|
|
1004
|
-
metricsManager.recordFallbackSuccess(tracked.providerID, tracked.modelID, fallbackInfo.timestamp);
|
|
1005
|
-
// Record model performance metric
|
|
1006
|
-
const modelKey = getModelKey(tracked.providerID, tracked.modelID);
|
|
1007
|
-
const startTime = modelRequestStartTimes.get(modelKey);
|
|
1008
|
-
if (startTime) {
|
|
1009
|
-
const responseTime = Date.now() - startTime;
|
|
1010
|
-
metricsManager.recordModelSuccess(tracked.providerID, tracked.modelID, responseTime);
|
|
1011
|
-
modelRequestStartTimes.delete(modelKey);
|
|
1012
|
-
}
|
|
1013
|
-
}
|
|
1014
|
-
}
|
|
119
|
+
else if (info?.status === "completed" && !info?.error && info?.id) {
|
|
120
|
+
// Record fallback success
|
|
121
|
+
fallbackHandler.handleMessageUpdated(info.sessionID, info.id, false, false);
|
|
1015
122
|
}
|
|
1016
|
-
else if (info?.error && !isRateLimitError(info.error)) {
|
|
1017
|
-
//
|
|
1018
|
-
|
|
1019
|
-
if (tracked) {
|
|
1020
|
-
metricsManager.recordModelFailure(tracked.providerID, tracked.modelID);
|
|
1021
|
-
// Check if this was a fallback attempt and record failure
|
|
1022
|
-
const fallbackKey = getStateKey(info.sessionID, info.id);
|
|
1023
|
-
const fallbackInfo = fallbackMessages.get(fallbackKey);
|
|
1024
|
-
if (fallbackInfo) {
|
|
1025
|
-
metricsManager.recordFallbackFailure();
|
|
1026
|
-
fallbackInProgress.delete(fallbackKey);
|
|
1027
|
-
fallbackMessages.delete(fallbackKey);
|
|
1028
|
-
}
|
|
1029
|
-
}
|
|
123
|
+
else if (info?.error && !isRateLimitError(info.error) && info?.id) {
|
|
124
|
+
// Record non-rate-limit error
|
|
125
|
+
fallbackHandler.handleMessageUpdated(info.sessionID, info.id, true, false);
|
|
1030
126
|
}
|
|
1031
127
|
}
|
|
128
|
+
// Handle session.status events
|
|
1032
129
|
if (isSessionStatusEvent(event)) {
|
|
1033
130
|
const props = event.properties;
|
|
1034
131
|
const status = props?.status;
|
|
@@ -1040,34 +137,28 @@ export const RateLimitFallback = async ({ client, directory }) => {
|
|
|
1040
137
|
message.includes("reduce concurrency");
|
|
1041
138
|
if (isRateLimitRetry) {
|
|
1042
139
|
// Try fallback on any attempt, handleRateLimitFallback will manage state
|
|
1043
|
-
await handleRateLimitFallback(props.sessionID, "", "");
|
|
140
|
+
await fallbackHandler.handleRateLimitFallback(props.sessionID, "", "");
|
|
1044
141
|
}
|
|
1045
142
|
}
|
|
1046
143
|
}
|
|
1047
144
|
// Handle subagent session creation events
|
|
1048
|
-
// Note: Using type assertion for subagent events since they may not be in the official Event union yet
|
|
1049
145
|
const rawEvent = event;
|
|
1050
146
|
if (isSubagentSessionCreatedEvent(rawEvent)) {
|
|
1051
147
|
const { sessionID, parentSessionID } = rawEvent.properties;
|
|
1052
148
|
if (config.enableSubagentFallback !== false) {
|
|
1053
|
-
registerSubagent(sessionID, parentSessionID);
|
|
149
|
+
registerSubagent(sessionID, parentSessionID, config);
|
|
1054
150
|
}
|
|
1055
151
|
}
|
|
1056
152
|
},
|
|
1057
153
|
// Cleanup function to prevent memory leaks
|
|
1058
154
|
cleanup: () => {
|
|
1059
155
|
clearInterval(cleanupInterval);
|
|
1060
|
-
|
|
1061
|
-
sessionHierarchies.clear();
|
|
1062
|
-
sessionToRootMap.clear();
|
|
1063
|
-
// Clean up fallback messages
|
|
1064
|
-
fallbackMessages.clear();
|
|
1065
|
-
// Clean up metrics manager
|
|
156
|
+
clearAllHierarchies();
|
|
1066
157
|
metricsManager.destroy();
|
|
1067
|
-
|
|
1068
|
-
modelRequestStartTimes.clear();
|
|
158
|
+
fallbackHandler.destroy();
|
|
1069
159
|
},
|
|
1070
160
|
};
|
|
1071
161
|
};
|
|
1072
162
|
export default RateLimitFallback;
|
|
1073
|
-
|
|
163
|
+
export { MetricsManager } from "./src/metrics/MetricsManager.js";
|
|
164
|
+
export { createLogger } from "./logger.js";
|