@azumag/opencode-rate-limit-fallback 1.19.2 → 1.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,7 +1,23 @@
1
- import { existsSync, readFileSync, writeFileSync } from "fs";
1
+ /**
2
+ * Rate Limit Fallback Plugin - Main entry point
3
+ *
4
+ * This plugin automatically switches to fallback models when rate limited
5
+ */
6
+ import { existsSync, readFileSync } from "fs";
2
7
  import { join } from "path";
3
8
  import { createLogger } from "./logger.js";
4
- // Event type guards
9
+ import { MetricsManager } from "./src/metrics/MetricsManager.js";
10
+ import { FallbackHandler } from "./src/fallback/FallbackHandler.js";
11
+ import { loadConfig } from "./src/utils/config.js";
12
+ import { isRateLimitError } from "./src/utils/errorDetection.js";
13
+ import { initSubagentTracker, registerSubagent, getRootSession, getHierarchy, cleanupStaleEntries as clearHierarchyEntries, clearAll as clearAllHierarchies, } from "./src/session/SubagentTracker.js";
14
+ import { CLEANUP_INTERVAL_MS } from "./src/types/index.js";
15
+ // ============================================================================
16
+ // Event Type Guards
17
+ // ============================================================================
18
+ /**
19
+ * Check if event is a session error event
20
+ */
5
21
  function isSessionErrorEvent(event) {
6
22
  return event.type === "session.error" &&
7
23
  typeof event.properties === "object" &&
@@ -9,18 +25,26 @@ function isSessionErrorEvent(event) {
9
25
  "sessionID" in event.properties &&
10
26
  "error" in event.properties;
11
27
  }
28
+ /**
29
+ * Check if event is a message updated event
30
+ */
12
31
  function isMessageUpdatedEvent(event) {
13
32
  return event.type === "message.updated" &&
14
33
  typeof event.properties === "object" &&
15
34
  event.properties !== null &&
16
35
  "info" in event.properties;
17
36
  }
37
+ /**
38
+ * Check if event is a session status event
39
+ */
18
40
  function isSessionStatusEvent(event) {
19
41
  return event.type === "session.status" &&
20
42
  typeof event.properties === "object" &&
21
43
  event.properties !== null;
22
44
  }
23
- // Subagent event type guards
45
+ /**
46
+ * Check if event is a subagent session created event
47
+ */
24
48
  function isSubagentSessionCreatedEvent(event) {
25
49
  return event.type === "subagent.session.created" &&
26
50
  typeof event.properties === "object" &&
@@ -28,474 +52,9 @@ function isSubagentSessionCreatedEvent(event) {
28
52
  "sessionID" in event.properties &&
29
53
  "parentSessionID" in event.properties;
30
54
  }
31
- const DEFAULT_FALLBACK_MODELS = [
32
- { providerID: "anthropic", modelID: "claude-3-5-sonnet-20250514" },
33
- { providerID: "google", modelID: "gemini-2.5-pro" },
34
- { providerID: "google", modelID: "gemini-2.5-flash" },
35
- ];
36
- const VALID_FALLBACK_MODES = ["cycle", "stop", "retry-last"];
37
- const VALID_RESET_INTERVALS = ["hourly", "daily", "weekly"];
38
- const RESET_INTERVAL_MS = {
39
- hourly: 60 * 60 * 1000,
40
- daily: 24 * 60 * 60 * 1000,
41
- weekly: 7 * 24 * 60 * 60 * 1000,
42
- };
43
- // Metrics management
44
- class MetricsManager {
45
- metrics;
46
- config;
47
- logger;
48
- resetTimer = null;
49
- constructor(config, logger) {
50
- this.config = config;
51
- this.logger = logger;
52
- this.metrics = {
53
- rateLimits: new Map(),
54
- fallbacks: {
55
- total: 0,
56
- successful: 0,
57
- failed: 0,
58
- averageDuration: 0,
59
- byTargetModel: new Map(),
60
- },
61
- modelPerformance: new Map(),
62
- startedAt: Date.now(),
63
- generatedAt: Date.now(),
64
- };
65
- if (this.config.enabled) {
66
- this.startResetTimer();
67
- }
68
- }
69
- startResetTimer() {
70
- if (this.resetTimer) {
71
- clearInterval(this.resetTimer);
72
- }
73
- const intervalMs = RESET_INTERVAL_MS[this.config.resetInterval];
74
- this.resetTimer = setInterval(() => {
75
- this.reset();
76
- }, intervalMs);
77
- }
78
- reset() {
79
- this.metrics = {
80
- rateLimits: new Map(),
81
- fallbacks: {
82
- total: 0,
83
- successful: 0,
84
- failed: 0,
85
- averageDuration: 0,
86
- byTargetModel: new Map(),
87
- },
88
- modelPerformance: new Map(),
89
- startedAt: Date.now(),
90
- generatedAt: Date.now(),
91
- };
92
- this.logger.debug("Metrics reset");
93
- }
94
- recordRateLimit(providerID, modelID) {
95
- if (!this.config.enabled)
96
- return;
97
- const key = getModelKey(providerID, modelID);
98
- const now = Date.now();
99
- const existing = this.metrics.rateLimits.get(key);
100
- if (existing) {
101
- const intervalMs = now - existing.lastOccurrence;
102
- existing.count++;
103
- existing.lastOccurrence = now;
104
- existing.averageInterval = existing.averageInterval
105
- ? (existing.averageInterval + intervalMs) / 2
106
- : intervalMs;
107
- this.metrics.rateLimits.set(key, existing);
108
- }
109
- else {
110
- this.metrics.rateLimits.set(key, {
111
- count: 1,
112
- firstOccurrence: now,
113
- lastOccurrence: now,
114
- });
115
- }
116
- }
117
- recordFallbackStart() {
118
- if (!this.config.enabled)
119
- return 0;
120
- return Date.now();
121
- }
122
- recordFallbackSuccess(targetProviderID, targetModelID, startTime) {
123
- if (!this.config.enabled)
124
- return;
125
- const duration = Date.now() - startTime;
126
- const key = getModelKey(targetProviderID, targetModelID);
127
- this.metrics.fallbacks.total++;
128
- this.metrics.fallbacks.successful++;
129
- // Update average duration
130
- const totalDuration = this.metrics.fallbacks.averageDuration * (this.metrics.fallbacks.successful - 1);
131
- this.metrics.fallbacks.averageDuration = (totalDuration + duration) / this.metrics.fallbacks.successful;
132
- // Update target model metrics
133
- const targetMetrics = this.metrics.fallbacks.byTargetModel.get(key) || {
134
- usedAsFallback: 0,
135
- successful: 0,
136
- failed: 0,
137
- };
138
- targetMetrics.usedAsFallback++;
139
- targetMetrics.successful++;
140
- this.metrics.fallbacks.byTargetModel.set(key, targetMetrics);
141
- }
142
- recordFallbackFailure() {
143
- if (!this.config.enabled)
144
- return;
145
- this.metrics.fallbacks.total++;
146
- this.metrics.fallbacks.failed++;
147
- }
148
- recordModelRequest(providerID, modelID) {
149
- if (!this.config.enabled)
150
- return;
151
- const key = getModelKey(providerID, modelID);
152
- const existing = this.metrics.modelPerformance.get(key) || {
153
- requests: 0,
154
- successes: 0,
155
- failures: 0,
156
- };
157
- existing.requests++;
158
- this.metrics.modelPerformance.set(key, existing);
159
- }
160
- recordModelSuccess(providerID, modelID, responseTime) {
161
- if (!this.config.enabled)
162
- return;
163
- const key = getModelKey(providerID, modelID);
164
- const existing = this.metrics.modelPerformance.get(key) || {
165
- requests: 0,
166
- successes: 0,
167
- failures: 0,
168
- };
169
- existing.successes++;
170
- // Update average response time
171
- const totalTime = (existing.averageResponseTime || 0) * (existing.successes - 1);
172
- existing.averageResponseTime = (totalTime + responseTime) / existing.successes;
173
- this.metrics.modelPerformance.set(key, existing);
174
- }
175
- recordModelFailure(providerID, modelID) {
176
- if (!this.config.enabled)
177
- return;
178
- const key = getModelKey(providerID, modelID);
179
- const existing = this.metrics.modelPerformance.get(key) || {
180
- requests: 0,
181
- successes: 0,
182
- failures: 0,
183
- };
184
- existing.failures++;
185
- this.metrics.modelPerformance.set(key, existing);
186
- }
187
- getMetrics() {
188
- this.metrics.generatedAt = Date.now();
189
- return { ...this.metrics };
190
- }
191
- export(format = "json") {
192
- const metrics = this.getMetrics();
193
- switch (format) {
194
- case "pretty":
195
- return this.exportPretty(metrics);
196
- case "csv":
197
- return this.exportCSV(metrics);
198
- case "json":
199
- default:
200
- return JSON.stringify(this.toPlainObject(metrics), null, 2);
201
- }
202
- }
203
- toPlainObject(metrics) {
204
- return {
205
- rateLimits: Object.fromEntries(Array.from(metrics.rateLimits.entries()).map(([k, v]) => [k, v])),
206
- fallbacks: {
207
- ...metrics.fallbacks,
208
- byTargetModel: Object.fromEntries(Array.from(metrics.fallbacks.byTargetModel.entries()).map(([k, v]) => [k, v])),
209
- },
210
- modelPerformance: Object.fromEntries(Array.from(metrics.modelPerformance.entries()).map(([k, v]) => [k, v])),
211
- startedAt: metrics.startedAt,
212
- generatedAt: metrics.generatedAt,
213
- };
214
- }
215
- exportPretty(metrics) {
216
- const lines = [];
217
- lines.push("=".repeat(60));
218
- lines.push("Rate Limit Fallback Metrics");
219
- lines.push("=".repeat(60));
220
- lines.push(`Started: ${new Date(metrics.startedAt).toISOString()}`);
221
- lines.push(`Generated: ${new Date(metrics.generatedAt).toISOString()}`);
222
- lines.push("");
223
- // Rate Limits
224
- lines.push("Rate Limits:");
225
- lines.push("-".repeat(40));
226
- if (metrics.rateLimits.size === 0) {
227
- lines.push(" No rate limits recorded");
228
- }
229
- else {
230
- for (const [model, data] of metrics.rateLimits.entries()) {
231
- lines.push(` ${model}:`);
232
- lines.push(` Count: ${data.count}`);
233
- lines.push(` First: ${new Date(data.firstOccurrence).toISOString()}`);
234
- lines.push(` Last: ${new Date(data.lastOccurrence).toISOString()}`);
235
- if (data.averageInterval) {
236
- lines.push(` Avg Interval: ${(data.averageInterval / 1000).toFixed(2)}s`);
237
- }
238
- }
239
- }
240
- lines.push("");
241
- // Fallbacks
242
- lines.push("Fallbacks:");
243
- lines.push("-".repeat(40));
244
- lines.push(` Total: ${metrics.fallbacks.total}`);
245
- lines.push(` Successful: ${metrics.fallbacks.successful}`);
246
- lines.push(` Failed: ${metrics.fallbacks.failed}`);
247
- if (metrics.fallbacks.averageDuration > 0) {
248
- lines.push(` Avg Duration: ${(metrics.fallbacks.averageDuration / 1000).toFixed(2)}s`);
249
- }
250
- if (metrics.fallbacks.byTargetModel.size > 0) {
251
- lines.push("");
252
- lines.push(" By Target Model:");
253
- for (const [model, data] of metrics.fallbacks.byTargetModel.entries()) {
254
- lines.push(` ${model}:`);
255
- lines.push(` Used: ${data.usedAsFallback}`);
256
- lines.push(` Success: ${data.successful}`);
257
- lines.push(` Failed: ${data.failed}`);
258
- }
259
- }
260
- lines.push("");
261
- // Model Performance
262
- lines.push("Model Performance:");
263
- lines.push("-".repeat(40));
264
- if (metrics.modelPerformance.size === 0) {
265
- lines.push(" No performance data recorded");
266
- }
267
- else {
268
- for (const [model, data] of metrics.modelPerformance.entries()) {
269
- lines.push(` ${model}:`);
270
- lines.push(` Requests: ${data.requests}`);
271
- lines.push(` Successes: ${data.successes}`);
272
- lines.push(` Failures: ${data.failures}`);
273
- if (data.averageResponseTime) {
274
- lines.push(` Avg Response: ${(data.averageResponseTime / 1000).toFixed(2)}s`);
275
- }
276
- if (data.requests > 0) {
277
- const successRate = ((data.successes / data.requests) * 100).toFixed(1);
278
- lines.push(` Success Rate: ${successRate}%`);
279
- }
280
- }
281
- }
282
- return lines.join("\n");
283
- }
284
- exportCSV(metrics) {
285
- const lines = [];
286
- // Rate Limits CSV
287
- lines.push("=== RATE_LIMITS ===");
288
- lines.push("model,count,first_occurrence,last_occurrence,avg_interval_ms");
289
- for (const [model, data] of metrics.rateLimits.entries()) {
290
- lines.push([
291
- model,
292
- data.count,
293
- data.firstOccurrence,
294
- data.lastOccurrence,
295
- data.averageInterval || 0,
296
- ].join(","));
297
- }
298
- lines.push("");
299
- // Fallbacks Summary CSV
300
- lines.push("=== FALLBACKS_SUMMARY ===");
301
- lines.push(`total,successful,failed,avg_duration_ms`);
302
- lines.push([
303
- metrics.fallbacks.total,
304
- metrics.fallbacks.successful,
305
- metrics.fallbacks.failed,
306
- metrics.fallbacks.averageDuration || 0,
307
- ].join(","));
308
- lines.push("");
309
- // Fallbacks by Model CSV
310
- lines.push("=== FALLBACKS_BY_MODEL ===");
311
- lines.push("model,used_as_fallback,successful,failed");
312
- for (const [model, data] of metrics.fallbacks.byTargetModel.entries()) {
313
- lines.push([
314
- model,
315
- data.usedAsFallback,
316
- data.successful,
317
- data.failed,
318
- ].join(","));
319
- }
320
- lines.push("");
321
- // Model Performance CSV
322
- lines.push("=== MODEL_PERFORMANCE ===");
323
- lines.push("model,requests,successes,failures,avg_response_time_ms,success_rate");
324
- for (const [model, data] of metrics.modelPerformance.entries()) {
325
- const successRate = data.requests > 0 ? ((data.successes / data.requests) * 100).toFixed(1) : "0";
326
- lines.push([
327
- model,
328
- data.requests,
329
- data.successes,
330
- data.failures,
331
- data.averageResponseTime || 0,
332
- successRate,
333
- ].join(","));
334
- }
335
- return lines.join("\n");
336
- }
337
- async report() {
338
- if (!this.config.enabled)
339
- return;
340
- const output = this.export(this.config.output.format);
341
- // Console output
342
- if (this.config.output.console) {
343
- console.log(output);
344
- }
345
- // File output
346
- if (this.config.output.file) {
347
- try {
348
- writeFileSync(this.config.output.file, output, "utf-8");
349
- this.logger.debug(`Metrics exported to ${this.config.output.file}`);
350
- }
351
- catch (error) {
352
- this.logger.warn(`Failed to write metrics to file: ${this.config.output.file}`, { error });
353
- }
354
- }
355
- }
356
- destroy() {
357
- if (this.resetTimer) {
358
- clearInterval(this.resetTimer);
359
- this.resetTimer = null;
360
- }
361
- }
362
- }
363
- const DEFAULT_CONFIG = {
364
- fallbackModels: DEFAULT_FALLBACK_MODELS,
365
- cooldownMs: 60 * 1000,
366
- enabled: true,
367
- fallbackMode: "cycle",
368
- log: {
369
- level: "warn",
370
- format: "simple",
371
- enableTimestamp: true,
372
- },
373
- metrics: {
374
- enabled: false,
375
- output: {
376
- console: true,
377
- format: "pretty",
378
- },
379
- resetInterval: "daily",
380
- },
381
- };
382
- function loadConfig(directory) {
383
- const homedir = process.env.HOME || "";
384
- const configPaths = [
385
- join(directory, ".opencode", "rate-limit-fallback.json"),
386
- join(directory, "rate-limit-fallback.json"),
387
- join(homedir, ".opencode", "rate-limit-fallback.json"),
388
- join(homedir, ".config", "opencode", "rate-limit-fallback.json"),
389
- ];
390
- for (const configPath of configPaths) {
391
- if (existsSync(configPath)) {
392
- try {
393
- const content = readFileSync(configPath, "utf-8");
394
- const userConfig = JSON.parse(content);
395
- const mode = userConfig.fallbackMode;
396
- const resetInterval = userConfig.metrics?.resetInterval;
397
- return {
398
- ...DEFAULT_CONFIG,
399
- ...userConfig,
400
- fallbackModels: userConfig.fallbackModels || DEFAULT_CONFIG.fallbackModels,
401
- fallbackMode: VALID_FALLBACK_MODES.includes(mode) ? mode : DEFAULT_CONFIG.fallbackMode,
402
- log: userConfig.log ? { ...DEFAULT_CONFIG.log, ...userConfig.log } : DEFAULT_CONFIG.log,
403
- metrics: userConfig.metrics ? {
404
- ...DEFAULT_CONFIG.metrics,
405
- ...userConfig.metrics,
406
- output: userConfig.metrics.output ? {
407
- ...DEFAULT_CONFIG.metrics.output,
408
- ...userConfig.metrics.output,
409
- } : DEFAULT_CONFIG.metrics.output,
410
- resetInterval: VALID_RESET_INTERVALS.includes(resetInterval) ? resetInterval : DEFAULT_CONFIG.metrics.resetInterval,
411
- } : DEFAULT_CONFIG.metrics,
412
- };
413
- }
414
- catch (error) {
415
- // Silently ignore config load errors - will be logged after logger is initialized
416
- }
417
- }
418
- }
419
- return DEFAULT_CONFIG;
420
- }
421
- function getModelKey(providerID, modelID) {
422
- return `${providerID}/${modelID}`;
423
- }
424
- function getStateKey(sessionID, messageID) {
425
- return `${sessionID}:${messageID}`;
426
- }
427
- function isRateLimitError(error) {
428
- if (!error || typeof error !== "object")
429
- return false;
430
- // More type-safe error object structure
431
- const err = error;
432
- // Check for 429 status code in APIError (strict check)
433
- if (err.name === "APIError" && err.data?.statusCode === 429) {
434
- return true;
435
- }
436
- // Type-safe access to error fields
437
- const responseBody = String(err.data?.responseBody || "").toLowerCase();
438
- const message = String(err.data?.message || err.message || "").toLowerCase();
439
- // Strict rate limit indicators only - avoid false positives
440
- const strictRateLimitIndicators = [
441
- "rate limit",
442
- "rate_limit",
443
- "ratelimit",
444
- "too many requests",
445
- "quota exceeded",
446
- ];
447
- // Check for 429 in text (explicit HTTP status code)
448
- if (responseBody.includes("429") || message.includes("429")) {
449
- return true;
450
- }
451
- // Check for strict rate limit keywords
452
- return strictRateLimitIndicators.some((indicator) => responseBody.includes(indicator) ||
453
- message.includes(indicator));
454
- }
455
- // Constants for deduplication and state management
456
- const DEDUP_WINDOW_MS = 5000;
457
- const STATE_TIMEOUT_MS = 30000;
458
- const CLEANUP_INTERVAL_MS = 300000; // 5 minutes
459
- const SESSION_ENTRY_TTL_MS = 3600000; // 1 hour
460
- /**
461
- * Extract toast message properties with fallback values
462
- */
463
- function getToastMessage(toast) {
464
- const title = toast?.body?.title || toast?.title || "Toast";
465
- const message = toast?.body?.message || toast?.message || "";
466
- const variant = toast?.body?.variant || toast?.variant || "info";
467
- return { title, message, variant };
468
- }
469
- /**
470
- * Safely show toast, falling back to console logging if TUI is missing or fails
471
- */
472
- const safeShowToast = async (client, toast) => {
473
- const { title, message, variant } = getToastMessage(toast);
474
- const logToConsole = () => {
475
- if (variant === "error") {
476
- console.error(`[RateLimitFallback] ${title}: ${message}`);
477
- }
478
- else if (variant === "warning") {
479
- console.warn(`[RateLimitFallback] ${title}: ${message}`);
480
- }
481
- else {
482
- console.log(`[RateLimitFallback] ${title}: ${message}`);
483
- }
484
- };
485
- try {
486
- if (client.tui) {
487
- await client.tui.showToast(toast);
488
- }
489
- else {
490
- // TUI doesn't exist - log to console
491
- logToConsole();
492
- }
493
- }
494
- catch {
495
- // TUI exists but failed to show toast - log to console
496
- logToConsole();
497
- }
498
- };
55
+ // ============================================================================
56
+ // Main Plugin Export
57
+ // ============================================================================
499
58
  export const RateLimitFallback = async ({ client, directory }) => {
500
59
  const config = loadConfig(directory);
501
60
  // Detect headless mode (no TUI)
@@ -528,507 +87,45 @@ export const RateLimitFallback = async ({ client, directory }) => {
528
87
  if (!config.enabled) {
529
88
  return {};
530
89
  }
531
- const rateLimitedModels = new Map();
532
- const retryState = new Map();
533
- const currentSessionModel = new Map();
534
- const fallbackInProgress = new Map(); // sessionID:messageID -> timestamp (message scope)
535
- const fallbackMessages = new Map(); // Track fallback messages for completion detection
536
- // Subagent session tracking
537
- const sessionHierarchies = new Map(); // rootSessionID -> SessionHierarchy
538
- const sessionToRootMap = new Map(); // sessionID -> rootSessionID
539
- const maxSubagentDepth = config.maxSubagentDepth ?? 10;
540
- // Metrics management
541
- const metricsManager = new MetricsManager(config.metrics ?? { ...DEFAULT_CONFIG.metrics }, logger);
542
- // Track model requests for performance metrics
543
- const modelRequestStartTimes = new Map(); // modelKey -> startTime
544
- // Helper functions for session hierarchy management
545
- function getOrCreateHierarchy(rootSessionID) {
546
- let hierarchy = sessionHierarchies.get(rootSessionID);
547
- if (!hierarchy) {
548
- hierarchy = {
549
- rootSessionID,
550
- subagents: new Map(),
551
- sharedFallbackState: "none",
552
- sharedConfig: config,
553
- createdAt: Date.now(),
554
- lastActivity: Date.now(),
555
- };
556
- sessionHierarchies.set(rootSessionID, hierarchy);
557
- sessionToRootMap.set(rootSessionID, rootSessionID);
558
- }
559
- return hierarchy;
560
- }
561
- function registerSubagent(sessionID, parentSessionID) {
562
- // Validate parent session exists
563
- // Parent session must either be registered in sessionToRootMap or be a new root session
564
- const parentRootSessionID = sessionToRootMap.get(parentSessionID);
565
- // Determine root session - if parent doesn't exist, treat it as a new root
566
- const rootSessionID = parentRootSessionID || parentSessionID;
567
- // If parent is not a subagent but we're treating it as a root, create a hierarchy for it
568
- // This allows sessions to become roots when their first subagent is registered
569
- const hierarchy = getOrCreateHierarchy(rootSessionID);
570
- const parentSubagent = hierarchy.subagents.get(parentSessionID);
571
- const depth = parentSubagent ? parentSubagent.depth + 1 : 1;
572
- // Enforce max depth
573
- if (depth > maxSubagentDepth) {
574
- return false;
575
- }
576
- const subagent = {
577
- sessionID,
578
- parentSessionID,
579
- depth,
580
- fallbackState: "none",
581
- createdAt: Date.now(),
582
- lastActivity: Date.now(),
583
- };
584
- hierarchy.subagents.set(sessionID, subagent);
585
- sessionToRootMap.set(sessionID, rootSessionID);
586
- hierarchy.lastActivity = Date.now();
587
- return true;
588
- }
589
- function getRootSession(sessionID) {
590
- return sessionToRootMap.get(sessionID) || null;
591
- }
592
- function getHierarchy(sessionID) {
593
- const rootSessionID = getRootSession(sessionID);
594
- return rootSessionID ? sessionHierarchies.get(rootSessionID) || null : null;
595
- }
596
- // Cleanup stale session model entries (every 5 minutes)
90
+ // Initialize components
91
+ initSubagentTracker(config);
92
+ const metricsManager = new MetricsManager(config.metrics ?? { enabled: false, output: { console: true, format: "pretty" }, resetInterval: "daily" }, logger);
93
+ // Create hierarchy resolver to avoid circular dependency
94
+ const hierarchyResolver = {
95
+ getRootSession: getRootSession,
96
+ getHierarchy: getHierarchy,
97
+ };
98
+ const fallbackHandler = new FallbackHandler(config, client, logger, metricsManager, hierarchyResolver);
99
+ // Cleanup stale entries periodically
597
100
  const cleanupInterval = setInterval(() => {
598
- const now = Date.now();
599
- for (const [sessionID, entry] of currentSessionModel.entries()) {
600
- // Remove entries older than 1 hour
601
- if (now - entry.lastUpdated > SESSION_ENTRY_TTL_MS) {
602
- currentSessionModel.delete(sessionID);
603
- }
604
- }
605
- // Clean up stale session hierarchies
606
- for (const [rootSessionID, hierarchy] of sessionHierarchies.entries()) {
607
- if (now - hierarchy.lastActivity > SESSION_ENTRY_TTL_MS) {
608
- // Clean up all subagents in this hierarchy
609
- for (const subagentID of hierarchy.subagents.keys()) {
610
- sessionToRootMap.delete(subagentID);
611
- }
612
- sessionHierarchies.delete(rootSessionID);
613
- sessionToRootMap.delete(rootSessionID);
614
- }
615
- }
616
- // Clean up stale retry state entries to prevent memory leaks
617
- for (const [stateKey, state] of retryState.entries()) {
618
- if (now - state.lastAttemptTime > STATE_TIMEOUT_MS) {
619
- retryState.delete(stateKey);
620
- }
621
- }
622
- // Clean up stale fallback messages
623
- for (const [fallbackKey, fallbackInfo] of fallbackMessages.entries()) {
624
- if (now - fallbackInfo.timestamp > SESSION_ENTRY_TTL_MS) {
625
- fallbackInProgress.delete(fallbackKey);
626
- fallbackMessages.delete(fallbackKey);
627
- }
628
- }
101
+ clearHierarchyEntries();
102
+ fallbackHandler.cleanupStaleEntries();
629
103
  }, CLEANUP_INTERVAL_MS);
630
- function isModelRateLimited(providerID, modelID) {
631
- const key = getModelKey(providerID, modelID);
632
- const limitedAt = rateLimitedModels.get(key);
633
- if (!limitedAt)
634
- return false;
635
- if (Date.now() - limitedAt > config.cooldownMs) {
636
- rateLimitedModels.delete(key);
637
- return false;
638
- }
639
- return true;
640
- }
641
- function markModelRateLimited(providerID, modelID) {
642
- const key = getModelKey(providerID, modelID);
643
- rateLimitedModels.set(key, Date.now());
644
- }
645
- function findNextAvailableModel(currentProviderID, currentModelID, attemptedModels) {
646
- const currentKey = getModelKey(currentProviderID, currentModelID);
647
- const startIndex = config.fallbackModels.findIndex(m => getModelKey(m.providerID, m.modelID) === currentKey);
648
- // If current model is not in the fallback list (startIndex is -1), start from 0
649
- const searchStartIndex = Math.max(0, startIndex);
650
- for (let i = searchStartIndex + 1; i < config.fallbackModels.length; i++) {
651
- const model = config.fallbackModels[i];
652
- const key = getModelKey(model.providerID, model.modelID);
653
- if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
654
- return model;
655
- }
656
- }
657
- for (let i = 0; i <= searchStartIndex && i < config.fallbackModels.length; i++) {
658
- const model = config.fallbackModels[i];
659
- const key = getModelKey(model.providerID, model.modelID);
660
- if (!attemptedModels.has(key) && !isModelRateLimited(model.providerID, model.modelID)) {
661
- return model;
662
- }
663
- }
664
- return null;
665
- }
666
- /**
667
- * Check and mark fallback in progress for deduplication.
668
- * Uses message scope (sessionID:messageID) for better tracking.
669
- * Returns true if processing should continue, false if it should be skipped.
670
- */
671
- function checkAndMarkFallbackInProgress(sessionID, messageID) {
672
- const key = getStateKey(sessionID, messageID);
673
- const lastFallback = fallbackInProgress.get(key);
674
- if (lastFallback && Date.now() - lastFallback < DEDUP_WINDOW_MS) {
675
- return false; // Skip - already processing
676
- }
677
- fallbackInProgress.set(key, Date.now());
678
- return true; // Continue processing
679
- }
680
- /**
681
- * Resolve the target session for fallback processing.
682
- * For subagent sessions, the target is the root session (parent-centered approach).
683
- * Uses message scope (sessionID:messageID) for deduplication.
684
- * Updates hierarchy state and returns { targetSessionID, hierarchy }.
685
- */
686
- function resolveTargetSessionWithDedup(sessionID, messageID) {
687
- const hierarchy = getHierarchy(sessionID);
688
- const rootSessionID = getRootSession(sessionID);
689
- if (rootSessionID && hierarchy) {
690
- // Check deduplication with message scope
691
- if (!checkAndMarkFallbackInProgress(rootSessionID, messageID)) {
692
- return null; // Skip - already processing
693
- }
694
- // Update the shared fallback state
695
- hierarchy.sharedFallbackState = "in_progress";
696
- hierarchy.lastActivity = Date.now();
697
- // Update the subagent's state
698
- const subagent = hierarchy.subagents.get(sessionID);
699
- if (subagent) {
700
- subagent.fallbackState = "in_progress";
701
- subagent.lastActivity = Date.now();
702
- }
703
- return { targetSessionID: rootSessionID, hierarchy };
704
- }
705
- else {
706
- // Prevent duplicate fallback processing for non-subagent sessions with message scope
707
- if (!checkAndMarkFallbackInProgress(sessionID, messageID)) {
708
- return null; // Skip - already processing
709
- }
710
- return { targetSessionID: sessionID, hierarchy: null };
711
- }
712
- }
713
- /**
714
- * Get or create retry state for a specific message.
715
- */
716
- function getOrCreateRetryState(sessionID, messageID) {
717
- const stateKey = getStateKey(sessionID, messageID);
718
- let state = retryState.get(stateKey);
719
- if (!state || Date.now() - state.lastAttemptTime > STATE_TIMEOUT_MS) {
720
- state = { attemptedModels: new Set(), lastAttemptTime: Date.now() };
721
- retryState.set(stateKey, state);
722
- }
723
- return state;
724
- }
725
- /**
726
- * Select the next fallback model based on current state and fallback mode.
727
- * Returns the selected model or null if no model is available.
728
- */
729
- async function selectFallbackModel(currentProviderID, currentModelID, state) {
730
- // Mark current model as rate limited and add to attempted
731
- if (currentProviderID && currentModelID) {
732
- markModelRateLimited(currentProviderID, currentModelID);
733
- state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
734
- }
735
- let nextModel = findNextAvailableModel(currentProviderID || "", currentModelID || "", state.attemptedModels);
736
- // Handle when no model is found based on fallbackMode
737
- if (!nextModel && state.attemptedModels.size > 0) {
738
- if (config.fallbackMode === "cycle") {
739
- // Reset and retry from the first model
740
- state.attemptedModels.clear();
741
- if (currentProviderID && currentModelID) {
742
- state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
743
- }
744
- nextModel = findNextAvailableModel("", "", state.attemptedModels);
745
- }
746
- else if (config.fallbackMode === "retry-last") {
747
- // Try the last model in the list once, then reset on next prompt
748
- const lastModel = config.fallbackModels[config.fallbackModels.length - 1];
749
- if (lastModel) {
750
- const isLastModelCurrent = currentProviderID === lastModel.providerID && currentModelID === lastModel.modelID;
751
- if (!isLastModelCurrent && !isModelRateLimited(lastModel.providerID, lastModel.modelID)) {
752
- // Use the last model for one more try
753
- nextModel = lastModel;
754
- await safeShowToast(client, {
755
- body: {
756
- title: "Last Resort",
757
- message: `Trying ${lastModel.modelID} one more time...`,
758
- variant: "warning",
759
- duration: 3000,
760
- },
761
- });
762
- }
763
- else {
764
- // Last model also failed, reset for next prompt
765
- state.attemptedModels.clear();
766
- if (currentProviderID && currentModelID) {
767
- state.attemptedModels.add(getModelKey(currentProviderID, currentModelID));
768
- }
769
- nextModel = findNextAvailableModel("", "", state.attemptedModels);
770
- }
771
- }
772
- }
773
- // "stop" mode: nextModel remains null, will show error below
774
- }
775
- return nextModel;
776
- }
777
- /**
778
- * Extract and validate message parts from a user message.
779
- */
780
- function extractMessageParts(message) {
781
- const msg = message;
782
- return msg.parts
783
- .filter((p) => {
784
- const part = p;
785
- return part.type === "text" || part.type === "file";
786
- })
787
- .map((p) => {
788
- const part = p;
789
- if (part.type === "text")
790
- return { type: "text", text: String(part.text) };
791
- if (part.type === "file")
792
- return { type: "file", path: String(part.path), mediaType: String(part.mediaType) };
793
- return null;
794
- })
795
- .filter((p) => p !== null);
796
- }
797
- /**
798
- * Convert internal MessagePart to SDK-compatible format.
799
- */
800
- function convertPartsToSDKFormat(parts) {
801
- return parts.map((part) => {
802
- if (part.type === "text") {
803
- return { type: "text", text: part.text };
804
- }
805
- // For file parts, we need to match the FilePartInput format
806
- // Using path as url since we're dealing with local files
807
- return {
808
- type: "file",
809
- url: part.path,
810
- mime: part.mediaType || "application/octet-stream",
811
- };
812
- });
813
- }
814
- /**
815
- * Propagate model changes to all subagents in the hierarchy.
816
- */
817
- function propagateModelToSubagents(hierarchy, targetSessionID, providerID, modelID) {
818
- if (hierarchy.rootSessionID === targetSessionID) {
819
- hierarchy.sharedFallbackState = "completed";
820
- hierarchy.lastActivity = Date.now();
821
- // Update model tracking for all subagents
822
- for (const [subagentID, subagent] of hierarchy.subagents.entries()) {
823
- currentSessionModel.set(subagentID, {
824
- providerID,
825
- modelID,
826
- lastUpdated: Date.now(),
827
- });
828
- subagent.fallbackState = "completed";
829
- subagent.lastActivity = Date.now();
830
- }
831
- }
832
- }
833
- /**
834
- * Retry the prompt with a different model.
835
- */
836
- async function retryWithModel(targetSessionID, model, parts, hierarchy) {
837
- // Track the new model for this session
838
- currentSessionModel.set(targetSessionID, {
839
- providerID: model.providerID,
840
- modelID: model.modelID,
841
- lastUpdated: Date.now(),
842
- });
843
- // If this is a root session with subagents, propagate the model to all subagents
844
- if (hierarchy) {
845
- propagateModelToSubagents(hierarchy, targetSessionID, model.providerID, model.modelID);
846
- }
847
- // Record model request for metrics
848
- metricsManager.recordModelRequest(model.providerID, model.modelID);
849
- const modelKey = getModelKey(model.providerID, model.modelID);
850
- modelRequestStartTimes.set(modelKey, Date.now());
851
- // Convert internal MessagePart to SDK-compatible format
852
- const sdkParts = convertPartsToSDKFormat(parts);
853
- await client.session.prompt({
854
- path: { id: targetSessionID },
855
- body: {
856
- parts: sdkParts,
857
- model: { providerID: model.providerID, modelID: model.modelID },
858
- },
859
- });
860
- await safeShowToast(client, {
861
- body: {
862
- title: "Fallback Successful",
863
- message: `Now using ${model.modelID}`,
864
- variant: "success",
865
- duration: 3000,
866
- },
867
- });
868
- }
869
- async function handleRateLimitFallback(sessionID, currentProviderID, currentModelID) {
870
- try {
871
- // If no model info provided, try to get from tracked session model
872
- const rootSessionID = getRootSession(sessionID);
873
- const targetSessionID = rootSessionID || sessionID;
874
- if (!currentProviderID || !currentModelID) {
875
- const tracked = currentSessionModel.get(targetSessionID);
876
- if (tracked) {
877
- currentProviderID = tracked.providerID;
878
- currentModelID = tracked.modelID;
879
- }
880
- }
881
- // Record rate limit metric
882
- if (currentProviderID && currentModelID) {
883
- metricsManager.recordRateLimit(currentProviderID, currentModelID);
884
- }
885
- // Abort current session with error handling
886
- try {
887
- await client.session.abort({ path: { id: targetSessionID } });
888
- }
889
- catch (abortError) {
890
- // Silently ignore abort errors and continue with fallback
891
- logger.debug(`Failed to abort session ${targetSessionID}`, { error: abortError });
892
- }
893
- await safeShowToast(client, {
894
- body: {
895
- title: "Rate Limit Detected",
896
- message: `Switching from ${currentModelID || 'current model'}...`,
897
- variant: "warning",
898
- duration: 3000,
899
- },
900
- });
901
- // Get messages from the session
902
- const messagesResult = await client.session.messages({ path: { id: targetSessionID } });
903
- if (!messagesResult.data) {
904
- return;
905
- }
906
- const messages = messagesResult.data;
907
- const lastUserMessage = [...messages].reverse().find(m => m.info.role === "user");
908
- if (!lastUserMessage) {
909
- return;
910
- }
911
- // Resolve the target session for fallback processing with message scope
912
- const resolution = resolveTargetSessionWithDedup(sessionID, lastUserMessage.info.id);
913
- if (!resolution) {
914
- return; // Skipped due to deduplication
915
- }
916
- // Get or create retry state for this message
917
- const state = getOrCreateRetryState(sessionID, lastUserMessage.info.id);
918
- const stateKey = getStateKey(sessionID, lastUserMessage.info.id);
919
- const fallbackKey = getStateKey(resolution.targetSessionID, lastUserMessage.info.id);
920
- // Select the next fallback model
921
- const nextModel = await selectFallbackModel(currentProviderID, currentModelID, state);
922
- // Show error if no model is available
923
- if (!nextModel) {
924
- await safeShowToast(client, {
925
- body: {
926
- title: "No Fallback Available",
927
- message: config.fallbackMode === "stop"
928
- ? "All fallback models exhausted"
929
- : "All models are rate limited",
930
- variant: "error",
931
- duration: 5000,
932
- },
933
- });
934
- retryState.delete(stateKey);
935
- fallbackInProgress.delete(fallbackKey);
936
- return;
937
- }
938
- state.attemptedModels.add(getModelKey(nextModel.providerID, nextModel.modelID));
939
- state.lastAttemptTime = Date.now();
940
- // Extract message parts
941
- const parts = extractMessageParts(lastUserMessage);
942
- if (parts.length === 0) {
943
- fallbackInProgress.delete(fallbackKey);
944
- return;
945
- }
946
- await safeShowToast(client, {
947
- body: {
948
- title: "Retrying",
949
- message: `Using ${nextModel.providerID}/${nextModel.modelID}`,
950
- variant: "info",
951
- duration: 3000,
952
- },
953
- });
954
- // Record fallback start time
955
- metricsManager.recordFallbackStart();
956
- // Track this message as a fallback message for completion detection
957
- // Note: The new message will have a new ID after prompting, but we use the original message ID
958
- // to correlate with the fallback in progress state
959
- fallbackMessages.set(fallbackKey, {
960
- sessionID: resolution.targetSessionID,
961
- messageID: lastUserMessage.info.id,
962
- timestamp: Date.now(),
963
- });
964
- // Retry with the selected model
965
- await retryWithModel(resolution.targetSessionID, nextModel, parts, resolution.hierarchy);
966
- // Clean up state
967
- retryState.delete(stateKey);
968
- }
969
- catch (err) {
970
- // Silently ignore fallback errors - log only limited error info
971
- const errorMessage = err instanceof Error ? err.message : String(err);
972
- const errorName = err instanceof Error ? err.name : undefined;
973
- logger.debug(`Fallback error for session ${sessionID}`, {
974
- error: errorMessage,
975
- name: errorName,
976
- });
977
- }
978
- }
979
104
  return {
980
105
  event: async ({ event }) => {
106
+ // Handle session.error events
981
107
  if (isSessionErrorEvent(event)) {
982
108
  const { sessionID, error } = event.properties;
983
109
  if (sessionID && error && isRateLimitError(error)) {
984
- await handleRateLimitFallback(sessionID, "", "");
110
+ await fallbackHandler.handleRateLimitFallback(sessionID, "", "");
985
111
  }
986
112
  }
113
+ // Handle message.updated events
987
114
  if (isMessageUpdatedEvent(event)) {
988
115
  const info = event.properties.info;
989
116
  if (info?.error && isRateLimitError(info.error)) {
990
- await handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
117
+ await fallbackHandler.handleRateLimitFallback(info.sessionID, info.providerID || "", info.modelID || "");
991
118
  }
992
- else if (info?.status === "completed" && !info?.error) {
993
- // Check if this message is a fallback message and clear its in-progress state
994
- const fallbackKey = getStateKey(info.sessionID, info.id);
995
- const fallbackInfo = fallbackMessages.get(fallbackKey);
996
- if (fallbackInfo) {
997
- // Clear fallback in progress for this message
998
- fallbackInProgress.delete(fallbackKey);
999
- fallbackMessages.delete(fallbackKey);
1000
- logger.debug(`Fallback completed for message ${info.id}`, { sessionID: info.sessionID });
1001
- // Record fallback success metric
1002
- const tracked = currentSessionModel.get(info.sessionID);
1003
- if (tracked) {
1004
- metricsManager.recordFallbackSuccess(tracked.providerID, tracked.modelID, fallbackInfo.timestamp);
1005
- // Record model performance metric
1006
- const modelKey = getModelKey(tracked.providerID, tracked.modelID);
1007
- const startTime = modelRequestStartTimes.get(modelKey);
1008
- if (startTime) {
1009
- const responseTime = Date.now() - startTime;
1010
- metricsManager.recordModelSuccess(tracked.providerID, tracked.modelID, responseTime);
1011
- modelRequestStartTimes.delete(modelKey);
1012
- }
1013
- }
1014
- }
119
+ else if (info?.status === "completed" && !info?.error && info?.id) {
120
+ // Record fallback success
121
+ fallbackHandler.handleMessageUpdated(info.sessionID, info.id, false, false);
1015
122
  }
1016
- else if (info?.error && !isRateLimitError(info.error)) {
1017
- // Non-rate-limit error - record model failure metric
1018
- const tracked = currentSessionModel.get(info.sessionID);
1019
- if (tracked) {
1020
- metricsManager.recordModelFailure(tracked.providerID, tracked.modelID);
1021
- // Check if this was a fallback attempt and record failure
1022
- const fallbackKey = getStateKey(info.sessionID, info.id);
1023
- const fallbackInfo = fallbackMessages.get(fallbackKey);
1024
- if (fallbackInfo) {
1025
- metricsManager.recordFallbackFailure();
1026
- fallbackInProgress.delete(fallbackKey);
1027
- fallbackMessages.delete(fallbackKey);
1028
- }
1029
- }
123
+ else if (info?.error && !isRateLimitError(info.error) && info?.id) {
124
+ // Record non-rate-limit error
125
+ fallbackHandler.handleMessageUpdated(info.sessionID, info.id, true, false);
1030
126
  }
1031
127
  }
128
+ // Handle session.status events
1032
129
  if (isSessionStatusEvent(event)) {
1033
130
  const props = event.properties;
1034
131
  const status = props?.status;
@@ -1040,34 +137,28 @@ export const RateLimitFallback = async ({ client, directory }) => {
1040
137
  message.includes("reduce concurrency");
1041
138
  if (isRateLimitRetry) {
1042
139
  // Try fallback on any attempt, handleRateLimitFallback will manage state
1043
- await handleRateLimitFallback(props.sessionID, "", "");
140
+ await fallbackHandler.handleRateLimitFallback(props.sessionID, "", "");
1044
141
  }
1045
142
  }
1046
143
  }
1047
144
  // Handle subagent session creation events
1048
- // Note: Using type assertion for subagent events since they may not be in the official Event union yet
1049
145
  const rawEvent = event;
1050
146
  if (isSubagentSessionCreatedEvent(rawEvent)) {
1051
147
  const { sessionID, parentSessionID } = rawEvent.properties;
1052
148
  if (config.enableSubagentFallback !== false) {
1053
- registerSubagent(sessionID, parentSessionID);
149
+ registerSubagent(sessionID, parentSessionID, config);
1054
150
  }
1055
151
  }
1056
152
  },
1057
153
  // Cleanup function to prevent memory leaks
1058
154
  cleanup: () => {
1059
155
  clearInterval(cleanupInterval);
1060
- // Clean up all session hierarchies
1061
- sessionHierarchies.clear();
1062
- sessionToRootMap.clear();
1063
- // Clean up fallback messages
1064
- fallbackMessages.clear();
1065
- // Clean up metrics manager
156
+ clearAllHierarchies();
1066
157
  metricsManager.destroy();
1067
- // Clean up model request start times
1068
- modelRequestStartTimes.clear();
158
+ fallbackHandler.destroy();
1069
159
  },
1070
160
  };
1071
161
  };
1072
162
  export default RateLimitFallback;
1073
- //# sourceMappingURL=index.js.map
163
+ export { MetricsManager } from "./src/metrics/MetricsManager.js";
164
+ export { createLogger } from "./logger.js";