maxpool 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1729 @@
1
+ import { refreshAccessToken, isTokenExpiringSoon } from './oauth.js';
2
+
3
/**
 * Build a fresh quota record in which every known field is null.
 * A new object is returned on each call so accounts never share state.
 */
function emptyQuota() {
  const fields = [
    // Standard API rate limits (API key accounts)
    'tokensLimit',
    'tokensRemaining',
    'requestsLimit',
    'requestsRemaining',
    'genericLimit',
    'genericRemaining',
    'genericReset',
    // Unified rate limits (Claude Max accounts)
    'unified5h',            // utilization 0-1
    'unified7d',            // utilization 0-1
    'unified5hRaw',         // upstream-reported utilization before display clamp
    'unified7dRaw',         // upstream-reported utilization before display clamp
    'unified5hReset',       // ms timestamp
    'unified7dReset',       // ms timestamp
    'unified7dSonnet',      // Sonnet-specific weekly utilization (from usage probe)
    'unified7dSonnetReset', // ms timestamp
    'unifiedStatus',        // allowed | allowed_warning | rejected
    'resetsAt',
  ];
  return Object.fromEntries(fields.map(field => [field, null]));
}
26
+
27
// Built-in scheduler settings. AccountManager spreads caller-supplied options
// over this object, so any key here can be overridden per instance.
const DEFAULT_SCHEDULER = {
  // Hard caps on concurrently leased requests.
  safetyMaxActivePerAccount: 50,
  safetyMaxGlobalActive: 150,
  // Cooldown durations, in milliseconds.
  cooldownMs: 30 * 1000,
  maxCooldownMs: 15 * 60 * 1000,
  // Weekly utilization thresholds (fractions of the weekly quota).
  weeklySoftThreshold: 0.65,
  weeklyReserveThreshold: 0.85,
  weeklyCriticalThreshold: 0.95,
  weeklyExhaustedThreshold: 0.985,
  weeklyBurnDebtWeight: 0.6,
  // Routing-cost tuning: the lower the cost, the more preferred the account.
  // Quota scarcity is the primary signal; the recent-load share breaks ties
  // between equally scarce accounts so sequential traffic rotates rather than
  // funnelling onto a single account.
  scarcityWeight: 6,                 // multiplies quota scarcity (pace overage, 0..~1)
  spreadShareWeight: 3,              // multiplies an account's share of recent fleet load (0..1)
  recoveryRampWeight: 4,             // decaying penalty applied to a just-recovered account
  recoveryRampMs: 5 * 60 * 1000,     // how long the post-recovery ramp lasts
  spreadWindowMs: 15 * 60 * 1000,    // rolling window used to measure recent per-account load
};
46
// Durations in milliseconds.
const LOAD_EVENT_MAX_AGE_MS = 60 * 60_000;   // one hour
const WEEK_MS = 7 * 24 * 3_600_000;          // seven days
const FIVE_HOUR_MS = 5 * 3_600_000;          // five hours
49
+
50
// Names of the quota fields that are kept across a restart: utilization levels
// and their reset windows, which are learned passively from upstream
// responses. Transient or derived state (probing, requalify, rateLimitedUntil)
// and credentials are deliberately left out. A restored window that has gone
// stale is wiped on first use by _clearExpiredQuotas.
const PERSISTED_QUOTA_FIELDS = [
  'unified5h',
  'unified7d',
  'unified5hReset',
  'unified7dReset',
  'unifiedStatus',
  'tokensLimit',
  'tokensRemaining',
  'requestsLimit',
  'requestsRemaining',
  'resetsAt',
];
58
+
59
/**
 * Normalize a retry-after value to a whole number of seconds in [1, 86400].
 *
 * @param {*} value - Seconds to wait; numbers and numeric strings are accepted.
 * @returns {number} The value rounded up and clamped to between one second and
 *   24 hours, or 60 when the value is missing or not a finite number.
 */
function clampRetryAfterSeconds(value) {
  // Number(null) and Number('') are both 0, which would turn a missing value
  // into a one-second delay. Treat null, undefined and blank strings as
  // "not provided" so they get the same 60 s default as unparseable input.
  const isBlank = typeof value === 'string' && value.trim() === '';
  if (value == null || isBlank) return 60;

  const n = Number(value);
  if (!Number.isFinite(n)) return 60;
  return Math.min(Math.max(Math.ceil(n), 1), 24 * 60 * 60);
}
64
+
65
/**
 * Coerce a value to a number and confine it to the closed interval [0, 1].
 * Anything that does not coerce to a finite number yields 0.
 */
function clamp01(value) {
  const numeric = Number(value);
  if (!Number.isFinite(numeric) || numeric <= 0) {
    return 0;
  }
  return numeric >= 1 ? 1 : numeric;
}
70
+
71
/**
 * Look up several candidate header names in order and return the first value
 * that is neither null nor undefined. Returns null when none is present.
 */
function firstHeader(headers, names) {
  for (const candidate of names) {
    const found = headers[candidate];
    if (found !== null && found !== undefined) {
      return found;
    }
  }
  return null;
}
77
+
78
/**
 * Read the first present header among `names` and parse the leading item of
 * its comma-separated value as a base-10 integer. Returns null when no header
 * is present or the item does not start with an integer.
 */
function parseFirstInt(headers, names) {
  const headerValue = firstHeader(headers, names);
  if (headerValue === null || headerValue === undefined) {
    return null;
  }
  const [leading] = String(headerValue).split(',');
  const parsed = Number.parseInt(leading.trim(), 10);
  return Number.isNaN(parsed) ? null : parsed;
}
85
+
86
/**
 * Convert a rate-limit reset header into an absolute millisecond timestamp.
 *
 * The leading comma-separated item is tried as a number first:
 *   - above 10,000,000,000 it is returned unchanged (treated as epoch ms),
 *   - above 1,000,000,000 it is treated as epoch seconds,
 *   - otherwise it is treated as a delay in seconds from now.
 * If it is not numeric, the whole header is parsed as a date string, which
 * keeps HTTP dates that contain a comma intact.
 *
 * @param {*} value - Raw header value.
 * @returns {number|null} Millisecond timestamp, or null when the header is
 *   missing, blank, or unparseable.
 */
function parseResetHeader(value) {
  if (value == null) return null;
  const raw = String(value).trim();
  const first = raw.split(',')[0].trim();

  // Number('') is 0, so a blank header would otherwise be read as "resets in
  // zero seconds". Skip the numeric branch and let it fall through to null.
  if (first !== '') {
    const asNumber = Number(first);
    if (Number.isFinite(asNumber)) {
      if (asNumber > 10_000_000_000) return asNumber;
      if (asNumber > 1_000_000_000) return asNumber * 1000;
      return Date.now() + asNumber * 1000;
    }
  }

  const asDate = Date.parse(raw);
  return Number.isNaN(asDate) ? null : asDate;
}
103
+
104
+ export class AccountManager {
105
+ constructor(accounts, switchThreshold = 0.90, schedulerOptions = {}, dependencies = {}) {
106
+ this.scheduler = { ...DEFAULT_SCHEDULER, ...schedulerOptions };
107
+ this._refreshAccessToken = dependencies.refreshAccessToken || refreshAccessToken;
108
+ this.accounts = accounts.map((acct, index) => ({
109
+ index,
110
+ name: acct.name,
111
+ type: acct.type,
112
+ provider: acct.provider || (acct.type === 'provider' ? 'provider' : 'anthropic'),
113
+ accountUuid: acct.accountUuid || null,
114
+ credential: acct.accessToken || acct.authToken || acct.apiKey,
115
+ upstream: acct.upstream || null,
116
+ authHeader: acct.authHeader || null,
117
+ profiles: acct.profiles || (acct.type === 'provider' ? ['all'] : ['claude', 'all']),
118
+ priority: Number.isFinite(acct.priority) ? acct.priority : 0,
119
+ model: acct.model || null,
120
+ modelMap: acct.modelMap || null,
121
+ stripBetaHeaders: Boolean(acct.stripBetaHeaders),
122
+ runtime: Boolean(acct.runtime),
123
+ enabled: acct.enabled !== false,
124
+ refreshToken: acct.refreshToken || null,
125
+ expiresAt: acct.expiresAt || null,
126
+ status: 'active',
127
+ // No quota is known at startup, so start probing: the first response for
128
+ // an account reveals its weekly limit and triggers re-evaluation.
129
+ probing: true,
130
+ quota: emptyQuota(),
131
+ usage: {
132
+ totalInputTokens: 0,
133
+ totalOutputTokens: 0,
134
+ totalRequests: 0,
135
+ lastUsed: null,
136
+ },
137
+ inFlight: 0,
138
+ activeWeight: 0,
139
+ completedRequests: 0,
140
+ failedRequests: 0,
141
+ loadEvents: [],
142
+ consecutiveFailures: 0,
143
+ lastStatus: null,
144
+ lastResponseMs: null,
145
+ lastAcceptedAt: null,
146
+ lastError: null,
147
+ lastErrorAt: null,
148
+ cooldownUntil: null,
149
+ provisionalUpstreamFingerprint: null,
150
+ provisionalUpstreamUntil: null,
151
+ rateLimitedUntil: null,
152
+ provisionalRateLimitFingerprint: null,
153
+ recoveredAt: null,
154
+ lastQuotaLogKey: null,
155
+ }));
156
+ this.currentIndex = 0;
157
+ this.nextIndex = 0;
158
+ this.switchThreshold = switchThreshold;
159
+ this.routingMode = 'automatic';
160
+ this.preferredAccountName = null;
161
+ this.sessionBindings = new Map();
162
+ this.sessionPolicies = new Map();
163
+ this.upstreamThrottle = {
164
+ until: null,
165
+ reason: null,
166
+ probeInFlight: false,
167
+ count: 0,
168
+ lastAt: null,
169
+ };
170
+ this.ambiguousRateLimits = new Map();
171
+ this.queueState = {
172
+ nextId: 1,
173
+ waiting: [],
174
+ lastAdmissionAt: 0,
175
+ rampUntil: 0,
176
+ };
177
+ this.admissionPaused = false;
178
+ }
179
+
180
+ /**
181
+ * Get the best available account, rotating if the current one is near quota.
182
+ * Returns null if all accounts are exhausted.
183
+ */
184
+ getActiveAccount(requestInfo = {}, excludedIndexes = new Set()) {
185
+ this.refreshExpiredQuotas();
186
+ return this._selectNext(requestInfo, excludedIndexes);
187
+ }
188
+
189
+ nextRetryForRequest(requestInfo = {}, excludedIndexes = new Set()) {
190
+ this.refreshExpiredQuotas();
191
+ const upstreamRetry = this._upstreamThrottleRetry();
192
+ if (upstreamRetry && !this._hasAvailableProvider(requestInfo, excludedIndexes)) {
193
+ return upstreamRetry;
194
+ }
195
+
196
+ const profile = requestInfo.profile || 'claude';
197
+ let soonestTemporary = Infinity;
198
+ let temporaryCause = null;
199
+ let soonestWeekly = Infinity;
200
+ let matchingRoutes = 0;
201
+ const reasons = {};
202
+
203
+ const note = reason => {
204
+ reasons[reason] = (reasons[reason] || 0) + 1;
205
+ };
206
+
207
+ for (const account of this.accounts) {
208
+ if (excludedIndexes.has(account.index)) continue;
209
+ if (!this._matchesRequest(account, profile, requestInfo)) {
210
+ if (account.type === 'provider' && this._requiresAnthropicThinkingIntegrity(requestInfo)) {
211
+ note('provider_fallback_disabled_signed_thinking');
212
+ }
213
+ continue;
214
+ }
215
+
216
+ matchingRoutes++;
217
+ if (this._isAvailable(account, { allowWeeklyReserve: true })) {
218
+ return {
219
+ available: true,
220
+ retryAfterMs: 0,
221
+ cause: 'available',
222
+ reasons,
223
+ matchingRoutes,
224
+ };
225
+ }
226
+
227
+ const retry = this._retryInfo(account);
228
+ note(retry.cause);
229
+ if (retry.cause === 'weekly_critical' && this._isAvailable(account, { allowWeeklyReserve: true, allowWeeklyCritical: true })) {
230
+ return {
231
+ available: true,
232
+ retryAfterMs: 0,
233
+ cause: 'weekly_critical_last_resort',
234
+ reasons,
235
+ matchingRoutes,
236
+ };
237
+ }
238
+ if (retry.queueable && retry.retryAt) {
239
+ const ms = retry.retryAt - Date.now();
240
+ if (ms < soonestTemporary) {
241
+ soonestTemporary = ms;
242
+ temporaryCause = retry.cause;
243
+ }
244
+ } else if (retry.cause === 'weekly_exhausted' && retry.retryAt) {
245
+ const ms = retry.retryAt - Date.now();
246
+ if (ms < soonestWeekly) soonestWeekly = ms;
247
+ }
248
+ }
249
+
250
+ if (Number.isFinite(soonestTemporary)) {
251
+ return {
252
+ available: false,
253
+ retryAfterMs: Math.max(0, soonestTemporary),
254
+ cause: temporaryCause || 'temporary_unavailable',
255
+ reasons,
256
+ matchingRoutes,
257
+ };
258
+ }
259
+
260
+ if (Number.isFinite(soonestWeekly)) {
261
+ return {
262
+ available: false,
263
+ retryAfterMs: Math.max(0, soonestWeekly),
264
+ cause: 'weekly_exhausted',
265
+ reasons,
266
+ matchingRoutes,
267
+ };
268
+ }
269
+
270
+ return {
271
+ available: false,
272
+ retryAfterMs: Infinity,
273
+ cause: matchingRoutes ? 'unavailable' : 'no_eligible_route',
274
+ reasons,
275
+ matchingRoutes,
276
+ };
277
+ }
278
+
279
+ hasAvailableRoute(requestInfo = {}, excludedIndexes = new Set()) {
280
+ this.refreshExpiredQuotas();
281
+ const profile = requestInfo.profile || 'claude';
282
+ const hasBinding = Boolean(requestInfo.sessionKey && this.sessionBindings.has(requestInfo.sessionKey));
283
+ const weeklyPasses = hasBinding
284
+ ? [
285
+ { allowWeeklyReserve: true, allowWeeklyCritical: false },
286
+ { allowWeeklyReserve: true, allowWeeklyCritical: true },
287
+ ]
288
+ : [
289
+ { allowWeeklyReserve: false, allowWeeklyCritical: false },
290
+ { allowWeeklyReserve: true, allowWeeklyCritical: false },
291
+ { allowWeeklyReserve: true, allowWeeklyCritical: true },
292
+ ];
293
+
294
+ return weeklyPasses.some(options => this.accounts.some(account => {
295
+ if (excludedIndexes.has(account.index)) return false;
296
+ if (!this._matchesRequest(account, profile, requestInfo)) return false;
297
+ return this._isAvailable(account, options);
298
+ }));
299
+ }
300
+
301
+ acquireAccount(requestInfo = {}, excludedIndexes = new Set()) {
302
+ this._noteRequestPolicy(requestInfo);
303
+ const account = this.getActiveAccount(requestInfo, excludedIndexes);
304
+ if (!account) return null;
305
+
306
+ const weight = Math.max(1, Number(requestInfo.weight) || 1);
307
+ const upstreamThrottleProbe = account.type !== 'provider' && this._claimUpstreamThrottleProbe();
308
+ if (requestInfo.sessionKey) {
309
+ this._bindSession(requestInfo.sessionKey, account);
310
+ }
311
+ account.inFlight++;
312
+ account.activeWeight += weight;
313
+ account.lastUsedAt = Date.now();
314
+ return { account, weight, startedAt: Date.now(), upstreamThrottleProbe };
315
+ }
316
+
317
+ releaseAccount(lease, outcome = {}) {
318
+ if (!lease?.account) return;
319
+ const account = lease.account;
320
+
321
+ account.inFlight = Math.max(0, account.inFlight - 1);
322
+ account.activeWeight = Math.max(0, account.activeWeight - lease.weight);
323
+
324
+ if (lease.upstreamThrottleProbe) {
325
+ if (outcome.success) {
326
+ this.clearUpstreamThrottle('successful recovery probe');
327
+ } else if (!outcome.upstreamThrottled) {
328
+ this.deferUpstreamThrottleProbe(5, outcome.error || `HTTP ${outcome.status || 'failure'}`);
329
+ }
330
+ }
331
+
332
+ if (outcome.neutral) return;
333
+
334
+ if (outcome.success) {
335
+ account.completedRequests++;
336
+ account.consecutiveFailures = 0;
337
+ account.lastStatus = outcome.status || account.lastStatus;
338
+ account.lastResponseMs = Date.now() - lease.startedAt;
339
+ if (account.provisionalUpstreamFingerprint) {
340
+ account.provisionalUpstreamUntil = null;
341
+ account.provisionalUpstreamFingerprint = null;
342
+ }
343
+ if (account.status !== 'throttled' || account.lastError !== 'rate_limited') {
344
+ account.lastError = null;
345
+ account.lastErrorAt = null;
346
+ }
347
+ this._recordLoadEvent(account, lease, { ...outcome, success: true });
348
+ account.lastSuccessAt = Date.now();
349
+ return;
350
+ }
351
+
352
+ if (outcome.error || outcome.status) {
353
+ account.failedRequests++;
354
+ account.consecutiveFailures++;
355
+ account.lastStatus = outcome.status || account.lastStatus;
356
+ account.lastResponseMs = Date.now() - lease.startedAt;
357
+ this._recordLoadEvent(account, lease, outcome);
358
+ account.lastError = outcome.error || `HTTP ${outcome.status}`;
359
+ account.lastErrorAt = Date.now();
360
+ }
361
+ }
362
+
363
+ _recordLoadEvent(account, lease, outcome = {}) {
364
+ const now = Date.now();
365
+ account.loadEvents ||= [];
366
+ account.loadEvents.push({
367
+ at: now,
368
+ durationMs: Math.max(0, now - lease.startedAt),
369
+ weight: Math.max(1, lease.weight || 1),
370
+ success: Boolean(outcome.success),
371
+ status: outcome.status || null,
372
+ });
373
+ this._pruneLoadEvents(account, now);
374
+ }
375
+
376
+ _pruneLoadEvents(account, now = Date.now()) {
377
+ if (!account.loadEvents?.length) return;
378
+ const cutoff = now - LOAD_EVENT_MAX_AGE_MS;
379
+ while (account.loadEvents.length && account.loadEvents[0].at < cutoff) {
380
+ account.loadEvents.shift();
381
+ }
382
+ }
383
+
384
+ _loadSummary(account, windowMs, now = Date.now()) {
385
+ this._pruneLoadEvents(account, now);
386
+ const since = now - windowMs;
387
+ const events = (account.loadEvents || []).filter(e => e.at >= since);
388
+ const requests = events.length;
389
+ const failed = events.filter(e => !e.success).length;
390
+ const weight = events.reduce((sum, e) => sum + (e.weight || 1), 0);
391
+ const durationMs = events.reduce((sum, e) => sum + (e.durationMs || 0), 0);
392
+ return {
393
+ requests,
394
+ failed,
395
+ weight,
396
+ avgMs: requests ? Math.round(durationMs / requests) : null,
397
+ };
398
+ }
399
+
400
+ _isAvailable(account, options = {}) {
401
+ if (!account) return false;
402
+ if (!account.enabled) return false;
403
+ const now = Date.now();
404
+
405
+ // Check rate limit expiry
406
+ if (account.status === 'throttled' && account.rateLimitedUntil) {
407
+ if (now < account.rateLimitedUntil) return false;
408
+ account.status = 'active';
409
+ account.rateLimitedUntil = null;
410
+ account.recoveredAt = now;
411
+ if (account.lastError === 'rate_limited') {
412
+ account.lastError = null;
413
+ account.lastErrorAt = null;
414
+ account.provisionalRateLimitFingerprint = null;
415
+ }
416
+ console.log(`[Maxpool] Account "${account.name}" rate limit expired, marking active`);
417
+ }
418
+
419
+ if (account.cooldownUntil) {
420
+ if (now < account.cooldownUntil) return false;
421
+ account.cooldownUntil = null;
422
+ account.recoveredAt = now;
423
+ }
424
+
425
+ if (account.provisionalUpstreamUntil) {
426
+ if (now < account.provisionalUpstreamUntil) return false;
427
+ account.provisionalUpstreamUntil = null;
428
+ account.provisionalUpstreamFingerprint = null;
429
+ account.recoveredAt = now;
430
+ if (account.lastError === 'upstream_throttled') {
431
+ account.lastError = null;
432
+ account.lastErrorAt = null;
433
+ }
434
+ }
435
+
436
+ if (account.inFlight >= this.scheduler.safetyMaxActivePerAccount) return false;
437
+ if (this.getGlobalInFlight() >= this.scheduler.safetyMaxGlobalActive) return false;
438
+ if (account.status === 'exhausted' || account.status === 'error') return false;
439
+ if (this._isSessionQuotaUnavailable(account)) return false;
440
+ const weeklyState = this._weeklyState(account);
441
+ if (weeklyState === 'exhausted') return false;
442
+ if (weeklyState === 'critical' && !options.allowWeeklyCritical) return false;
443
+ if (weeklyState === 'reserve' && !options.allowWeeklyReserve) return false;
444
+
445
+ return true;
446
+ }
447
+
448
+ getGlobalInFlight() {
449
+ return this.accounts.reduce((sum, account) => sum + account.inFlight, 0);
450
+ }
451
+
452
+ setAdmissionPaused(paused) {
453
+ this.admissionPaused = Boolean(paused);
454
+ }
455
+
456
+ markUpstreamThrottled(retryAfterSeconds, reason = 'temporary_server_limit') {
457
+ const retryAfter = clampRetryAfterSeconds(retryAfterSeconds);
458
+ const until = Date.now() + retryAfter * 1000;
459
+ this.upstreamThrottle.until = Math.max(this.upstreamThrottle.until || 0, until);
460
+ this.upstreamThrottle.reason = reason;
461
+ this.upstreamThrottle.probeInFlight = false;
462
+ this.upstreamThrottle.count++;
463
+ this.upstreamThrottle.lastAt = Date.now();
464
+ console.log(`[Maxpool] Anthropic upstream temporarily limiting requests for ${retryAfter}s; pausing Claude routes`);
465
+ }
466
+
467
+ clearUpstreamThrottle(reason = 'recovered') {
468
+ if (!this.upstreamThrottle.until && !this.upstreamThrottle.probeInFlight) return;
469
+ this.upstreamThrottle.until = null;
470
+ this.upstreamThrottle.reason = null;
471
+ this.upstreamThrottle.probeInFlight = false;
472
+ this.queueState.rampUntil = Date.now() + 5000;
473
+ this.queueState.lastAdmissionAt = Date.now();
474
+ console.log(`[Maxpool] Anthropic upstream throttle cleared (${reason})`);
475
+ }
476
+
477
+ confirmUpstreamProbe(lease) {
478
+ if (!lease?.upstreamThrottleProbe) return;
479
+ this.clearUpstreamThrottle('Anthropic accepted recovery probe');
480
+ lease.upstreamThrottleProbe = false;
481
+ }
482
+
483
+ deferUpstreamThrottleProbe(retryAfterSeconds = 5, reason = 'probe_failed') {
484
+ if (!this.upstreamThrottle.until && !this.upstreamThrottle.probeInFlight) return;
485
+ const retryAfter = clampRetryAfterSeconds(retryAfterSeconds);
486
+ this.upstreamThrottle.until = Date.now() + retryAfter * 1000;
487
+ this.upstreamThrottle.reason = reason;
488
+ this.upstreamThrottle.probeInFlight = false;
489
+ this.upstreamThrottle.lastAt = Date.now();
490
+ console.log(`[Maxpool] Anthropic recovery probe failed; retrying in ${retryAfter}s (${reason})`);
491
+ }
492
+
493
+ noteAmbiguousRateLimit(accountIndex, fingerprint, retryAfterSeconds) {
494
+ if (!fingerprint) return false;
495
+ const now = Date.now();
496
+ const windowMs = 30_000;
497
+ for (const [key, incident] of this.ambiguousRateLimits) {
498
+ if (now - incident.lastAt > windowMs) this.ambiguousRateLimits.delete(key);
499
+ }
500
+
501
+ const incident = this.ambiguousRateLimits.get(fingerprint) || {
502
+ accounts: new Set(),
503
+ firstAt: now,
504
+ lastAt: now,
505
+ };
506
+ incident.accounts.add(accountIndex);
507
+ incident.lastAt = now;
508
+ this.ambiguousRateLimits.set(fingerprint, incident);
509
+ if (incident.accounts.size < 2) return false;
510
+
511
+ for (const index of incident.accounts) {
512
+ const account = this.accounts[index];
513
+ if (
514
+ !account
515
+ || account.lastError !== 'rate_limited'
516
+ || account.provisionalRateLimitFingerprint !== fingerprint
517
+ ) continue;
518
+ account.status = 'active';
519
+ account.rateLimitedUntil = null;
520
+ account.lastError = null;
521
+ account.lastErrorAt = null;
522
+ account.provisionalRateLimitFingerprint = null;
523
+ }
524
+ this.ambiguousRateLimits.delete(fingerprint);
525
+ return true;
526
+ }
527
+
528
+ _isUpstreamThrottleBlocking() {
529
+ const throttle = this.upstreamThrottle;
530
+ if (!throttle.until) return false;
531
+ if (Date.now() < throttle.until) return true;
532
+ return throttle.probeInFlight;
533
+ }
534
+
535
+ _claimUpstreamThrottleProbe() {
536
+ const throttle = this.upstreamThrottle;
537
+ if (!throttle.until || Date.now() < throttle.until || throttle.probeInFlight) return false;
538
+ throttle.probeInFlight = true;
539
+ console.log('[Maxpool] Anthropic upstream throttle window expired; sending one recovery probe');
540
+ return true;
541
+ }
542
+
543
+ _upstreamThrottleRetry() {
544
+ const throttle = this.upstreamThrottle;
545
+ if (!throttle.until) return null;
546
+ const now = Date.now();
547
+ if (now < throttle.until) {
548
+ return {
549
+ available: false,
550
+ retryAfterMs: throttle.until - now,
551
+ cause: 'upstream_throttle',
552
+ reasons: { upstream_throttle: 1 },
553
+ matchingRoutes: this.accounts.filter(a => a.type !== 'provider').length,
554
+ };
555
+ }
556
+ if (throttle.probeInFlight) {
557
+ return {
558
+ available: false,
559
+ retryAfterMs: 1000,
560
+ cause: 'upstream_probe',
561
+ reasons: { upstream_probe: 1 },
562
+ matchingRoutes: this.accounts.filter(a => a.type !== 'provider').length,
563
+ };
564
+ }
565
+ return null;
566
+ }
567
+
568
+ _hasAvailableProvider(requestInfo = {}, excludedIndexes = new Set()) {
569
+ const profile = requestInfo.profile || 'claude';
570
+ return this.accounts.some(account => {
571
+ if (account.type !== 'provider' || excludedIndexes.has(account.index)) return false;
572
+ if (!this._matchesRequest(account, profile, requestInfo)) return false;
573
+ return this._isAvailable(account, { allowWeeklyReserve: true, allowWeeklyCritical: true });
574
+ });
575
+ }
576
+
577
+ registerQueuedRequest(requestInfo = {}) {
578
+ if (requestInfo.queueTicket) return requestInfo.queueTicket;
579
+ const ticket = {
580
+ id: this.queueState.nextId++,
581
+ queuedAt: Date.now(),
582
+ };
583
+ this.queueState.waiting.push(ticket);
584
+ requestInfo.queueTicket = ticket;
585
+ return ticket;
586
+ }
587
+
588
+ canAdmitQueuedRequest(requestInfo = {}) {
589
+ const ticket = requestInfo.queueTicket;
590
+ if (!ticket) return true;
591
+ if (this.queueState.waiting[0]?.id !== ticket.id) return false;
592
+ const now = Date.now();
593
+ if (now < this.queueState.rampUntil && now - this.queueState.lastAdmissionAt < 250) return false;
594
+ this.queueState.waiting.shift();
595
+ this.queueState.lastAdmissionAt = now;
596
+ requestInfo.queueTicket = null;
597
+ requestInfo.queueAdmitted = true;
598
+ return true;
599
+ }
600
+
601
+ removeQueuedRequest(requestInfo = {}) {
602
+ const ticket = requestInfo.queueTicket;
603
+ if (!ticket) return;
604
+ const index = this.queueState.waiting.findIndex(entry => entry.id === ticket.id);
605
+ if (index >= 0) this.queueState.waiting.splice(index, 1);
606
+ requestInfo.queueTicket = null;
607
+ }
608
+
609
+ /**
610
+ * Clear any quota counters whose reset time has passed. Cheap and safe to
611
+ * call frequently (e.g. from the TUI render loop) — once a counter is cleared
612
+ * it stays null until the next upstream response repopulates it, so the
613
+ * "reset" log fires at most once per window.
614
+ * @returns {{changed: boolean, session: boolean}} what was cleared.
615
+ */
616
+ _clearExpiredQuotas(account) {
617
+ const q = account.quota;
618
+ const now = Date.now();
619
+ let changed = false;
620
+ let session = false;
621
+
622
+ // Clear expired unified quotas
623
+ if (q.unified5h != null && q.unified5hReset && now >= q.unified5hReset) {
624
+ console.log(`[Maxpool] Account "${account.name}" session quota reset`);
625
+ q.unified5h = null;
626
+ q.unified5hReset = null;
627
+ changed = true;
628
+ session = true;
629
+ }
630
+ if (q.unified7d != null && q.unified7dReset && now >= q.unified7dReset) {
631
+ console.log(`[Maxpool] Account "${account.name}" weekly quota reset`);
632
+ q.unified7d = null;
633
+ q.unified7dReset = null;
634
+ q.unifiedStatus = null;
635
+ changed = true;
636
+ }
637
+
638
+ // Clear expired standard quotas
639
+ if (q.resetsAt && now >= new Date(q.resetsAt).getTime()) {
640
+ q.tokensRemaining = null;
641
+ q.tokensLimit = null;
642
+ q.requestsRemaining = null;
643
+ q.requestsLimit = null;
644
+ q.resetsAt = null;
645
+ changed = true;
646
+ }
647
+
648
+ return { changed, session };
649
+ }
650
+
651
+ /**
652
+ * Clear expired quotas across all accounts. Called from the display loop and
653
+ * the request path so a window expiry (e.g. the 5-hour session quota) resets
654
+ * the view instantly rather than waiting for the next request.
655
+ *
656
+ * When an account's session quota resets, it may have become the better
657
+ * choice — switch to it if its weekly limit expires sooner than the current
658
+ * account's (and it still has weekly quota), so we spend the quota closest to
659
+ * refreshing first.
660
+ */
661
+ refreshExpiredQuotas() {
662
+ let changed = false;
663
+ const sessionReset = [];
664
+ for (const account of this.accounts) {
665
+ const r = this._clearExpiredQuotas(account);
666
+ if (r.changed) changed = true;
667
+ if (r.session) sessionReset.push(account);
668
+ }
669
+ if (sessionReset.length) this._switchOnSessionReset(sessionReset);
670
+ return changed;
671
+ }
672
+
673
+ /**
674
+ * Given accounts whose session quota just reset, switch to the one whose
675
+ * weekly limit expires soonest — but only if that is sooner than the current
676
+ * account's weekly limit and the account still has weekly quota to spend.
677
+ */
678
+ _switchOnSessionReset(candidates) {
679
+ const current = this.accounts[this.currentIndex];
680
+ // Need a known weekly reset on the current account to compare against;
681
+ // if it is unknown we are still probing it, so leave it alone.
682
+ if (!current || current.quota.unified7dReset == null) return;
683
+
684
+ let best = null;
685
+ let bestWeekly = current.quota.unified7dReset;
686
+ for (const acc of candidates) {
687
+ if (acc.index === this.currentIndex) continue;
688
+ if (!this._isAvailable(acc, { allowWeeklyReserve: true })) continue; // enough session & weekly quota left
689
+ const weekly = acc.quota.unified7dReset;
690
+ if (weekly == null) continue; // need a known weekly to compare
691
+ if (weekly < bestWeekly) {
692
+ bestWeekly = weekly;
693
+ best = acc;
694
+ }
695
+ }
696
+
697
+ if (best) {
698
+ this.currentIndex = best.index;
699
+ console.log(`[Maxpool] Account "${best.name}" session quota reset and weekly expires sooner — switching to it`);
700
+ }
701
+ }
702
+
703
+ _isSessionQuotaUnavailable(account) {
704
+ const q = account.quota;
705
+ this._clearExpiredQuotas(account);
706
+
707
+ // Unified 5h quota is immediate availability. Weekly quota is handled
708
+ // separately as long-horizon admission control.
709
+ if (q.unified5h != null && q.unified5h >= this.switchThreshold) return true;
710
+
711
+ // Standard quotas (API key accounts)
712
+ if (q.tokensLimit != null && q.tokensRemaining != null) {
713
+ const used = 1 - (q.tokensRemaining / q.tokensLimit);
714
+ if (used >= this.switchThreshold) return true;
715
+ }
716
+
717
+ if (q.requestsLimit != null && q.requestsRemaining != null) {
718
+ const used = 1 - (q.requestsRemaining / q.requestsLimit);
719
+ if (used >= this.switchThreshold) return true;
720
+ }
721
+
722
+ return false;
723
+ }
724
+
725
+ _isNearQuota(account) {
726
+ return this._isSessionQuotaUnavailable(account)
727
+ || ['reserve', 'critical', 'exhausted'].includes(this._weeklyState(account));
728
+ }
729
+
730
+ _retryInfo(account) {
731
+ const now = Date.now();
732
+ const q = account.quota || {};
733
+ const weeklyState = this._weeklyState(account);
734
+ if (weeklyState === 'critical') {
735
+ return { cause: 'weekly_critical', retryAt: q.unified7dReset || null, queueable: false };
736
+ }
737
+
738
+ if (weeklyState === 'exhausted') {
739
+ return { cause: 'weekly_exhausted', retryAt: q.unified7dReset || null, queueable: false };
740
+ }
741
+
742
+ if (account.status === 'throttled' && account.rateLimitedUntil && now < account.rateLimitedUntil) {
743
+ return { cause: 'rate_limited', retryAt: account.rateLimitedUntil, queueable: true };
744
+ }
745
+
746
+ if (account.cooldownUntil && now < account.cooldownUntil) {
747
+ return { cause: 'cooldown', retryAt: account.cooldownUntil, queueable: true };
748
+ }
749
+
750
+ if (account.provisionalUpstreamUntil && now < account.provisionalUpstreamUntil) {
751
+ return { cause: 'upstream_failure', retryAt: account.provisionalUpstreamUntil, queueable: true };
752
+ }
753
+
754
+ if (q.unified5h != null && q.unified5h >= this.switchThreshold) {
755
+ return { cause: 'session_limit', retryAt: q.unified5hReset || null, queueable: Boolean(q.unified5hReset) };
756
+ }
757
+
758
+ if (q.tokensLimit != null && q.tokensRemaining != null && q.tokensLimit > 0) {
759
+ const used = 1 - q.tokensRemaining / q.tokensLimit;
760
+ if (used >= this.switchThreshold) {
761
+ const retryAt = q.resetsAt ? new Date(q.resetsAt).getTime() : null;
762
+ return { cause: 'token_limit', retryAt, queueable: Boolean(retryAt) };
763
+ }
764
+ }
765
+
766
+ if (q.requestsLimit != null && q.requestsRemaining != null && q.requestsLimit > 0) {
767
+ const used = 1 - q.requestsRemaining / q.requestsLimit;
768
+ if (used >= this.switchThreshold) {
769
+ const retryAt = q.resetsAt ? new Date(q.resetsAt).getTime() : null;
770
+ return { cause: 'request_limit', retryAt, queueable: Boolean(retryAt) };
771
+ }
772
+ }
773
+
774
+ if (q.genericLimit != null && q.genericRemaining != null && q.genericRemaining <= 0) {
775
+ return { cause: 'provider_limit', retryAt: q.genericReset || null, queueable: Boolean(q.genericReset) };
776
+ }
777
+
778
+ if (!account.enabled) return { cause: 'disabled', retryAt: null, queueable: false };
779
+ if (account.status === 'error') return { cause: 'error', retryAt: null, queueable: false };
780
+ if (account.status === 'exhausted') return { cause: 'exhausted', retryAt: null, queueable: false };
781
+ return { cause: 'unavailable', retryAt: null, queueable: false };
782
+ }
783
+
784
+ _selectNext(requestInfo = {}, excludedIndexes = new Set()) {
785
+ // Adaptive least-loaded balancing: spread requests across every healthy
786
+ // account immediately, and let live load, quota pressure, and recent errors
787
+ // push traffic away from weaker accounts.
788
+ let best = null;
789
+ let bestScore = Infinity;
790
+ let bestPriority = Infinity;
791
+ const profile = requestInfo.profile || 'claude';
792
+ const scoringCtx = this._scoringContext();
793
+
794
+ const hasBinding = Boolean(requestInfo.sessionKey && this.sessionBindings.has(requestInfo.sessionKey));
795
+ const preferred = this._preferredAccount(profile, excludedIndexes, requestInfo);
796
+ if (preferred) {
797
+ const preferredPasses = [
798
+ { allowWeeklyReserve: true, allowWeeklyCritical: false },
799
+ { allowWeeklyReserve: true, allowWeeklyCritical: true },
800
+ ];
801
+ if (preferredPasses.some(options => this._isAvailable(preferred, options))) {
802
+ this.currentIndex = preferred.index;
803
+ return preferred;
804
+ }
805
+ }
806
+ const bound = this._boundAccount(requestInfo.sessionKey, profile, excludedIndexes, requestInfo);
807
+ if (bound && !this._hasHigherPriorityAvailable(bound, profile, excludedIndexes, requestInfo)) return bound;
808
+
809
+ const weeklyPasses = hasBinding
810
+ ? [
811
+ { allowWeeklyReserve: true, allowWeeklyCritical: false },
812
+ { allowWeeklyReserve: true, allowWeeklyCritical: true },
813
+ ]
814
+ : [
815
+ { allowWeeklyReserve: false, allowWeeklyCritical: false },
816
+ { allowWeeklyReserve: true, allowWeeklyCritical: false },
817
+ { allowWeeklyReserve: true, allowWeeklyCritical: true },
818
+ ];
819
+
820
+ for (const weeklyOptions of weeklyPasses) {
821
+ best = null;
822
+ bestScore = Infinity;
823
+ bestPriority = Infinity;
824
+
825
+ for (let i = 0; i < this.accounts.length; i++) {
826
+ const idx = (this.nextIndex + i) % this.accounts.length;
827
+ const account = this.accounts[idx];
828
+ if (excludedIndexes.has(account.index)) continue;
829
+ if (!this._matchesRequest(account, profile, requestInfo)) continue;
830
+ if (!this._isAvailable(account, weeklyOptions)) continue;
831
+
832
+ const priority = Number.isFinite(account.priority) ? account.priority : 0;
833
+ const score = this._scoreAccount(account, requestInfo, scoringCtx);
834
+ if (priority < bestPriority || (priority === bestPriority && score < bestScore)) {
835
+ bestPriority = priority;
836
+ bestScore = score;
837
+ best = account;
838
+ }
839
+ }
840
+
841
+ if (best) {
842
+ const switched = best.index !== this.currentIndex;
843
+ this.currentIndex = best.index;
844
+ this.nextIndex = (best.index + 1) % this.accounts.length;
845
+ // If we switched to an account whose weekly quota is still unknown, flag
846
+ // it so we re-evaluate once that quota is learned (see updateQuota).
847
+ best.probing = best.quota.unified7dReset == null;
848
+ if (switched) {
849
+ console.log(`[Maxpool] Switched to account "${best.name}"`);
850
+ }
851
+ return best;
852
+ }
853
+ }
854
+
855
+ // All accounts unavailable — find the one that resets soonest
856
+ let soonestAccount = null;
857
+ let soonestTime = Infinity;
858
+
859
+ for (const account of this.accounts) {
860
+ if (!this._matchesRequest(account, profile, requestInfo)) continue;
861
+ if (!account.enabled) continue;
862
+ const resetTime = account.rateLimitedUntil
863
+ || account.quota.unified5hReset
864
+ || account.quota.unified7dReset
865
+ || (account.quota.resetsAt ? new Date(account.quota.resetsAt).getTime() : null);
866
+
867
+ if (resetTime && resetTime < soonestTime) {
868
+ soonestTime = resetTime;
869
+ soonestAccount = account;
870
+ }
871
+ }
872
+
873
+ if (soonestAccount && soonestTime <= Date.now()) {
874
+ soonestAccount.status = 'active';
875
+ soonestAccount.rateLimitedUntil = null;
876
+ this.currentIndex = soonestAccount.index;
877
+ console.log(`[Maxpool] Account "${soonestAccount.name}" reset, switching to it`);
878
+ return soonestAccount;
879
+ }
880
+
881
+ return null;
882
+ }
883
+
884
+ _boundAccount(sessionKey, profile, excludedIndexes = new Set(), requestInfo = {}) {
885
+ if (!sessionKey) return null;
886
+ const binding = this._sessionBinding(sessionKey);
887
+ if (!binding) return null;
888
+
889
+ const home = this._eligibleBoundAccount(binding.homeName, profile, excludedIndexes, { allowWeeklyReserve: true }, requestInfo);
890
+ if (home) return home;
891
+
892
+ const current = this._eligibleBoundAccount(binding.currentName, profile, excludedIndexes, { allowWeeklyReserve: true }, requestInfo);
893
+ if (current) return current;
894
+
895
+ const homeExists = binding.homeName && this.accounts.some(a => a.name === binding.homeName);
896
+ const currentExists = binding.currentName && this.accounts.some(a => a.name === binding.currentName);
897
+ if (!homeExists && !currentExists) {
898
+ this.sessionBindings.delete(sessionKey);
899
+ }
900
+ return null;
901
+ }
902
+
903
+ _eligibleBoundAccount(accountName, profile, excludedIndexes = new Set(), options = {}, requestInfo = {}) {
904
+ if (!accountName) return null;
905
+ const account = this.accounts.find(a => a.name === accountName);
906
+ if (!account) return null;
907
+ if (excludedIndexes.has(account.index)) return null;
908
+ if (!this._matchesRequest(account, profile, requestInfo)) return null;
909
+ if (!this._isAvailable(account, options)) return null;
910
+ return account;
911
+ }
912
+
913
+ _bindSession(sessionKey, account) {
914
+ const priority = this._priority(account);
915
+ const binding = this._sessionBinding(sessionKey) || {
916
+ homeName: account.name,
917
+ homePriority: priority,
918
+ currentName: account.name,
919
+ };
920
+
921
+ if (!binding.homeName || priority < binding.homePriority) {
922
+ binding.homeName = account.name;
923
+ binding.homePriority = priority;
924
+ }
925
+ binding.currentName = account.name;
926
+ this.sessionBindings.set(sessionKey, binding);
927
+ }
928
+
929
+ _sessionBinding(sessionKey) {
930
+ const binding = this.sessionBindings.get(sessionKey);
931
+ if (!binding) return null;
932
+ if (typeof binding === 'string') {
933
+ const account = this.accounts.find(a => a.name === binding);
934
+ const normalized = {
935
+ homeName: binding,
936
+ homePriority: account ? this._priority(account) : Infinity,
937
+ currentName: binding,
938
+ };
939
+ this.sessionBindings.set(sessionKey, normalized);
940
+ return normalized;
941
+ }
942
+ return binding;
943
+ }
944
+
945
+ _hasHigherPriorityAvailable(boundAccount, profile, excludedIndexes = new Set(), requestInfo = {}) {
946
+ const boundPriority = this._priority(boundAccount);
947
+ return this.accounts.some(account => {
948
+ if (account.index === boundAccount.index) return false;
949
+ if (excludedIndexes.has(account.index)) return false;
950
+ if (!this._matchesRequest(account, profile, requestInfo)) return false;
951
+ const priority = this._priority(account);
952
+ return priority < boundPriority && this._isAvailable(account, { allowWeeklyReserve: true });
953
+ });
954
+ }
955
+
956
+ _priority(account) {
957
+ return Number.isFinite(account?.priority) ? account.priority : 0;
958
+ }
959
+
960
+ _preferredAccount(profile, excludedIndexes = new Set(), requestInfo = {}) {
961
+ if (this.routingMode !== 'preferred' || !this.preferredAccountName) return null;
962
+ const account = this.accounts.find(candidate => candidate.name === this.preferredAccountName);
963
+ if (!account || excludedIndexes.has(account.index)) return null;
964
+ if (!this._matchesRequest(account, profile, requestInfo)) return null;
965
+ return account;
966
+ }
967
+
968
+ setRoutingMode(mode, preferredAccount = null) {
969
+ if (mode !== 'preferred') {
970
+ this.routingMode = 'automatic';
971
+ this.preferredAccountName = null;
972
+ return true;
973
+ }
974
+ const account = this.accounts.find(candidate => candidate.name === preferredAccount);
975
+ if (!account || account.type === 'provider' || !account.enabled) {
976
+ this.routingMode = 'automatic';
977
+ this.preferredAccountName = null;
978
+ return false;
979
+ }
980
+ this.routingMode = 'preferred';
981
+ this.preferredAccountName = account.name;
982
+ this.currentIndex = account.index;
983
+ return true;
984
+ }
985
+
986
+ setAccountEnabled(index, enabled) {
987
+ const account = this.accounts[index];
988
+ if (!account) return false;
989
+ account.enabled = Boolean(enabled);
990
+ if (!enabled && account.name === this.preferredAccountName) {
991
+ this.setRoutingMode('automatic');
992
+ }
993
+ return true;
994
+ }
995
+
996
+ _matchesProfile(account, profile) {
997
+ const profiles = account.profiles || ['claude', 'all'];
998
+ return profiles.includes(profile);
999
+ }
1000
+
1001
+ _matchesRequest(account, profile, requestInfo = {}) {
1002
+ if (this.admissionPaused) return false;
1003
+ if (!this._isRequestCompatible(account, profile, requestInfo)) return false;
1004
+ if (
1005
+ account.type !== 'provider'
1006
+ && this.queueState.waiting.length
1007
+ && !requestInfo.queueTicket
1008
+ && !requestInfo.queueAdmitted
1009
+ ) return false;
1010
+ if (account.type !== 'provider' && this._isUpstreamThrottleBlocking()) return false;
1011
+ return true;
1012
+ }
1013
+
1014
+ _isRequestCompatible(account, profile, requestInfo = {}) {
1015
+ if (!this._matchesProfile(account, profile)) return false;
1016
+ if (account.type === 'provider' && this._requiresAnthropicThinkingIntegrity(requestInfo)) return false;
1017
+ return true;
1018
+ }
1019
+
1020
+ _noteRequestPolicy(requestInfo = {}) {
1021
+ if (!requestInfo.sessionKey || !requestInfo.requiresAnthropicThinkingIntegrity) return;
1022
+ this.markSessionThinkingProtected(requestInfo.sessionKey, requestInfo.model);
1023
+ }
1024
+
1025
+ markSessionThinkingProtected(sessionKey, model = null) {
1026
+ if (!sessionKey) return;
1027
+ const existing = this.sessionPolicies.get(sessionKey) || {};
1028
+ if (!existing.requiresAnthropicThinkingIntegrity) {
1029
+ console.log(`[Maxpool] Session "${sessionKey}" contains Anthropic signed thinking; provider fallback disabled`);
1030
+ }
1031
+ this.sessionPolicies.set(sessionKey, {
1032
+ ...existing,
1033
+ requiresAnthropicThinkingIntegrity: true,
1034
+ model: existing.model || model || null,
1035
+ });
1036
+ }
1037
+
1038
+ _requiresAnthropicThinkingIntegrity(requestInfo = {}) {
1039
+ if (requestInfo.requiresAnthropicThinkingIntegrity) return true;
1040
+ if (!requestInfo.sessionKey) return false;
1041
+ return Boolean(this.sessionPolicies.get(requestInfo.sessionKey)?.requiresAnthropicThinkingIntegrity);
1042
+ }
1043
+
1044
+ /**
1045
+ * Per-selection context shared across the candidate loop so each account's
1046
+ * recent-load *share* can be computed against the live fleet total exactly
1047
+ * once (rather than O(N) per candidate).
1048
+ */
1049
+ _scoringContext() {
1050
+ const now = Date.now();
1051
+ // Denominator for the recent-load *share* term: the primary OAuth pool we
1052
+ // balance across. Exclude disabled accounts (never selectable) and provider
1053
+ // fallbacks (last-resort, not part of the spread) so a busy provider can't
1054
+ // shrink the share signal for the OAuth accounts.
1055
+ let fleetRecentWeight = 0;
1056
+ for (const account of this.accounts) {
1057
+ if (account.enabled === false || account.type === 'provider') continue;
1058
+ fleetRecentWeight += this._loadSummary(account, this.scheduler.spreadWindowMs, now).weight;
1059
+ }
1060
+ return { now, fleetRecentWeight };
1061
+ }
1062
+
1063
+ /**
1064
+ * Routing cost — lower is preferred. Composed of independent forces rather
1065
+ * than a single quota ratio:
1066
+ * - concurrency: never pile concurrent streams on one account (dominant when busy)
1067
+ * - scarcity: quota *rate* pressure — high only when an account would burn out
1068
+ * before its window resets; ~0 for a near-reset account with quota
1069
+ * left (use-it-or-lose-it), so that account is drained, not avoided
1070
+ * - spread: recent-load share, so sequential traffic rotates off whoever
1071
+ * served last instead of funnelling onto the lowest-quota account
1072
+ * - ramp: ease a just-recovered account back in instead of slamming it
1073
+ * - failures: direct per-account backoff after errors
1074
+ */
1075
+ _scoreAccount(account, requestInfo = {}, ctx = null) {
1076
+ const now = ctx?.now ?? Date.now();
1077
+ const reqWeight = Math.max(1, requestInfo.weight || 1);
1078
+ const concurrency = account.activeWeight + reqWeight;
1079
+ const scarcity = this._accountScarcity(account, now) * this.scheduler.scarcityWeight;
1080
+
1081
+ const fleetRecentWeight = ctx?.fleetRecentWeight ?? 0;
1082
+ const recentWeight = this._loadSummary(account, this.scheduler.spreadWindowMs, now).weight;
1083
+ const share = fleetRecentWeight > 0 ? recentWeight / fleetRecentWeight : 0;
1084
+ const spread = share * this.scheduler.spreadShareWeight;
1085
+
1086
+ const ramp = this._recoveryRamp(account, now);
1087
+ const failurePenalty = account.consecutiveFailures * 5;
1088
+ // Bias toward an account whose weekly quota is still unknown so it gets
1089
+ // probed and learned (matches the legacy unknown-quota exploration nudge).
1090
+ const explorationBonus = account.quota.unified7dReset == null ? -0.5 : 0;
1091
+
1092
+ return concurrency + scarcity + spread + ramp + failurePenalty + explorationBonus;
1093
+ }
1094
+
1095
+ /**
1096
+ * Quota scarcity in [0, 1+]: the worst (max) pace-overage across all known
1097
+ * windows. Pace overage = how far an account's utilization is *ahead of* an
1098
+ * even burn over the window. It is ~0 when a window is about to reset (the
1099
+ * remaining quota is about to refresh, so it is cheap to spend) and grows
1100
+ * toward 1 for an account burning quota fast early in a long window (the
1101
+ * genuinely scarce case). When a reset time is unknown we fall back to raw
1102
+ * utilization (conservative — no time information to discount by).
1103
+ */
1104
+ _accountScarcity(account, now = Date.now()) {
1105
+ const q = account.quota;
1106
+ let scarcity = 0;
1107
+ if (q.unified5h != null) {
1108
+ scarcity = Math.max(scarcity, this._windowScarcity(q.unified5h, q.unified5hReset, FIVE_HOUR_MS, now));
1109
+ }
1110
+ if (q.unified7d != null) {
1111
+ scarcity = Math.max(scarcity, this._windowScarcity(q.unified7d, q.unified7dReset, WEEK_MS, now));
1112
+ }
1113
+ if (q.tokensLimit != null && q.tokensRemaining != null && q.tokensLimit > 0) {
1114
+ scarcity = Math.max(scarcity, 1 - q.tokensRemaining / q.tokensLimit);
1115
+ }
1116
+ if (q.requestsLimit != null && q.requestsRemaining != null && q.requestsLimit > 0) {
1117
+ scarcity = Math.max(scarcity, 1 - q.requestsRemaining / q.requestsLimit);
1118
+ }
1119
+ return scarcity;
1120
+ }
1121
+
1122
+ _windowScarcity(util, resetMs, windowLen, now = Date.now()) {
1123
+ const used = clamp01(util);
1124
+ if (!resetMs || resetMs <= now) return used; // unknown / just-reset → face value
1125
+ const remainingMs = Math.max(0, resetMs - now);
1126
+ const elapsedFrac = clamp01((windowLen - remainingMs) / windowLen);
1127
+ return Math.max(0, used - elapsedFrac);
1128
+ }
1129
+
1130
+ /**
1131
+ * Decaying penalty applied for `recoveryRampMs` after an account un-parks,
1132
+ * so a freshly-recovered account (which has ~0 recent load and may look most
1133
+ * attractive) is eased back in rather than instantly slammed back to a limit.
1134
+ */
1135
+ _recoveryRamp(account, now = Date.now()) {
1136
+ if (!account.recoveredAt) return 0;
1137
+ const age = now - account.recoveredAt;
1138
+ if (age < 0 || age >= this.scheduler.recoveryRampMs) return 0;
1139
+ return this.scheduler.recoveryRampWeight * (1 - age / this.scheduler.recoveryRampMs);
1140
+ }
1141
+
1142
+ _weeklyState(account) {
1143
+ const rawState = this._weeklyRawState(account);
1144
+ if (rawState === 'unknown' || rawState === 'exhausted') return rawState;
1145
+
1146
+ const pressure = Math.max(clamp01(account.quota.unified7d ?? 0), this._effectiveWeeklyUsage(account));
1147
+ if (pressure >= this.scheduler.weeklyCriticalThreshold) return 'critical';
1148
+ if (pressure >= this.scheduler.weeklyReserveThreshold) return 'reserve';
1149
+ if (pressure >= this.scheduler.weeklySoftThreshold) return 'soft';
1150
+ return 'normal';
1151
+ }
1152
+
1153
+ _weeklyRawState(account) {
1154
+ const q = account.quota;
1155
+ this._clearExpiredQuotas(account);
1156
+ if (q.unifiedStatus === 'rejected') return 'exhausted';
1157
+ if (q.unified7d == null) return 'unknown';
1158
+
1159
+ const used = clamp01(q.unified7d);
1160
+ if (used >= this.scheduler.weeklyExhaustedThreshold) return 'exhausted';
1161
+ if (used >= this.scheduler.weeklyCriticalThreshold) return 'critical';
1162
+ if (used >= this.scheduler.weeklyReserveThreshold) return 'reserve';
1163
+ if (used >= this.scheduler.weeklySoftThreshold) return 'soft';
1164
+ return 'normal';
1165
+ }
1166
+
1167
+ _weeklyPaceState(account) {
1168
+ if (account.quota.unified7d == null) return 'unknown';
1169
+ const effective = this._effectiveWeeklyUsage(account);
1170
+ if (effective >= this.scheduler.weeklyExhaustedThreshold) return 'exhausted';
1171
+ if (effective >= this.scheduler.weeklyCriticalThreshold) return 'critical';
1172
+ if (effective >= this.scheduler.weeklyReserveThreshold) return 'reserve';
1173
+ if (effective >= this.scheduler.weeklySoftThreshold) return 'soft';
1174
+ return 'normal';
1175
+ }
1176
+
1177
+ _effectiveWeeklyUsage(account) {
1178
+ const q = account.quota;
1179
+ const used = clamp01(q.unified7d ?? 0);
1180
+ if (!q.unified7dReset) return used;
1181
+
1182
+ const remainingMs = Math.max(0, q.unified7dReset - Date.now());
1183
+ const elapsedRatio = clamp01((WEEK_MS - remainingMs) / WEEK_MS);
1184
+ const burnDebt = Math.max(0, used - elapsedRatio);
1185
+ return Math.min(1.5, used + burnDebt * this.scheduler.weeklyBurnDebtWeight);
1186
+ }
1187
+
1188
+ /**
1189
+ * Update an account's quota from a background usage probe (fetchUsage result).
1190
+ * Same effect as learning quota from a live response, but for idle accounts.
1191
+ */
1192
+ applyUsageData(accountIndex, usage) {
1193
+ const account = this.accounts[accountIndex];
1194
+ if (!account || !usage) return;
1195
+ const q = account.quota;
1196
+
1197
+ if (usage.fiveHour) {
1198
+ if (usage.fiveHour.utilization != null) q.unified5h = clamp01(usage.fiveHour.utilization);
1199
+ if (usage.fiveHour.resetAt != null) q.unified5hReset = usage.fiveHour.resetAt;
1200
+ }
1201
+ if (usage.sevenDay) {
1202
+ if (usage.sevenDay.utilization != null) q.unified7d = clamp01(usage.sevenDay.utilization);
1203
+ if (usage.sevenDay.resetAt != null) q.unified7dReset = usage.sevenDay.resetAt;
1204
+ }
1205
+ if (usage.sevenDaySonnet) {
1206
+ if (usage.sevenDaySonnet.utilization != null) q.unified7dSonnet = clamp01(usage.sevenDaySonnet.utilization);
1207
+ if (usage.sevenDaySonnet.resetAt != null) q.unified7dSonnetReset = usage.sevenDaySonnet.resetAt;
1208
+ }
1209
+
1210
+ // If we just learned this account's weekly window while probing, re-evaluate
1211
+ // selection (same path as learning it from a live response).
1212
+ if (account.probing && q.unified7dReset != null) {
1213
+ account.probing = false;
1214
+ account.requalify = true;
1215
+ }
1216
+ }
1217
+
1218
+ /**
1219
+ * Update an account's quota tracking from upstream response headers.
1220
+ */
1221
+ updateQuota(accountIndex, headers) {
1222
+ const account = this.accounts[accountIndex];
1223
+ if (!account) return;
1224
+
1225
+ // Unified rate limits (Claude Max)
1226
+ const u5h = parseFloat(headers['anthropic-ratelimit-unified-5h-utilization']);
1227
+ const u7d = parseFloat(headers['anthropic-ratelimit-unified-7d-utilization']);
1228
+ if (!isNaN(u5h)) {
1229
+ account.quota.unified5hRaw = u5h;
1230
+ account.quota.unified5h = clamp01(u5h);
1231
+ }
1232
+ if (!isNaN(u7d)) {
1233
+ account.quota.unified7dRaw = u7d;
1234
+ account.quota.unified7d = clamp01(u7d);
1235
+ }
1236
+
1237
+ const r5h = headers['anthropic-ratelimit-unified-5h-reset'];
1238
+ const r7d = headers['anthropic-ratelimit-unified-7d-reset'];
1239
+ if (r5h) account.quota.unified5hReset = parseResetHeader(r5h);
1240
+ if (r7d) account.quota.unified7dReset = parseResetHeader(r7d);
1241
+
1242
+ // We switched to this account to discover its weekly quota; now that we
1243
+ // know it, flag for re-evaluation so selection can pick the best account.
1244
+ if (account.probing && account.quota.unified7dReset != null) {
1245
+ account.probing = false;
1246
+ account.requalify = true;
1247
+ console.log(`[Maxpool] Learned weekly quota for "${account.name}", re-evaluating selection`);
1248
+ }
1249
+
1250
+ const uStatus = headers['anthropic-ratelimit-unified-status'];
1251
+ if (uStatus) account.quota.unifiedStatus = uStatus;
1252
+
1253
+ // Standard rate limits (API key accounts)
1254
+ const tokensLimit = parseInt(headers['anthropic-ratelimit-tokens-limit'], 10);
1255
+ const tokensRemaining = parseInt(headers['anthropic-ratelimit-tokens-remaining'], 10);
1256
+ const tokensReset = headers['anthropic-ratelimit-tokens-reset'];
1257
+ const requestsLimit = parseInt(headers['anthropic-ratelimit-requests-limit'], 10);
1258
+ const requestsRemaining = parseInt(headers['anthropic-ratelimit-requests-remaining'], 10);
1259
+ const requestsReset = headers['anthropic-ratelimit-requests-reset'];
1260
+
1261
+ if (!isNaN(tokensLimit)) account.quota.tokensLimit = tokensLimit;
1262
+ if (!isNaN(tokensRemaining)) account.quota.tokensRemaining = tokensRemaining;
1263
+ if (!isNaN(requestsLimit)) account.quota.requestsLimit = requestsLimit;
1264
+ if (!isNaN(requestsRemaining)) account.quota.requestsRemaining = requestsRemaining;
1265
+
1266
+ if (tokensReset) account.quota.resetsAt = tokensReset;
1267
+ else if (requestsReset) account.quota.resetsAt = requestsReset;
1268
+
1269
+ const genericLimit = parseFirstInt(headers, [
1270
+ 'x-ratelimit-limit',
1271
+ 'x-rate-limit-limit',
1272
+ 'ratelimit-limit',
1273
+ 'x-ratelimit-limit-requests',
1274
+ 'x-ratelimit-requests-limit',
1275
+ ]);
1276
+ const genericRemaining = parseFirstInt(headers, [
1277
+ 'x-ratelimit-remaining',
1278
+ 'x-rate-limit-remaining',
1279
+ 'ratelimit-remaining',
1280
+ 'x-ratelimit-remaining-requests',
1281
+ 'x-ratelimit-requests-remaining',
1282
+ ]);
1283
+ const genericReset = parseResetHeader(firstHeader(headers, [
1284
+ 'x-ratelimit-reset',
1285
+ 'x-rate-limit-reset',
1286
+ 'ratelimit-reset',
1287
+ 'x-ratelimit-reset-requests',
1288
+ 'x-ratelimit-requests-reset',
1289
+ ]));
1290
+
1291
+ if (genericLimit != null) account.quota.genericLimit = genericLimit;
1292
+ if (genericRemaining != null) account.quota.genericRemaining = genericRemaining;
1293
+ if (genericReset != null) account.quota.genericReset = genericReset;
1294
+
1295
+ account.usage.totalRequests++;
1296
+ account.usage.lastUsed = new Date().toISOString();
1297
+
1298
+ // Log when approaching quota
1299
+ if (this._isNearQuota(account)) {
1300
+ const pct = account.quota.unified7d != null
1301
+ ? (account.quota.unified7d * 100).toFixed(1)
1302
+ : account.quota.tokensLimit
1303
+ ? ((1 - account.quota.tokensRemaining / account.quota.tokensLimit) * 100).toFixed(1)
1304
+ : '?';
1305
+ const reason = this._isSessionQuotaUnavailable(account) ? 'session quota' : `weekly ${this._weeklyState(account)}`;
1306
+ const logKey = `${reason}:${pct}`;
1307
+ if (account.lastQuotaLogKey !== logKey) {
1308
+ account.lastQuotaLogKey = logKey;
1309
+ console.log(`[Maxpool] Account "${account.name}" at ${pct}% usage — limiting new placement (${reason})`);
1310
+ }
1311
+ }
1312
+ }
1313
+
1314
+ /**
1315
+ * Update cumulative token usage from response body data.
1316
+ */
1317
+ updateUsage(accountIndex, inputTokens, outputTokens) {
1318
+ const account = this.accounts[accountIndex];
1319
+ if (!account) return;
1320
+ if (inputTokens) account.usage.totalInputTokens += inputTokens;
1321
+ if (outputTokens) account.usage.totalOutputTokens += outputTokens;
1322
+ }
1323
+
1324
+ /**
1325
+ * Mark an account as rate-limited for a given duration.
1326
+ */
1327
+ markRateLimited(accountIndex, retryAfterSeconds, options = {}) {
1328
+ const account = this.accounts[accountIndex];
1329
+ if (!account) return;
1330
+ const retryAfter = clampRetryAfterSeconds(retryAfterSeconds);
1331
+ account.status = 'throttled';
1332
+ account.rateLimitedUntil = Date.now() + (retryAfter * 1000);
1333
+ account.lastStatus = options.status || 429;
1334
+ account.lastError = 'rate_limited';
1335
+ account.lastErrorAt = Date.now();
1336
+ account.provisionalRateLimitFingerprint = options.fingerprint || null;
1337
+ if (options.recordFailure !== false) {
1338
+ account.failedRequests++;
1339
+ account.consecutiveFailures++;
1340
+ }
1341
+ console.log(`[Maxpool] Account "${account.name}" rate limited for ${retryAfter}s`);
1342
+ }
1343
+
1344
+ markAuthFailed(accountIndex, status = 403, reason = 'auth_failed') {
1345
+ const account = this.accounts[accountIndex];
1346
+ if (!account) return;
1347
+ account.status = 'error';
1348
+ account.rateLimitedUntil = null;
1349
+ account.cooldownUntil = null;
1350
+ account.provisionalUpstreamUntil = null;
1351
+ account.provisionalUpstreamFingerprint = null;
1352
+ account.lastStatus = status;
1353
+ account.lastError = reason;
1354
+ account.lastErrorAt = Date.now();
1355
+ console.log(`[Maxpool] Account "${account.name}" disabled after HTTP ${status} (${reason})`);
1356
+ }
1357
+
1358
+ markTransientFailure(accountIndex, reason = 'transient_error') {
1359
+ const account = this.accounts[accountIndex];
1360
+ if (!account) return;
1361
+ const failures = Math.max(1, account.consecutiveFailures + 1);
1362
+ const cooldown = Math.min(
1363
+ this.scheduler.maxCooldownMs,
1364
+ this.scheduler.cooldownMs * 2 ** Math.min(failures - 1, 5),
1365
+ );
1366
+ account.consecutiveFailures = failures;
1367
+ account.failedRequests++;
1368
+ account.lastError = reason;
1369
+ account.lastErrorAt = Date.now();
1370
+ account.cooldownUntil = Date.now() + cooldown;
1371
+ console.log(`[Maxpool] Account "${account.name}" cooling down for ${Math.ceil(cooldown / 1000)}s after ${reason}`);
1372
+ }
1373
+
1374
+ markProvisionalUpstreamFailure(accountIndex, status, fingerprint, retryAfterSeconds = 10) {
1375
+ const account = this.accounts[accountIndex];
1376
+ if (!account) return;
1377
+ const retryAfter = Math.min(clampRetryAfterSeconds(retryAfterSeconds), 30);
1378
+ account.provisionalUpstreamUntil = Math.max(
1379
+ account.provisionalUpstreamUntil || 0,
1380
+ Date.now() + retryAfter * 1000,
1381
+ );
1382
+ account.lastStatus = status;
1383
+ account.lastError = 'upstream_throttled';
1384
+ account.lastErrorAt = Date.now();
1385
+ account.provisionalUpstreamFingerprint = fingerprint;
1386
+ console.log(`[Maxpool] Account "${account.name}" returned HTTP ${status}; trying another Claude account and retrying this one in ${retryAfter}s`);
1387
+ }
1388
+
1389
+ clearProvisionalUpstreamFailures(fingerprint, accountIndexes) {
1390
+ for (const index of accountIndexes) {
1391
+ const account = this.accounts[index];
1392
+ if (!account || account.provisionalUpstreamFingerprint !== fingerprint) continue;
1393
+ account.provisionalUpstreamUntil = null;
1394
+ account.provisionalUpstreamFingerprint = null;
1395
+ if (account.lastError === 'upstream_throttled') {
1396
+ account.lastError = null;
1397
+ account.lastErrorAt = null;
1398
+ }
1399
+ }
1400
+ }
1401
+
1402
+ shouldPromoteUpstreamFailure(incident, requestInfo = {}) {
1403
+ if (!incident || incident.accounts.size < 2) return false;
1404
+ for (const account of this.accounts) {
1405
+ if (
1406
+ !account.enabled
1407
+ || account.type === 'provider'
1408
+ || !this._isRequestCompatible(account, requestInfo.profile || 'claude', requestInfo)
1409
+ ) {
1410
+ continue;
1411
+ }
1412
+ if (
1413
+ (account.lastSuccessAt && account.lastSuccessAt >= incident.firstAt)
1414
+ || (account.lastAcceptedAt && account.lastAcceptedAt >= incident.firstAt)
1415
+ ) return false;
1416
+ if (incident.accounts.has(account.index)) continue;
1417
+ if (account.status === 'exhausted' || account.status === 'error') continue;
1418
+ if (this._isSessionQuotaUnavailable(account)) continue;
1419
+ if (this._weeklyState(account) === 'exhausted') continue;
1420
+ return false;
1421
+ }
1422
+ return true;
1423
+ }
1424
+
1425
+ markUpstreamAccepted(accountIndex) {
1426
+ const account = this.accounts[accountIndex];
1427
+ if (!account) return;
1428
+ account.lastAcceptedAt = Date.now();
1429
+ }
1430
+
1431
+ /**
1432
+ * Ensure an OAuth account's token is fresh, refreshing if needed.
1433
+ * Pass force=true to refresh regardless of expiry (e.g. after a 401).
1434
+ * Concurrent calls for the same account coalesce into a single refresh.
1435
+ */
1436
+ async ensureTokenFresh(accountIndex, force = false) {
1437
+ const account = this.accounts[accountIndex];
1438
+ if (!account || account.type !== 'oauth' || !account.refreshToken) return true;
1439
+
1440
+ if (!force && !isTokenExpiringSoon(account.expiresAt)) return true;
1441
+
1442
+ // Coalesce concurrent refreshes
1443
+ if (account._refreshPromise) return account._refreshPromise;
1444
+
1445
+ account._refreshPromise = (async () => {
1446
+ console.log(`[Maxpool] Refreshing token for account "${account.name}"...`);
1447
+ try {
1448
+ const newTokens = await this._refreshAccessToken(account.refreshToken);
1449
+ account.credential = newTokens.accessToken;
1450
+ account.refreshToken = newTokens.refreshToken;
1451
+ account.expiresAt = newTokens.expiresAt;
1452
+ account.status = 'active';
1453
+ account.cooldownUntil = null;
1454
+ console.log(`[Maxpool] Token refreshed for account "${account.name}"`);
1455
+ this._onTokenRefresh?.(accountIndex, newTokens);
1456
+ return true;
1457
+ } catch (err) {
1458
+ console.error(`[Maxpool] Token refresh failed for "${account.name}": ${err.message}`);
1459
+ // Only mark as error if the access token is actually expired;
1460
+ // a failed proactive refresh shouldn't kill a still-valid token
1461
+ if (!account.expiresAt || Date.now() >= account.expiresAt) {
1462
+ if (err.retryable) {
1463
+ this.markTransientFailure(accountIndex, `token_refresh_${err.status || 'network'}`);
1464
+ } else {
1465
+ this.markAuthFailed(accountIndex, err.status || 401, 'token_refresh_failed');
1466
+ }
1467
+ return false;
1468
+ }
1469
+ return true;
1470
+ } finally {
1471
+ account._refreshPromise = null;
1472
+ }
1473
+ })();
1474
+
1475
+ return account._refreshPromise;
1476
+ }
1477
+
1478
+ /**
1479
+ * Set a callback to persist refreshed tokens to config.
1480
+ */
1481
+ onTokenRefresh(callback) {
1482
+ this._onTokenRefresh = callback;
1483
+ }
1484
+
1485
+ /**
1486
+ * Update a specific account's OAuth tokens (e.g. after intercepting a token refresh).
1487
+ */
1488
+ updateAccountTokens(accountIndex, { accessToken, refreshToken, expiresAt }) {
1489
+ const account = this.accounts[accountIndex];
1490
+ if (!account || account.type !== 'oauth') return;
1491
+
1492
+ account.credential = accessToken;
1493
+ if (refreshToken) account.refreshToken = refreshToken;
1494
+ account.expiresAt = expiresAt;
1495
+ if (account.status === 'error') account.status = 'active';
1496
+ console.log(`[Maxpool] Updated tokens for account "${account.name}"`);
1497
+ this._onTokenRefresh?.(accountIndex, {
1498
+ accessToken,
1499
+ refreshToken: account.refreshToken,
1500
+ expiresAt: account.expiresAt,
1501
+ });
1502
+ }
1503
+
1504
+ /**
1505
+ * Add a new account at runtime.
1506
+ */
1507
+ addAccount(acctData) {
1508
+ const index = this.accounts.length;
1509
+ this.accounts.push({
1510
+ index,
1511
+ name: acctData.name,
1512
+ type: acctData.type,
1513
+ provider: acctData.provider || (acctData.type === 'provider' ? 'provider' : 'anthropic'),
1514
+ accountUuid: acctData.accountUuid || null,
1515
+ credential: acctData.accessToken || acctData.authToken || acctData.apiKey,
1516
+ upstream: acctData.upstream || null,
1517
+ authHeader: acctData.authHeader || null,
1518
+ profiles: acctData.profiles || (acctData.type === 'provider' ? ['all'] : ['claude', 'all']),
1519
+ priority: Number.isFinite(acctData.priority) ? acctData.priority : 0,
1520
+ model: acctData.model || null,
1521
+ modelMap: acctData.modelMap || null,
1522
+ stripBetaHeaders: Boolean(acctData.stripBetaHeaders),
1523
+ runtime: Boolean(acctData.runtime),
1524
+ enabled: acctData.enabled !== false,
1525
+ refreshToken: acctData.refreshToken || null,
1526
+ expiresAt: acctData.expiresAt || null,
1527
+ status: 'active',
1528
+ // Unknown quota until the first response — probe it like startup accounts.
1529
+ probing: true,
1530
+ quota: emptyQuota(),
1531
+ usage: { totalInputTokens: 0, totalOutputTokens: 0, totalRequests: 0, lastUsed: null },
1532
+ inFlight: 0,
1533
+ activeWeight: 0,
1534
+ completedRequests: 0,
1535
+ failedRequests: 0,
1536
+ loadEvents: [],
1537
+ consecutiveFailures: 0,
1538
+ lastStatus: null,
1539
+ lastResponseMs: null,
1540
+ lastAcceptedAt: null,
1541
+ lastError: null,
1542
+ lastErrorAt: null,
1543
+ cooldownUntil: null,
1544
+ provisionalUpstreamFingerprint: null,
1545
+ provisionalUpstreamUntil: null,
1546
+ rateLimitedUntil: null,
1547
+ provisionalRateLimitFingerprint: null,
1548
+ recoveredAt: null,
1549
+ lastQuotaLogKey: null,
1550
+ });
1551
+ return index;
1552
+ }
1553
+
1554
+ upsertRuntimeAccount(acctData) {
1555
+ const idx = this.accounts.findIndex(a => a.name === acctData.name);
1556
+ if (idx < 0) return this.addAccount({ ...acctData, runtime: true });
1557
+
1558
+ const account = this.accounts[idx];
1559
+ const nextCredential = acctData.accessToken || acctData.authToken || acctData.apiKey || account.credential;
1560
+ const nextUpstream = acctData.upstream || account.upstream;
1561
+ const changed = nextCredential !== account.credential || nextUpstream !== account.upstream;
1562
+
1563
+ account.type = acctData.type || account.type;
1564
+ account.provider = acctData.provider || account.provider;
1565
+ account.credential = nextCredential;
1566
+ account.upstream = nextUpstream;
1567
+ account.authHeader = acctData.authHeader || account.authHeader;
1568
+ account.profiles = acctData.profiles || account.profiles;
1569
+ account.priority = Number.isFinite(acctData.priority) ? acctData.priority : account.priority;
1570
+ account.model = acctData.model || account.model;
1571
+ account.modelMap = acctData.modelMap || account.modelMap;
1572
+ account.stripBetaHeaders = Boolean(acctData.stripBetaHeaders);
1573
+ account.runtime = true;
1574
+ if (account.status === 'error' && changed) {
1575
+ account.status = 'active';
1576
+ account.lastError = null;
1577
+ account.lastErrorAt = null;
1578
+ account.consecutiveFailures = 0;
1579
+ }
1580
+ return idx;
1581
+ }
1582
+
1583
+ /**
1584
+ * Remove an account by index.
1585
+ */
1586
+ removeAccount(index) {
1587
+ if (index < 0 || index >= this.accounts.length) return;
1588
+ const removed = this.accounts[index];
1589
+ if (removed.inFlight > 0) return false;
1590
+ const removedName = removed.name;
1591
+ this.accounts.splice(index, 1);
1592
+ this.accounts.forEach((a, i) => a.index = i);
1593
+ if (this.currentIndex >= this.accounts.length) {
1594
+ this.currentIndex = Math.max(0, this.accounts.length - 1);
1595
+ } else if (this.currentIndex > index) {
1596
+ this.currentIndex--;
1597
+ }
1598
+ if (removedName === this.preferredAccountName) {
1599
+ this.setRoutingMode('automatic');
1600
+ }
1601
+ return true;
1602
+ }
1603
+
1604
+ // Match a saved state entry to a live account by stable identity: prefer the
1605
+ // account UUID when both have one, otherwise fall back to the name.
1606
+ _sameIdentity(saved, account) {
1607
+ if (saved.accountUuid && account.accountUuid) return saved.accountUuid === account.accountUuid;
1608
+ return saved.name === account.name;
1609
+ }
1610
+
1611
+ /**
1612
+ * Serialize persistable quota state for all accounts (no credentials), keyed
1613
+ * by account identity so it can be matched back after a restart.
1614
+ */
1615
+ exportQuotaState() {
1616
+ return this.accounts.map(a => {
1617
+ const quota = {};
1618
+ for (const f of PERSISTED_QUOTA_FIELDS) quota[f] = a.quota[f];
1619
+ return { accountUuid: a.accountUuid, name: a.name, quota };
1620
+ });
1621
+ }
1622
+
1623
+ /**
1624
+ * Restore quota learned in a previous run, matched to accounts by identity.
1625
+ * Stale windows are not special-cased — _clearExpiredQuotas wipes any restored
1626
+ * window whose reset time has already passed on first use.
1627
+ */
1628
+ restoreQuotaState(saved) {
1629
+ if (!Array.isArray(saved)) return;
1630
+ for (const account of this.accounts) {
1631
+ const match = saved.find(s => this._sameIdentity(s, account));
1632
+ if (!match || !match.quota) continue;
1633
+ for (const f of PERSISTED_QUOTA_FIELDS) {
1634
+ if (match.quota[f] != null) account.quota[f] = match.quota[f];
1635
+ }
1636
+ // Only keep a restored utilization that carries a clearable reset window.
1637
+ // A stale value with no reset can't be cleared by _clearExpiredQuotas and
1638
+ // could otherwise pin the account unavailable until the first live response.
1639
+ if (account.quota.unified5hReset == null) account.quota.unified5h = null;
1640
+ if (account.quota.unified7dReset == null) account.quota.unified7d = null;
1641
+ // We already know this account's weekly window, so it isn't "probing".
1642
+ if (account.quota.unified7dReset != null) account.probing = false;
1643
+ }
1644
+ }
1645
+
1646
+ /**
1647
+ * Return a status summary of all accounts (safe to expose, no credentials).
1648
+ */
1649
+ getStatus() {
1650
+ const now = Date.now();
1651
+ return {
1652
+ currentAccount: this.accounts[this.currentIndex]?.name,
1653
+ switchThreshold: this.switchThreshold,
1654
+ routing: {
1655
+ mode: this.routingMode,
1656
+ preferredAccount: this.preferredAccountName,
1657
+ },
1658
+ accounts: this.accounts.map(a => ({
1659
+ name: a.name,
1660
+ type: a.type,
1661
+ provider: a.provider,
1662
+ enabled: a.enabled,
1663
+ upstream: a.upstream,
1664
+ profiles: a.profiles,
1665
+ priority: a.priority,
1666
+ runtime: a.runtime,
1667
+ status: a.status,
1668
+ inFlight: a.inFlight,
1669
+ activeWeight: a.activeWeight,
1670
+ completedRequests: a.completedRequests,
1671
+ failedRequests: a.failedRequests,
1672
+ consecutiveFailures: a.consecutiveFailures,
1673
+ lastStatus: a.lastStatus,
1674
+ lastResponseMs: a.lastResponseMs,
1675
+ load: {
1676
+ current: {
1677
+ inFlight: a.inFlight,
1678
+ activeWeight: a.activeWeight,
1679
+ },
1680
+ last15m: this._loadSummary(a, 15 * 60 * 1000, now),
1681
+ last1h: this._loadSummary(a, 60 * 60 * 1000, now),
1682
+ },
1683
+ lastError: a.lastError,
1684
+ lastErrorAt: a.lastErrorAt ? new Date(a.lastErrorAt).toISOString() : null,
1685
+ cooldownUntil: Math.max(a.cooldownUntil || 0, a.provisionalUpstreamUntil || 0)
1686
+ ? new Date(Math.max(a.cooldownUntil || 0, a.provisionalUpstreamUntil || 0)).toISOString()
1687
+ : null,
1688
+ quota: { ...a.quota },
1689
+ weekly: {
1690
+ state: this._weeklyState(a),
1691
+ rawState: this._weeklyRawState(a),
1692
+ effectiveUsage: this._effectiveWeeklyUsage(a),
1693
+ paceState: this._weeklyPaceState(a),
1694
+ },
1695
+ usage: { ...a.usage },
1696
+ rateLimitedUntil: a.rateLimitedUntil
1697
+ ? new Date(a.rateLimitedUntil).toISOString()
1698
+ : null,
1699
+ })),
1700
+ scheduler: {
1701
+ mode: 'adaptive-least-loaded',
1702
+ globalInFlight: this.getGlobalInFlight(),
1703
+ admissionPaused: this.admissionPaused,
1704
+ safetyMaxActivePerAccount: this.scheduler.safetyMaxActivePerAccount,
1705
+ safetyMaxGlobalActive: this.scheduler.safetyMaxGlobalActive,
1706
+ },
1707
+ upstreamThrottle: {
1708
+ active: this._isUpstreamThrottleBlocking(),
1709
+ until: this.upstreamThrottle.until
1710
+ ? new Date(this.upstreamThrottle.until).toISOString()
1711
+ : null,
1712
+ reason: this.upstreamThrottle.reason,
1713
+ probeInFlight: this.upstreamThrottle.probeInFlight,
1714
+ count: this.upstreamThrottle.count,
1715
+ lastAt: this.upstreamThrottle.lastAt
1716
+ ? new Date(this.upstreamThrottle.lastAt).toISOString()
1717
+ : null,
1718
+ queued: this.queueState.waiting.length,
1719
+ oldestQueuedMs: this.queueState.waiting.length
1720
+ ? Math.max(0, now - this.queueState.waiting[0].queuedAt)
1721
+ : 0,
1722
+ },
1723
+ sessions: {
1724
+ stickyBindings: this.sessionBindings.size,
1725
+ thinkingProtected: [...this.sessionPolicies.values()].filter(p => p.requiresAnthropicThinkingIntegrity).length,
1726
+ },
1727
+ };
1728
+ }
1729
+ }