free-coding-models 0.1.81 → 0.1.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,600 @@
1
+ /**
2
+ * @file lib/account-manager.js
3
+ * @description Multi-account health tracking and selection for proxy rotation.
4
+ *
5
+ * Tracks per-account health (success rate, latency, quota) and uses
6
+ * Power-of-2-Choices (P2C) to select the best available account.
7
+ * Supports sticky sessions via an LRU map, circuit breakers per account,
8
+ * and retry-after cooldown periods.
9
+ *
10
+ * ## Rate-limit state machine (per account)
11
+ *
12
+ * Unknown-telemetry providers (huggingface, cloudflare, etc.):
13
+ * - Count consecutive 429s within a rolling 10-minute window.
14
+ * - On reaching threshold (3), enter temporary cooldown starting at 15m.
15
+ * - Cooldown escalates: 15m → 30m → 60m → ... capped at 6h.
16
+ * - After cooldown expires, allow ONE probe request (half-open).
17
+ * - Probe success → reset all state (active again).
18
+ * - Probe failure (429) → escalate cooldown, clear probe lock.
19
+ *
20
+ * Known-telemetry providers (groq, nvidia, etc.):
21
+ * - Honor `retry-after` header if present.
22
+ * - If absent, use 5m fallback cooldown.
23
+ * - 429s do NOT count toward unknown-telemetry threshold.
24
+ *
25
+ * Permanent disable:
26
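+ * - Only for AUTH_ERROR (401/403). NOTE(review): recordFailure() keys on the error type alone; the "skipAccount=true AND no rateLimitConfidence" conditions are presumably enforced by the error classifier — confirm.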
27
+ *
28
+ * @exports AccountManager
29
+ */
30
+
31
+ import { CircuitBreaker } from './error-classifier.js'
32
+ import { isKnownQuotaTelemetry } from './quota-capabilities.js'
33
+
34
+ // ─── Rate-limit state machine constants ──────────────────────────────────────
35
+
36
// Time units (milliseconds) used to spell out the durations below.
const MS_PER_MINUTE = 60 * 1000
const MS_PER_HOUR = 60 * MS_PER_MINUTE

// Unknown-telemetry providers: number of consecutive 429s (inside the rolling
// window) that triggers a temporary cooldown.
const UNKNOWN_429_THRESHOLD = 3
// Length of the rolling window in which consecutive 429s are counted (10 minutes).
const UNKNOWN_429_WINDOW_MS = 10 * MS_PER_MINUTE
// First cooldown duration (15 minutes); doubled on every escalation step.
const UNKNOWN_COOLDOWN_BASE_MS = 15 * MS_PER_MINUTE
// Upper bound for the escalating cooldown (6 hours).
const UNKNOWN_COOLDOWN_MAX_MS = 6 * MS_PER_HOUR
// Known-telemetry providers: cooldown applied when no retry-after is supplied (5 minutes).
const KNOWN_FALLBACK_COOLDOWN_MS = 5 * MS_PER_MINUTE
41
+
42
+ // ─── Internal: per-account health state ──────────────────────────────────────
43
+
44
/**
 * Mutable health record for a single account: request counters, accumulated
 * latency, quota telemetry, a circuit breaker, and the 429 rate-limit state.
 */
class AccountHealth {
  /**
   * @param {number} cbThreshold - Circuit breaker failure threshold
   * @param {number} cbCooldownMs - Circuit breaker cooldown in ms
   */
  constructor(cbThreshold, cbCooldownMs) {
    this.successCount = 0
    this.failureCount = 0
    /** Sum of the latencies reported for successful requests, in ms. */
    this.totalLatencyMs = 0
    /**
     * Remaining quota as a percentage 0–100, or null when unknown.
     * null means we have not yet received quota telemetry for this account.
     */
    this.quotaPercent = null
    /**
     * Quota signal reliability:
     *   - 'known'    — quota was updated from verifiable headers/endpoint
     *   - 'inferred' — quota was estimated from error patterns (future use)
     *   - 'unknown'  — no quota data received yet
     */
    this.quotaSignal = 'unknown'
    /** When true, this account is permanently disabled (e.g. auth failure). */
    this.disabled = false
    this.circuitBreaker = new CircuitBreaker({
      threshold: cbThreshold,
      cooldownMs: cbCooldownMs,
    })

    /**
     * Per-account rate-limit state machine for 429 handling.
     *
     * - consecutive429:  count of consecutive 429s in the current rolling window
     * - windowStartMs:   start time of the current rolling window (ms epoch)
     * - cooldownLevel:   escalation level (0 = first occurrence → 15m, 1 → 30m, ...)
     * - cooldownUntilMs: epoch ms when cooldown expires (0 = not in cooldown)
     * - probeInFlight:   true while one half-open probe is in flight
     */
    this.rateLimitState = {
      consecutive429: 0,
      windowStartMs: 0,
      cooldownLevel: 0,
      cooldownUntilMs: 0,
      probeInFlight: false,
    }
  }

  /**
   * Health score in roughly [0, 1] (can go negative when a quota penalty applies).
   *
   * Formula:
   *   0.4 * successRate + 0.3 * latencyScore + 0.3 * quotaScore − penalty
   *
   * Where:
   *   successRate  = successes / (successes + failures), 1.0 if no requests yet
   *   latencyScore = 1 − min(avgLatencyMs / 5000, 1)   (lower latency = better)
   *   avgLatencyMs = totalLatencyMs / successes, 0 if there are no successes
   *   quotaScore:
   *     - quotaSignal 'known' with a non-null quotaPercent: quotaPercent / 100
   *     - otherwise: 0.5 (neutral — not best, not worst)
   *   penalty:
   *     - 0.5 if known quotaPercent < 20
   *     - 0.3 if known quotaPercent < 35
   *     - 0 otherwise / unknown
   *
   * @returns {number}
   */
  computeScore() {
    const total = this.successCount + this.failureCount
    const successRate = total === 0 ? 1.0 : this.successCount / total

    // Latency is only accumulated when a success is recorded, so it must be
    // averaged over successes. Dividing by successes + failures would make an
    // account look faster every time it fails.
    const avgLatencyMs = this.successCount === 0 ? 0 : this.totalLatencyMs / this.successCount
    const latencyScore = 1 - Math.min(avgLatencyMs / 5000, 1)

    // Unknown quota: treat as neutral (0.5) — do not assume best-case.
    let quotaScore = 0.5
    let penalty = 0
    if (this.quotaSignal === 'known' && this.quotaPercent !== null) {
      quotaScore = this.quotaPercent / 100
      if (this.quotaPercent < 20) penalty = 0.5
      else if (this.quotaPercent < 35) penalty = 0.3
    }

    return 0.4 * successRate + 0.3 * latencyScore + 0.3 * quotaScore - penalty
  }
}
130
+
131
+ // ─── LRU Map helper ───────────────────────────────────────────────────────────
132
+ // Uses plain Map (insertion-ordered). To access: delete then re-set (moves to end).
133
+ // To insert new: evict first key if at capacity.
134
+
135
/**
 * Look up `key` in an insertion-ordered Map used as an LRU cache.
 * A hit is re-inserted so the entry becomes the most recently used one;
 * a miss leaves the map untouched and yields undefined.
 *
 * @param {Map<string, string>} map
 * @param {string} key
 * @returns {string|undefined}
 */
function lruGet(map, key) {
  const cached = map.get(key)
  // Distinguish "absent" from "present but undefined" before touching the map.
  if (cached === undefined && !map.has(key)) return undefined

  // Delete + set moves the entry to the end of the Map's iteration order.
  map.delete(key)
  map.set(key, cached)
  return cached
}
150
+
151
/**
 * Store `key → value` in an insertion-ordered Map used as an LRU cache.
 * An existing key is refreshed (moved to the most-recently-used end).
 * A new key arriving while the map holds `maxSize` or more entries first
 * evicts the least recently used entry (the Map's first key).
 *
 * @param {Map<string, string>} map
 * @param {string} key
 * @param {string} value
 * @param {number} maxSize
 */
function lruSet(map, key, value, maxSize) {
  const isNewKey = !map.has(key)

  if (!isNewKey) {
    // Drop the stale entry so the set() below re-inserts it at the end.
    map.delete(key)
  } else if (map.size >= maxSize) {
    // The first key in iteration order is the least recently used one.
    const { value: oldestKey } = map.keys().next()
    map.delete(oldestKey)
  }

  map.set(key, value)
}
171
+
172
+ // ─── AccountManager ───────────────────────────────────────────────────────────
173
+
174
/**
 * Tracks per-account health and picks an account for each proxied request.
 *
 * Selection uses sticky sessions (LRU map) first and Power-of-2-Choices
 * otherwise. Availability takes into account permanent disable, the circuit
 * breaker, the unknown-telemetry cooldown / half-open probe state, the
 * retry-after map, and known quota exhaustion.
 */
export class AccountManager {
  /**
   * @param {Array<{ id: string, providerKey: string, apiKey: string, modelId: string, url: string }>} accounts
   * @param {{ circuitBreakerThreshold?: number, circuitBreakerCooldownMs?: number, maxStickySessions?: number }} [opts]
   */
  constructor(accounts, opts = {}) {
    const {
      circuitBreakerThreshold = 5,
      circuitBreakerCooldownMs = 60000,
      maxStickySessions = 1000,
    } = opts

    this._accounts = accounts
    this._maxStickySessions = maxStickySessions

    /** @type {Map<string, AccountHealth>} */
    this._healthMap = new Map()
    for (const acct of accounts) {
      this._healthMap.set(
        acct.id,
        new AccountHealth(circuitBreakerThreshold, circuitBreakerCooldownMs)
      )
    }

    /** LRU Map: sticky key (fingerprint, optionally prefixed by model) → accountId */
    this._stickyMap = new Map()

    /** Map: accountId → retryAfter epoch ms */
    this._retryAfterMap = new Map()
  }

  /**
   * Returns true when an account can serve the requested proxy model.
   *
   * Supports both:
   *   - `account.proxyModelId` (logical fcm-proxy slug)
   *   - `account.modelId` (upstream model id, backward compatibility)
   *
   * A falsy `requestedModel` matches every account.
   *
   * @private
   * @param {{ proxyModelId?: string, modelId?: string }} acct
   * @param {string|undefined} requestedModel
   * @returns {boolean}
   */
  _accountSupportsModel(acct, requestedModel) {
    if (!requestedModel) return true
    return acct.proxyModelId === requestedModel || acct.modelId === requestedModel
  }

  // ─── Private helpers ──────────────────────────────────────────────────────

  /**
   * Evaluate whether an account can accept a request right now WITHOUT
   * touching the rate-limit state (no probe is armed here).
   *
   * Checks, in order: health record exists, not disabled, circuit breaker not
   * open, unknown-telemetry cooldown expired with no probe in flight,
   * retry-after cooldown expired, and known quota above 5%.
   *
   * `needsProbe` is true when the account is only available as the single
   * half-open probe after an expired cooldown; the caller must arm the probe
   * (see `_armProbe`) if it actually routes a request to the account.
   *
   * @private
   * @param {{ id: string }} acct
   * @param {number} [now] - Current epoch ms
   * @returns {{ available: boolean, needsProbe: boolean }}
   */
  _checkAvailability(acct, now = Date.now()) {
    const unavailable = { available: false, needsProbe: false }

    const health = this._healthMap.get(acct.id)
    if (!health || health.disabled) return unavailable
    if (health.circuitBreaker.isOpen()) return unavailable

    // Unknown-telemetry cooldown: blocked while cooling down or while another
    // probe is already in flight; otherwise this request would be the probe.
    const rl = health.rateLimitState
    let needsProbe = false
    if (rl.cooldownUntilMs > 0) {
      if (now < rl.cooldownUntilMs || rl.probeInFlight) return unavailable
      needsProbe = true
    }

    // Retry-after cooldown (set via _retryAfterMap)
    const retryAfterTs = this._retryAfterMap.get(acct.id)
    if (retryAfterTs && now < retryAfterTs) return unavailable

    // Only exclude when quota is KNOWN to be nearly exhausted; unknown quota
    // (null quotaPercent) keeps the account available.
    if (health.quotaSignal === 'known' && health.quotaPercent !== null && health.quotaPercent <= 5) {
      return unavailable
    }

    return { available: true, needsProbe }
  }

  /**
   * Mark the half-open probe as in flight so further selections of this
   * account are blocked until the probe's outcome is recorded.
   *
   * @private
   * @param {string} accountId
   */
  _armProbe(accountId) {
    const health = this._healthMap.get(accountId)
    if (health) health.rateLimitState.probeInFlight = true
  }

  /**
   * Returns true if the account can currently accept requests.
   * Checks: not disabled, circuit breaker not open, not in cooldown,
   * and quota > 5% remaining.
   *
   * Also manages the half-open probe window: when the cooldown has expired
   * and the account is otherwise available, the call is treated as claiming
   * the single probe and `probeInFlight` is set. The probe is armed only
   * when this method returns true, so an account that is still blocked for
   * another reason never ends up with a dangling probe lock.
   *
   * Callers that merely want to inspect availability (e.g. filtering a pool)
   * should use `_checkAvailability` instead.
   *
   * @param {{ id: string }} acct
   * @returns {boolean}
   */
  _isAccountAvailable(acct) {
    const { available, needsProbe } = this._checkAvailability(acct)
    if (available && needsProbe) this._armProbe(acct.id)
    return available
  }

  // ─── Public API ───────────────────────────────────────────────────────────

  /**
   * Select the best available account.
   *
   * Algorithm:
   * 1. If `sessionFingerprint` is set and a sticky entry exists for it,
   *    return the sticky account if it supports the model, is available and
   *    scores at least 0.65. Otherwise fall through.
   * 2. Filter all accounts (for the requested model) to those currently available.
   * 3. If none available, return null.
   * 4. Power-of-2-Choices (P2C): sample 2 distinct random candidates, return
   *    the one with the higher health score. (If only 1 available, return it.)
   * 5. If `sessionFingerprint` is set, store the selection in the LRU map.
   *
   * A half-open probe is armed only for the account that is actually
   * returned; accounts that were merely considered keep their state.
   *
   * @param {{ sessionFingerprint?: string, requestedModel?: string }} [opts]
   * @returns {{ id: string, providerKey: string, apiKey: string, modelId: string, url: string, proxyModelId?: string } | null}
   */
  selectAccount({ sessionFingerprint, requestedModel } = {}) {
    // Sticky sessions break when the account's score drops below this value
    // (slow/degraded provider).
    const STICKY_MIN_SCORE = 0.65
    const now = Date.now()

    const stickyKey = sessionFingerprint
      ? (requestedModel ? `${requestedModel}::${sessionFingerprint}` : sessionFingerprint)
      : null

    // 1. Sticky session fast-path
    if (stickyKey) {
      const stickyId = lruGet(this._stickyMap, stickyKey)
      const stickyAcct = stickyId === undefined
        ? undefined
        : this._accounts.find(a => a.id === stickyId)
      if (stickyAcct && this._accountSupportsModel(stickyAcct, requestedModel)) {
        const status = this._checkAvailability(stickyAcct, now)
        if (status.available) {
          const stickyScore = this._healthMap.get(stickyAcct.id)?.computeScore() ?? 1.0
          if (stickyScore >= STICKY_MIN_SCORE) {
            if (status.needsProbe) this._armProbe(stickyAcct.id)
            return stickyAcct
          }
          // Score too low — fall through to P2C re-selection (probe NOT armed).
        }
      }
    }

    // 2. Filter to available accounts.
    //    If a model is explicitly requested but has no accounts, the pool is empty
    //    and we return null rather than falling back to the full pool (which
    //    would serve the wrong model).
    const candidatePool = requestedModel
      ? this._accounts.filter(a => this._accountSupportsModel(a, requestedModel))
      : this._accounts

    const available = []
    const probeCandidates = new Set()
    for (const acct of candidatePool) {
      const status = this._checkAvailability(acct, now)
      if (!status.available) continue
      available.push(acct)
      if (status.needsProbe) probeCandidates.add(acct.id)
    }
    if (available.length === 0) return null

    // 3. P2C selection
    let selected
    if (available.length === 1) {
      selected = available[0]
    } else {
      // Pick two distinct random indices: draw the second from n-1 slots and
      // shift it past the first.
      const idx1 = Math.floor(Math.random() * available.length)
      let idx2 = Math.floor(Math.random() * (available.length - 1))
      if (idx2 >= idx1) idx2++

      const a = available[idx1]
      const b = available[idx2]
      const scoreA = this._healthMap.get(a.id).computeScore()
      const scoreB = this._healthMap.get(b.id).computeScore()
      selected = scoreA >= scoreB ? a : b
    }

    // Only the account that will really receive the request claims its probe.
    if (probeCandidates.has(selected.id)) this._armProbe(selected.id)

    // 4. Store/update sticky entry
    if (stickyKey) {
      lruSet(this._stickyMap, stickyKey, selected.id, this._maxStickySessions)
    }

    return selected
  }

  /**
   * Return true if at least one account is registered for the given model.
   * Uses the same matching logic as selectAccount: proxyModelId or modelId.
   * With a falsy `requestedModel`, returns true when any account is registered.
   *
   * @param {string} requestedModel
   * @returns {boolean}
   */
  hasAccountsForModel(requestedModel) {
    if (!requestedModel) return this._accounts.length > 0
    return this._accounts.some(a => this._accountSupportsModel(a, requestedModel))
  }

  /**
   * Update an account's remaining quota from rate-limit response headers.
   * Reads common header variants:
   *   - x-ratelimit-remaining / x-ratelimit-limit
   *   - x-ratelimit-remaining-requests / x-ratelimit-limit-requests
   *
   * The stored percentage is rounded and clamped to 0–100. Headers that do
   * not parse to finite numbers, or a non-positive limit, are ignored.
   *
   * @param {string} accountId
   * @param {Record<string, string>} headers - Lowercased response headers
   * @returns {boolean} true when quota was updated from headers
   */
  updateQuota(accountId, headers) {
    const remainingRaw =
      headers?.['x-ratelimit-remaining'] ??
      headers?.['x-ratelimit-remaining-requests']
    const limitRaw =
      headers?.['x-ratelimit-limit'] ??
      headers?.['x-ratelimit-limit-requests']

    const remaining = Number.parseFloat(remainingRaw)
    const limit = Number.parseFloat(limitRaw)
    if (!Number.isFinite(remaining) || !Number.isFinite(limit) || limit <= 0) return false

    const health = this._healthMap.get(accountId)
    if (!health) return false

    const percent = Math.round((remaining / limit) * 100)
    health.quotaPercent = Math.min(100, Math.max(0, percent))
    health.quotaSignal = 'known'
    return true
  }

  /**
   * Record a failed request against an account.
   *
   * Always increments the failure counter and notifies the circuit breaker,
   * then applies the provider-aware policy:
   *
   * Auth errors (type 'AUTH_ERROR'):
   *   - Permanently disable the account (health.disabled = true).
   *
   * Known-telemetry 429s (isKnownQuotaTelemetry(providerKey) is true):
   *   - Honor `retryAfterSec` if present; else use the 5m fallback cooldown.
   *   - Do NOT count toward the unknown-telemetry threshold.
   *   - Release any half-open probe lock (the probe has completed).
   *
   * Unknown-telemetry 429s:
   *   - Delegated to `_recordUnknown429` (rolling window, threshold, escalation).
   *
   * Any other error:
   *   - Release the probe lock and apply `retryAfterSec` if present.
   *
   * @param {string} accountId
   * @param {{ type: string, shouldRetry: boolean, skipAccount: boolean, retryAfterSec: number|null, rateLimitConfidence?: string }} classifiedError
   * @param {{ providerKey?: string }} [accountCtx] - Provider context for telemetry routing
   */
  recordFailure(accountId, classifiedError, accountCtx = {}) {
    const health = this._healthMap.get(accountId)
    if (!health) return

    health.failureCount++
    health.circuitBreaker.recordFailure()

    const errorType = classifiedError?.type

    // Permanent disable only for AUTH_ERROR. Other skipAccount errors
    // (e.g. MODEL_NOT_FOUND) do NOT permanently disable — the account may be
    // healthy for other models/requests.
    if (errorType === 'AUTH_ERROR') {
      health.disabled = true
      return
    }

    const rl = health.rateLimitState
    const retryAfterSec = classifiedError?.retryAfterSec
    const is429 = errorType === 'RATE_LIMITED' || errorType === 'QUOTA_EXHAUSTED'

    if (is429) {
      const providerKey = accountCtx?.providerKey ?? ''
      if (isKnownQuotaTelemetry(providerKey)) {
        // The request that failed may have been a half-open probe; without
        // releasing the lock the account could never be selected again.
        rl.probeInFlight = false
        const cooldownMs = retryAfterSec ? retryAfterSec * 1000 : KNOWN_FALLBACK_COOLDOWN_MS
        this._retryAfterMap.set(accountId, Date.now() + cooldownMs)
      } else {
        this._recordUnknown429(accountId, health)
      }
      return
    }

    // Non-429 errors: a failed probe must not block re-probing forever.
    rl.probeInFlight = false
    if (retryAfterSec) {
      this._retryAfterMap.set(accountId, Date.now() + retryAfterSec * 1000)
    }
  }

  /**
   * Enter (or extend) the unknown-telemetry cooldown and bump the escalation
   * level. Duration is base * 2^level, capped at the configured maximum;
   * the 429 window is restarted at `now`.
   *
   * @private
   * @param {{ consecutive429: number, windowStartMs: number, cooldownLevel: number, cooldownUntilMs: number, probeInFlight: boolean }} rl
   * @param {number} now - Current epoch ms
   */
  _escalateCooldown(rl, now) {
    const cooldownMs = Math.min(
      UNKNOWN_COOLDOWN_BASE_MS * Math.pow(2, rl.cooldownLevel),
      UNKNOWN_COOLDOWN_MAX_MS
    )
    rl.cooldownUntilMs = now + cooldownMs
    rl.cooldownLevel++
    rl.consecutive429 = 0
    rl.windowStartMs = now
  }

  /**
   * Handle a 429 for an unknown-telemetry account.
   * Manages the rolling window, threshold, cooldown escalation, and probe state.
   * Uses `rateLimitState.cooldownUntilMs` (NOT _retryAfterMap) as source of truth.
   *
   * @private
   * @param {string} accountId
   * @param {AccountHealth} health
   */
  _recordUnknown429(accountId, health) {
    const rl = health.rateLimitState
    const now = Date.now()

    // A 429 while a probe is in flight means the probe failed: release the
    // lock and escalate immediately, without waiting for the threshold.
    if (rl.probeInFlight) {
      rl.probeInFlight = false
      this._escalateCooldown(rl, now)
      return
    }

    // Rolling window: start a fresh count if there is no window or it expired.
    if (rl.windowStartMs === 0 || now - rl.windowStartMs > UNKNOWN_429_WINDOW_MS) {
      rl.consecutive429 = 0
      rl.windowStartMs = now
    }

    rl.consecutive429++

    if (rl.consecutive429 >= UNKNOWN_429_THRESHOLD) {
      this._escalateCooldown(rl, now)
    }
  }

  /**
   * Record a successful request against an account.
   * Clears all temporary exhaustion state: cooldown, probe lock, 429 counters,
   * escalation level and any retry-after entry.
   *
   * @param {string} accountId
   * @param {number} [latencyMs] - Round-trip time in milliseconds (optional)
   */
  recordSuccess(accountId, latencyMs = 0) {
    const health = this._healthMap.get(accountId)
    if (!health) return

    health.successCount++
    health.totalLatencyMs += latencyMs
    health.circuitBreaker.recordSuccess()

    // Any success is currently treated as a full recovery, so the escalation
    // level is reset together with the rest of the rate-limit state.
    const rl = health.rateLimitState
    rl.cooldownUntilMs = 0
    rl.consecutive429 = 0
    rl.windowStartMs = 0
    rl.probeInFlight = false
    rl.cooldownLevel = 0

    // No longer cooling down.
    this._retryAfterMap.delete(accountId)
  }

  /**
   * Build the public health snapshot for one health record.
   *
   * @private
   * @param {AccountHealth} health
   * @returns {{ score: number, quotaPercent: number|null, quotaSignal: string, disabled: boolean }}
   */
  _snapshotHealth(health) {
    return {
      score: health.computeScore(),
      quotaPercent: health.quotaPercent,
      quotaSignal: health.quotaSignal,
      disabled: health.disabled,
    }
  }

  /**
   * Get the current health snapshot for an account.
   *
   * @param {string} accountId
   * @returns {{ score: number, quotaPercent: number|null, quotaSignal: string, disabled: boolean } | null}
   *   null when the account id is unknown
   */
  getHealth(accountId) {
    const health = this._healthMap.get(accountId)
    return health ? this._snapshotHealth(health) : null
  }

  /**
   * Get a snapshot of health for all accounts, keyed by account id.
   *
   * Each entry includes at minimum `{ score, quotaPercent, quotaSignal, disabled }`.
   * If the account has `providerKey` and `modelId`, those are included too.
   *
   * @returns {Record<string, { score: number, quotaPercent: number|null, quotaSignal: string, disabled: boolean, providerKey?: string, modelId?: string }>}
   */
  getAllHealth() {
    const snapshot = {}
    for (const acct of this._accounts) {
      const health = this._healthMap.get(acct.id)
      if (!health) continue
      const entry = this._snapshotHealth(health)
      if (acct.providerKey !== undefined) entry.providerKey = acct.providerKey
      if (acct.modelId !== undefined) entry.modelId = acct.modelId
      snapshot[acct.id] = entry
    }
    return snapshot
  }

  /**
   * Get the remaining cooldown for an account in seconds.
   * Returns 0 if no cooldown is active.
   *
   * Checks both the retry-after map and the unknown-telemetry
   * `cooldownUntilMs`, and returns the longer of the two.
   *
   * @param {string} accountId
   * @returns {number}
   */
  getRetryAfter(accountId) {
    const now = Date.now()
    const secondsUntil = (ts) => (ts > 0 ? Math.max(0, (ts - now) / 1000) : 0)

    const retryAfterTs = this._retryAfterMap.get(accountId) ?? 0
    const cooldownUntilMs = this._healthMap.get(accountId)?.rateLimitState?.cooldownUntilMs ?? 0

    return Math.max(secondsUntil(retryAfterTs), secondsUntil(cooldownUntilMs))
  }
}