free-coding-models 0.3.9 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,634 +0,0 @@
1
- /**
2
- * @file lib/account-manager.js
3
- * @description Multi-account health tracking and selection for proxy rotation.
4
- *
5
- * Tracks per-account health (success rate, latency, quota) and uses
6
- * Power-of-2-Choices (P2C) to select the best available account.
7
- * Supports sticky sessions via an LRU map, circuit breakers per account,
8
- * and retry-after cooldown periods.
9
- *
10
- * ## Rate-limit state machine (per account)
11
- *
12
- * Unknown-telemetry providers (huggingface, cloudflare, etc.):
13
- * - Count consecutive 429s within a rolling 10-minute window.
14
- * - On reaching threshold (3), enter temporary cooldown starting at 15m.
15
- * - Cooldown escalates: 15m → 30m → 60m → ... capped at 6h.
16
- * - After cooldown expires, allow ONE probe request (half-open).
17
- * - Probe success → reset all state (active again).
18
- * - Probe failure (429) → escalate cooldown, clear probe lock.
19
- *
20
- * Known-telemetry providers (groq, nvidia, etc.):
21
- * - Honor `retry-after` header if present.
22
- * - If absent, use 5m fallback cooldown.
23
- * - 429s do NOT count toward unknown-telemetry threshold.
24
- *
25
- * Permanent disable:
26
- * - Only for AUTH_ERROR (401/403) with skipAccount=true AND no rateLimitConfidence.
27
- *
28
- * @exports AccountManager
29
- */
30
-
31
- import { CircuitBreaker } from './error-classifier.js'
32
- import { isKnownQuotaTelemetry } from './quota-capabilities.js'
33
-
34
// ─── Rate-limit state machine constants ──────────────────────────────────────

// Time units in milliseconds, used to keep the durations below readable.
const SECOND_MS = 1000
const MINUTE_MS = 60 * SECOND_MS
const HOUR_MS = 60 * MINUTE_MS

// Unknown-telemetry providers: 429s are counted inside a rolling window and,
// once the threshold is hit, the account cools down for an escalating period.
const UNKNOWN_429_THRESHOLD = 3 // consecutive 429s in window before cooldown
const UNKNOWN_429_WINDOW_MS = 10 * MINUTE_MS // 10-minute rolling window
const UNKNOWN_COOLDOWN_BASE_MS = 15 * MINUTE_MS // 15m initial cooldown
const UNKNOWN_COOLDOWN_MAX_MS = 6 * HOUR_MS // 6h max cooldown

// Known-telemetry providers: cooldown used when a 429 carries no retry-after.
const KNOWN_FALLBACK_COOLDOWN_MS = 5 * MINUTE_MS // 5m fallback for known providers

// Generic consecutive-failure cooldown constants.
// After FAILURE_COOLDOWN_THRESHOLD consecutive non-429 failures an account enters
// a graduated cooldown (30s → 60s → 120s) so the proxy routes around it.
const FAILURE_COOLDOWN_THRESHOLD = 3
const FAILURE_COOLDOWN_STEPS_MS = [30, 60, 120].map(seconds => seconds * SECOND_MS)
47
-
48
- // ─── Internal: per-account health state ──────────────────────────────────────
49
-
50
/**
 * Mutable health record for a single account: request counters, quota
 * telemetry, a circuit breaker, and the cooldown state used for 429s and
 * for consecutive non-429 failures.
 */
class AccountHealth {
  /**
   * @param {number} cbThreshold - Circuit breaker failure threshold
   * @param {number} cbCooldownMs - Circuit breaker cooldown in ms
   */
  constructor(cbThreshold, cbCooldownMs) {
    this.successCount = 0
    this.failureCount = 0
    /** Sum of the latencies recorded alongside successful requests, in ms. */
    this.totalLatencyMs = 0
    /**
     * Remaining quota as a percentage 0–100, or null when unknown.
     * null means we have not yet received quota telemetry for this account.
     */
    this.quotaPercent = null
    /**
     * Quota signal reliability:
     * - 'known'    — quota was updated from verifiable headers/endpoint
     * - 'inferred' — quota was estimated from error patterns (future use)
     * - 'unknown'  — no quota data received yet
     */
    this.quotaSignal = 'unknown'
    /** When true, this account is permanently disabled (e.g. auth failure). */
    this.disabled = false
    this.circuitBreaker = new CircuitBreaker({
      threshold: cbThreshold,
      cooldownMs: cbCooldownMs,
    })

    /**
     * Per-account rate-limit state machine for 429 handling.
     *
     * - consecutive429:  count of consecutive 429s in the current rolling window
     * - windowStartMs:   start time of the current rolling window (ms epoch)
     * - cooldownLevel:   escalation level (0 = first occurrence → 15m, 1 → 30m, ...)
     * - cooldownUntilMs: epoch ms when cooldown expires (0 = not in cooldown)
     * - probeInFlight:   true while one half-open probe is in flight
     */
    this.rateLimitState = {
      consecutive429: 0,
      windowStartMs: 0,
      cooldownLevel: 0,
      cooldownUntilMs: 0,
      probeInFlight: false,
    }

    /**
     * Generic consecutive-failure tracking for non-429 errors.
     * When consecutiveFailures >= FAILURE_COOLDOWN_THRESHOLD, the account enters
     * a graduated cooldown to avoid wasting requests on a failing endpoint.
     */
    this.consecutiveFailures = 0
    this.failureCooldownUntilMs = 0
    this.failureCooldownLevel = 0
  }

  /**
   * Health score in roughly [0, 1] (the low-quota penalty can push it below 0).
   *
   * Formula:
   *   0.4 * successRate + 0.3 * latencyScore + 0.3 * quotaScore − penalty
   *
   * Where:
   *   successRate  = successes / (successes + failures), 1.0 if no requests yet
   *   avgLatencyMs = totalLatencyMs / successes, 0 if no successes yet
   *   latencyScore = 1 − min(avgLatencyMs / 5000, 1)   (lower latency = better)
   *   quotaScore:
   *     - quotaSignal 'known' with a non-null quotaPercent: quotaPercent / 100
   *     - otherwise: 0.5 (neutral — not best, not worst)
   *   penalty:
   *     - 0.5 if known quotaPercent < 20
   *     - 0.3 if known quotaPercent < 35
   *     - 0 otherwise / unknown
   *
   * @returns {number}
   */
  computeScore() {
    const total = this.successCount + this.failureCount
    const successRate = total === 0 ? 1.0 : this.successCount / total

    // Latency is only accumulated when a success is recorded, so the average
    // must be taken over successes. Dividing by all requests would let failures
    // dilute the average and make a failing account look faster than it is.
    const avgLatencyMs = this.successCount === 0 ? 0 : this.totalLatencyMs / this.successCount
    const latencyScore = 1 - Math.min(avgLatencyMs / 5000, 1)

    let quotaScore = 0.5 // unknown quota: neutral, do not assume best-case
    let penalty = 0

    if (this.quotaSignal === 'known' && this.quotaPercent !== null) {
      quotaScore = this.quotaPercent / 100
      if (this.quotaPercent < 20) penalty = 0.5
      else if (this.quotaPercent < 35) penalty = 0.3
    }

    return 0.4 * successRate + 0.3 * latencyScore + 0.3 * quotaScore - penalty
  }
}
145
-
146
- // ─── LRU Map helper ───────────────────────────────────────────────────────────
147
- // Uses plain Map (insertion-ordered). To access: delete then re-set (moves to end).
148
- // To insert new: evict first key if at capacity.
149
-
150
/**
 * Look up `key` in an insertion-ordered Map used as an LRU cache.
 * A hit is re-inserted so it becomes the most recently used entry;
 * a miss leaves the map untouched and yields undefined.
 *
 * @param {Map<string, string>} map
 * @param {string} key
 * @returns {string|undefined}
 */
function lruGet(map, key) {
  if (map.has(key)) {
    const cached = map.get(key)
    // Delete + set moves the entry to the end of the iteration order.
    map.delete(key)
    map.set(key, cached)
    return cached
  }
  return undefined
}
165
-
166
/**
 * Store `value` under `key` in an insertion-ordered Map used as an LRU cache.
 * An existing key is refreshed (moved to the most recently used position).
 * A new key arriving while the map already holds `maxSize` entries evicts
 * the least recently used entry first.
 *
 * @param {Map<string, string>} map
 * @param {string} key
 * @param {string} value
 * @param {number} maxSize
 */
function lruSet(map, key, value, maxSize) {
  const alreadyPresent = map.has(key)
  if (alreadyPresent) {
    // Drop the old slot so the re-insert below lands at the end.
    map.delete(key)
  } else if (map.size >= maxSize) {
    // The first key in iteration order is the least recently used one.
    const [oldestKey] = map.keys()
    map.delete(oldestKey)
  }
  map.set(key, value)
}
186
-
187
- // ─── AccountManager ───────────────────────────────────────────────────────────
188
-
189
/**
 * Tracks per-account health and picks an account for each proxied request.
 *
 * Selection honours sticky sessions, skips accounts that are disabled,
 * circuit-broken, cooling down or nearly out of quota, and otherwise uses
 * Power-of-2-Choices on the health score.
 */
export class AccountManager {
  /**
   * @param {Array<{ id: string, providerKey: string, apiKey: string, modelId: string, url: string }>} accounts
   * @param {{ circuitBreakerThreshold?: number, circuitBreakerCooldownMs?: number, maxStickySessions?: number }} [opts]
   */
  constructor(accounts, opts = {}) {
    const {
      circuitBreakerThreshold = 5,
      circuitBreakerCooldownMs = 60000,
      maxStickySessions = 1000,
    } = opts

    this._accounts = accounts
    this._maxStickySessions = maxStickySessions

    /** @type {Map<string, AccountHealth>} */
    this._healthMap = new Map()
    for (const acct of accounts) {
      this._healthMap.set(
        acct.id,
        new AccountHealth(circuitBreakerThreshold, circuitBreakerCooldownMs)
      )
    }

    /** LRU Map: sticky key (fingerprint, optionally model-scoped) → accountId */
    this._stickyMap = new Map()

    /** Map: accountId → retryAfter epoch ms */
    this._retryAfterMap = new Map()
  }

  /**
   * Returns true when an account can serve the requested proxy model.
   *
   * Supports both:
   *   - `account.proxyModelId` (logical fcm-proxy slug)
   *   - `account.modelId` (upstream model id, backward compatibility)
   *
   * A falsy `requestedModel` matches every account.
   *
   * @private
   * @param {{ proxyModelId?: string, modelId?: string }} acct
   * @param {string|undefined} requestedModel
   * @returns {boolean}
   */
  _accountSupportsModel(acct, requestedModel) {
    if (!requestedModel) return true
    return acct.proxyModelId === requestedModel || acct.modelId === requestedModel
  }

  // ─── Private helpers ──────────────────────────────────────────────────────

  /**
   * Returns true if the account can currently accept requests.
   * Checks: known to the manager, not disabled, circuit breaker not open,
   * not in any cooldown (429 cooldown, retry-after, failure cooldown), and
   * known quota above 5%.
   *
   * Half-open probe handling: once a 429 cooldown has expired, only ONE
   * request (the probe) may go through until its outcome is recorded.
   *   - While `probeInFlight` is true the account is reported unavailable.
   *   - With `armProbe` (the default) a `true` result also marks the probe as
   *     in flight. Arming happens only after every other check has passed, so
   *     a `false` result never leaves a probe armed with no request behind it.
   *   - With `armProbe: false` the call is a pure check. `selectAccount` uses
   *     this while filtering and arms only the account it actually returns.
   *
   * @param {{ id: string }} acct
   * @param {{ armProbe?: boolean }} [opts]
   * @returns {boolean}
   */
  _isAccountAvailable(acct, { armProbe = true } = {}) {
    const health = this._healthMap.get(acct.id)
    if (!health) return false
    if (health.disabled) return false
    if (health.circuitBreaker.isOpen()) return false

    const now = Date.now()

    // Unknown-telemetry 429 cooldown (cooldownUntilMs stays > 0 until a success).
    const rl = health.rateLimitState
    const needsProbe = rl.cooldownUntilMs > 0
    if (needsProbe) {
      if (now < rl.cooldownUntilMs) return false // still cooling down
      if (rl.probeInFlight) return false // a probe is already out
    }

    // Known-telemetry retry-after cooldown (set via _retryAfterMap)
    const retryAfterTs = this._retryAfterMap.get(acct.id)
    if (retryAfterTs && now < retryAfterTs) return false

    // Generic failure cooldown — blocks account after consecutive non-429 failures
    if (health.failureCooldownUntilMs > 0 && now < health.failureCooldownUntilMs) return false

    // Only exclude when quota is known to be nearly exhausted.
    // When quotaSignal is 'unknown' (null quotaPercent), we remain available.
    if (health.quotaSignal === 'known' && health.quotaPercent !== null && health.quotaPercent <= 5) return false

    // Every check passed: this request is the probe.
    if (needsProbe && armProbe) rl.probeInFlight = true
    return true
  }

  /**
   * Marks the half-open probe as in flight for an account that is leaving a
   * 429 cooldown. No-op for accounts that have no 429 cooldown recorded.
   * Must only be called for an account that just passed `_isAccountAvailable`.
   *
   * @private
   * @param {{ id: string }} acct
   */
  _armProbeIfCoolingDown(acct) {
    const rl = this._healthMap.get(acct.id)?.rateLimitState
    if (rl && rl.cooldownUntilMs > 0) rl.probeInFlight = true
  }

  // ─── Public API ───────────────────────────────────────────────────────────

  /**
   * Select the best available account.
   *
   * Algorithm:
   * 1. If `sessionFingerprint` is set and a sticky entry exists for it,
   *    return the sticky account if it supports the model, is available and
   *    scores at least 0.65. Otherwise fall through.
   * 2. Filter the accounts that support the model to those currently available.
   * 3. If none available, return null.
   * 4. Power-of-2-Choices (P2C): sample 2 distinct random candidates, return
   *    the one with the higher health score. (If only 1 available, return it.)
   * 5. If `sessionFingerprint` is set, store the selection in the LRU map.
   *
   * Availability is evaluated without side effects; the half-open probe lock
   * is taken only for the account that is returned, so accounts that were
   * merely considered are not left blocked.
   *
   * @param {{ sessionFingerprint?: string, requestedModel?: string }} [opts]
   * @returns {{ id: string, providerKey: string, apiKey: string, modelId: string, url: string, proxyModelId?: string } | null}
   */
  selectAccount({ sessionFingerprint, requestedModel } = {}) {
    const stickyKey = sessionFingerprint
      ? (requestedModel ? `${requestedModel}::${sessionFingerprint}` : sessionFingerprint)
      : null

    // 1. Sticky session fast-path
    if (stickyKey) {
      const stickyId = lruGet(this._stickyMap, stickyKey)
      if (stickyId !== undefined) {
        const stickyAcct = this._accounts.find(a => a.id === stickyId)
        if (
          stickyAcct &&
          this._accountSupportsModel(stickyAcct, requestedModel) &&
          this._isAccountAvailable(stickyAcct, { armProbe: false })
        ) {
          // Break sticky if the account's health score is below the slow-provider threshold
          const stickyScore = this._healthMap.get(stickyAcct.id)?.computeScore() ?? 1.0
          if (stickyScore >= 0.65) {
            this._armProbeIfCoolingDown(stickyAcct)
            return stickyAcct
          }
          // Score too low (slow/degraded provider) — fall through to P2C re-selection
        }
        // Sticky account is unusable — fall through to P2C
      }
    }

    // 2. Filter to available accounts.
    // If a model is explicitly requested but has no accounts, return null immediately
    // rather than silently falling back to the full pool (which would serve the wrong model).
    const candidatePool = requestedModel
      ? this._accounts.filter(a => this._accountSupportsModel(a, requestedModel))
      : this._accounts
    if (requestedModel && candidatePool.length === 0) return null
    const available = candidatePool.filter(a => this._isAccountAvailable(a, { armProbe: false }))
    if (available.length === 0) return null

    // 3. P2C selection
    let selected
    if (available.length === 1) {
      selected = available[0]
    } else {
      // Pick two distinct random indices: draw the second from a range one
      // shorter and shift it past the first.
      const idx1 = Math.floor(Math.random() * available.length)
      let idx2 = Math.floor(Math.random() * (available.length - 1))
      if (idx2 >= idx1) idx2++

      const a = available[idx1]
      const b = available[idx2]
      const scoreA = this._healthMap.get(a.id).computeScore()
      const scoreB = this._healthMap.get(b.id).computeScore()
      selected = scoreA >= scoreB ? a : b
    }

    // The returned account is the one that carries the probe, if it needs one.
    this._armProbeIfCoolingDown(selected)

    // 4. Store/update sticky entry
    if (stickyKey) {
      lruSet(this._stickyMap, stickyKey, selected.id, this._maxStickySessions)
    }

    return selected
  }

  /**
   * Return true if at least one account is registered for the given model.
   * Uses the same matching logic as selectAccount: proxyModelId or modelId.
   * With a falsy `requestedModel`, returns whether any account is registered.
   *
   * @param {string} requestedModel
   * @returns {boolean}
   */
  hasAccountsForModel(requestedModel) {
    if (!requestedModel) return this._accounts.length > 0
    return this._accounts.some(a => this._accountSupportsModel(a, requestedModel))
  }

  /**
   * Update an account's remaining quota from rate-limit response headers.
   * Reads common header variants:
   *   - x-ratelimit-remaining / x-ratelimit-limit
   *   - x-ratelimit-remaining-requests / x-ratelimit-limit-requests
   *
   * Nothing is changed unless both values parse as numbers, the limit is
   * positive, and the account is known.
   *
   * @param {string} accountId
   * @param {Record<string, string>} headers - Lowercased response headers
   * @returns {boolean} true when quota was updated from headers
   */
  updateQuota(accountId, headers) {
    const remainingRaw =
      headers?.['x-ratelimit-remaining'] ??
      headers?.['x-ratelimit-remaining-requests']
    const limitRaw =
      headers?.['x-ratelimit-limit'] ??
      headers?.['x-ratelimit-limit-requests']

    const remaining = parseFloat(remainingRaw)
    const limit = parseFloat(limitRaw)
    if (Number.isNaN(remaining) || Number.isNaN(limit) || limit <= 0) return false

    const health = this._healthMap.get(accountId)
    if (!health) return false

    health.quotaPercent = Math.round((remaining / limit) * 100)
    health.quotaSignal = 'known'
    return true
  }

  /**
   * Record a failed request against an account.
   *
   * Every failure increments the failure counter and is reported to the
   * circuit breaker. After that, the policy depends on the error type:
   *
   * AUTH_ERROR:
   *   - Permanently disable the account (health.disabled = true).
   *
   * RATE_LIMITED / QUOTA_EXHAUSTED on a known-telemetry provider:
   *   - Honor `retryAfterSec` if truthy; else use the 5m fallback cooldown.
   *   - Do NOT count toward the unknown-telemetry threshold.
   *   - Release the half-open probe lock, so the account can be probed again
   *     once the retry-after has elapsed.
   *
   * RATE_LIMITED / QUOTA_EXHAUSTED on any other provider:
   *   - Delegated to `_recordUnknown429` (rolling window, escalating cooldown).
   *
   * Anything else:
   *   - Release the probe lock, apply `retryAfterSec` if truthy, and count the
   *     failure toward the graduated consecutive-failure cooldown.
   *
   * @param {string} accountId
   * @param {{ type: string, shouldRetry: boolean, skipAccount: boolean, retryAfterSec: number|null, rateLimitConfidence?: string }} classifiedError
   * @param {{ providerKey?: string }} [accountCtx] - Provider context for telemetry routing
   */
  recordFailure(accountId, classifiedError, accountCtx = {}) {
    const health = this._healthMap.get(accountId)
    if (!health) return

    health.failureCount++
    health.circuitBreaker.recordFailure()

    const errorType = classifiedError?.type
    const rl = health.rateLimitState

    // Permanent disable only for AUTH_ERROR. Other skipAccount errors
    // (e.g. MODEL_NOT_FOUND) do NOT permanently disable — the account may be
    // healthy for other models/requests.
    if (errorType === 'AUTH_ERROR') {
      health.disabled = true
      return
    }

    if (errorType === 'RATE_LIMITED' || errorType === 'QUOTA_EXHAUSTED') {
      const providerKey = accountCtx?.providerKey ?? ''

      if (isKnownQuotaTelemetry(providerKey)) {
        // Known-telemetry: use retry-after or 5m fallback
        const cooldownMs = classifiedError?.retryAfterSec
          ? classifiedError.retryAfterSec * 1000
          : KNOWN_FALLBACK_COOLDOWN_MS
        this._retryAfterMap.set(accountId, Date.now() + cooldownMs)
        // If this request was a half-open probe, its outcome is now known.
        // Leaving the lock set would keep the account blocked indefinitely.
        rl.probeInFlight = false
      } else {
        // Unknown-telemetry: rolling window + threshold + escalating cooldown
        this._recordUnknown429(accountId, health)
      }
      return
    }

    // Non-429 errors. A non-429 probe failure must not permanently block the
    // account from re-probing, so the probe lock is released here as well.
    rl.probeInFlight = false
    if (classifiedError?.retryAfterSec) {
      this._retryAfterMap.set(accountId, Date.now() + classifiedError.retryAfterSec * 1000)
    }

    // Generic consecutive-failure cooldown: when an account hits FAILURE_COOLDOWN_THRESHOLD
    // consecutive non-429 failures, put it in graduated cooldown (30s → 60s → 120s)
    // so the proxy routes around it instead of wasting requests.
    health.consecutiveFailures++
    if (health.consecutiveFailures >= FAILURE_COOLDOWN_THRESHOLD) {
      const stepIdx = Math.min(health.failureCooldownLevel, FAILURE_COOLDOWN_STEPS_MS.length - 1)
      health.failureCooldownUntilMs = Date.now() + FAILURE_COOLDOWN_STEPS_MS[stepIdx]
      health.failureCooldownLevel++
      health.consecutiveFailures = 0
    }
  }

  /**
   * Handle a 429 for an unknown-telemetry account.
   * Manages the rolling window, threshold, cooldown escalation, and probe state.
   * Uses `rateLimitState.cooldownUntilMs` (NOT _retryAfterMap) as source of truth.
   *
   * Cooldown length is UNKNOWN_COOLDOWN_BASE_MS doubled once per escalation
   * level, capped at UNKNOWN_COOLDOWN_MAX_MS.
   *
   * @private
   * @param {string} accountId - Unused here; kept for signature stability.
   * @param {AccountHealth} health
   */
  _recordUnknown429(accountId, health) {
    const rl = health.rateLimitState
    const now = Date.now()

    // If a probe was in flight when this 429 happened, it means the probe failed.
    // Escalate cooldown and clear probe lock.
    if (rl.probeInFlight) {
      rl.probeInFlight = false
      // Probe failure = another escalation step. The level is bumped first,
      // hence the `- 1` in the exponent.
      rl.cooldownLevel++
      const cooldownMs = Math.min(UNKNOWN_COOLDOWN_BASE_MS * Math.pow(2, rl.cooldownLevel - 1), UNKNOWN_COOLDOWN_MAX_MS)
      rl.cooldownUntilMs = now + cooldownMs
      // Reset consecutive counter for next window
      rl.consecutive429 = 0
      rl.windowStartMs = now
      return
    }

    // Rolling window: reset if there is no window yet or it has expired
    if (rl.windowStartMs === 0 || now - rl.windowStartMs > UNKNOWN_429_WINDOW_MS) {
      rl.consecutive429 = 0
      rl.windowStartMs = now
    }

    rl.consecutive429++

    if (rl.consecutive429 >= UNKNOWN_429_THRESHOLD) {
      // Threshold reached: enter cooldown at the current level...
      const cooldownMs = Math.min(UNKNOWN_COOLDOWN_BASE_MS * Math.pow(2, rl.cooldownLevel), UNKNOWN_COOLDOWN_MAX_MS)
      rl.cooldownUntilMs = now + cooldownMs
      // ...then increment the level for the next occurrence
      rl.cooldownLevel++
      // Reset window for next cycle
      rl.consecutive429 = 0
      rl.windowStartMs = now
    }
  }

  /**
   * Record a successful request against an account.
   * Clears all temporary exhaustion state: 429 cooldown and counters, probe
   * lock, escalation level, retry-after, and the generic failure cooldown.
   *
   * @param {string} accountId
   * @param {number} [latencyMs] - Round-trip time in milliseconds (optional)
   */
  recordSuccess(accountId, latencyMs = 0) {
    const health = this._healthMap.get(accountId)
    if (!health) return

    health.successCount++
    health.totalLatencyMs += latencyMs
    health.circuitBreaker.recordSuccess()

    // Any success is treated as a full recovery, so the escalation level
    // restarts at 0 together with the rest of the rate-limit state.
    const rl = health.rateLimitState
    rl.cooldownUntilMs = 0
    rl.consecutive429 = 0
    rl.windowStartMs = 0
    rl.probeInFlight = false
    rl.cooldownLevel = 0

    // Clear retry-after from the retryAfterMap (no longer cooling down)
    this._retryAfterMap.delete(accountId)

    // Reset generic failure cooldown state on success
    health.consecutiveFailures = 0
    health.failureCooldownUntilMs = 0
    health.failureCooldownLevel = 0
  }

  /**
   * Get the current health snapshot for an account.
   *
   * @param {string} accountId
   * @returns {{ score: number, quotaPercent: number|null, quotaSignal: string, disabled: boolean } | null}
   *   null when the account id is unknown.
   */
  getHealth(accountId) {
    const health = this._healthMap.get(accountId)
    if (!health) return null
    return {
      score: health.computeScore(),
      quotaPercent: health.quotaPercent,
      quotaSignal: health.quotaSignal,
      disabled: health.disabled,
    }
  }

  /**
   * Get a snapshot of health for all accounts, keyed by account id.
   *
   * Each entry includes at minimum `{ score, quotaPercent, quotaSignal, disabled }`.
   * If the account has `providerKey` and `modelId`, those are included too.
   *
   * @returns {Record<string, { score: number, quotaPercent: number|null, quotaSignal: string, disabled: boolean, providerKey?: string, modelId?: string }>}
   */
  getAllHealth() {
    const snapshot = {}
    for (const acct of this._accounts) {
      const entry = this.getHealth(acct.id)
      if (!entry) continue
      if (acct.providerKey !== undefined) entry.providerKey = acct.providerKey
      if (acct.modelId !== undefined) entry.modelId = acct.modelId
      snapshot[acct.id] = entry
    }
    return snapshot
  }

  /**
   * Get the remaining retry-after cooldown for an account in seconds.
   * Returns 0 if no cooldown is active.
   *
   * Checks both known-telemetry retryAfterMap and unknown-telemetry cooldownUntilMs
   * and reports the longer of the two. The generic failure cooldown is not included.
   *
   * @param {string} accountId
   * @returns {number}
   */
  getRetryAfter(accountId) {
    const retryAfterTs = this._retryAfterMap.get(accountId)
    const health = this._healthMap.get(accountId)
    const cooldownUntilMs = health?.rateLimitState?.cooldownUntilMs ?? 0

    const now = Date.now()
    const fromRetryAfter = retryAfterTs ? Math.max(0, (retryAfterTs - now) / 1000) : 0
    const fromCooldown = cooldownUntilMs > 0 ? Math.max(0, (cooldownUntilMs - now) / 1000) : 0

    return Math.max(fromRetryAfter, fromCooldown)
  }
}