@tokenbuddy/tokenbuddy 1.0.11 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. package/dist/src/buyer-store.d.ts +61 -0
  2. package/dist/src/buyer-store.d.ts.map +1 -1
  3. package/dist/src/buyer-store.js +12 -0
  4. package/dist/src/buyer-store.js.map +1 -1
  5. package/dist/src/cli.d.ts +47 -0
  6. package/dist/src/cli.d.ts.map +1 -1
  7. package/dist/src/cli.js +287 -63
  8. package/dist/src/cli.js.map +1 -1
  9. package/dist/src/credit-tracker.d.ts +26 -0
  10. package/dist/src/credit-tracker.d.ts.map +1 -1
  11. package/dist/src/credit-tracker.js +8 -0
  12. package/dist/src/credit-tracker.js.map +1 -1
  13. package/dist/src/daemon.d.ts +29 -3
  14. package/dist/src/daemon.d.ts.map +1 -1
  15. package/dist/src/daemon.js +292 -65
  16. package/dist/src/daemon.js.map +1 -1
  17. package/dist/src/doctor-clawtip-wallet.d.ts +25 -0
  18. package/dist/src/doctor-clawtip-wallet.d.ts.map +1 -1
  19. package/dist/src/doctor-clawtip-wallet.js +13 -0
  20. package/dist/src/doctor-clawtip-wallet.js.map +1 -1
  21. package/dist/src/doctor-diagnostics.d.ts +63 -0
  22. package/dist/src/doctor-diagnostics.d.ts.map +1 -1
  23. package/dist/src/doctor-diagnostics.js +39 -1
  24. package/dist/src/doctor-diagnostics.js.map +1 -1
  25. package/dist/src/index.d.ts +4 -0
  26. package/dist/src/index.d.ts.map +1 -1
  27. package/dist/src/index.js +4 -0
  28. package/dist/src/index.js.map +1 -1
  29. package/dist/src/init-clawtip-activation.d.ts +103 -0
  30. package/dist/src/init-clawtip-activation.d.ts.map +1 -1
  31. package/dist/src/init-clawtip-activation.js +60 -0
  32. package/dist/src/init-clawtip-activation.js.map +1 -1
  33. package/dist/src/init-payment-options.d.ts +124 -0
  34. package/dist/src/init-payment-options.d.ts.map +1 -1
  35. package/dist/src/init-payment-options.js +68 -0
  36. package/dist/src/init-payment-options.js.map +1 -1
  37. package/dist/src/model-index.d.ts +9 -0
  38. package/dist/src/model-index.d.ts.map +1 -1
  39. package/dist/src/model-index.js.map +1 -1
  40. package/dist/src/prewarm-cache.d.ts +89 -0
  41. package/dist/src/prewarm-cache.d.ts.map +1 -1
  42. package/dist/src/prewarm-cache.js +14 -1
  43. package/dist/src/prewarm-cache.js.map +1 -1
  44. package/dist/src/prewarm-scheduler.d.ts +62 -3
  45. package/dist/src/prewarm-scheduler.d.ts.map +1 -1
  46. package/dist/src/prewarm-scheduler.js +39 -8
  47. package/dist/src/prewarm-scheduler.js.map +1 -1
  48. package/dist/src/provider-install.d.ts +89 -3
  49. package/dist/src/provider-install.d.ts.map +1 -1
  50. package/dist/src/provider-install.js +77 -17
  51. package/dist/src/provider-install.js.map +1 -1
  52. package/dist/src/route-failover.d.ts +48 -0
  53. package/dist/src/route-failover.d.ts.map +1 -1
  54. package/dist/src/route-failover.js.map +1 -1
  55. package/dist/src/seller-catalog.d.ts +158 -10
  56. package/dist/src/seller-catalog.d.ts.map +1 -1
  57. package/dist/src/seller-catalog.js +79 -5
  58. package/dist/src/seller-catalog.js.map +1 -1
  59. package/dist/src/seller-metadata-cache.d.ts +29 -0
  60. package/dist/src/seller-metadata-cache.d.ts.map +1 -0
  61. package/dist/src/seller-metadata-cache.js +71 -0
  62. package/dist/src/seller-metadata-cache.js.map +1 -0
  63. package/dist/src/seller-pool.d.ts +71 -0
  64. package/dist/src/seller-pool.d.ts.map +1 -1
  65. package/dist/src/seller-pool.js +6 -1
  66. package/dist/src/seller-pool.js.map +1 -1
  67. package/dist/src/seller-route-planner.d.ts +118 -0
  68. package/dist/src/seller-route-planner.d.ts.map +1 -0
  69. package/dist/src/seller-route-planner.js +160 -0
  70. package/dist/src/seller-route-planner.js.map +1 -0
  71. package/dist/src/seller-routing-config.d.ts +69 -0
  72. package/dist/src/seller-routing-config.d.ts.map +1 -0
  73. package/dist/src/seller-routing-config.js +164 -0
  74. package/dist/src/seller-routing-config.js.map +1 -0
  75. package/dist/src/seller-routing-strategy.d.ts +118 -0
  76. package/dist/src/seller-routing-strategy.d.ts.map +1 -0
  77. package/dist/src/seller-routing-strategy.js +183 -0
  78. package/dist/src/seller-routing-strategy.js.map +1 -0
  79. package/dist/src/stream-failover.d.ts +23 -0
  80. package/dist/src/stream-failover.d.ts.map +1 -1
  81. package/dist/src/stream-failover.js +4 -0
  82. package/dist/src/stream-failover.js.map +1 -1
  83. package/dist/src/tb-proxyd.js +7 -21
  84. package/dist/src/tb-proxyd.js.map +1 -1
  85. package/dist/src/terminal-detect.d.ts +51 -0
  86. package/dist/src/terminal-detect.d.ts.map +1 -1
  87. package/dist/src/terminal-detect.js +42 -0
  88. package/dist/src/terminal-detect.js.map +1 -1
  89. package/dist/src/terminal-image.d.ts +41 -0
  90. package/dist/src/terminal-image.d.ts.map +1 -1
  91. package/dist/src/terminal-image.js +15 -0
  92. package/dist/src/terminal-image.js.map +1 -1
  93. package/package.json +1 -1
  94. package/src/buyer-store.ts +61 -0
  95. package/src/cli.ts +330 -68
  96. package/src/credit-tracker.ts +26 -0
  97. package/src/daemon.ts +363 -72
  98. package/src/doctor-clawtip-wallet.ts +25 -0
  99. package/src/doctor-diagnostics.ts +63 -1
  100. package/src/index.ts +4 -0
  101. package/src/init-clawtip-activation.ts +103 -0
  102. package/src/init-payment-options.ts +124 -0
  103. package/src/model-index.ts +9 -0
  104. package/src/prewarm-cache.ts +99 -1
  105. package/src/prewarm-scheduler.ts +97 -12
  106. package/src/provider-install.ts +125 -25
  107. package/src/route-failover.ts +48 -0
  108. package/src/seller-catalog.ts +158 -12
  109. package/src/seller-metadata-cache.ts +91 -0
  110. package/src/seller-pool.ts +77 -1
  111. package/src/seller-route-planner.ts +323 -0
  112. package/src/seller-routing-config.ts +198 -0
  113. package/src/seller-routing-strategy.ts +316 -0
  114. package/src/stream-failover.ts +23 -0
  115. package/src/tb-proxyd.ts +7 -23
  116. package/src/terminal-detect.ts +51 -0
  117. package/src/terminal-image.ts +41 -0
  118. package/tests/cli-routing.test.ts +287 -0
  119. package/tests/daemon-classify.test.ts +431 -0
  120. package/tests/daemon-roles.test.ts +92 -0
  121. package/tests/seller-catalog-utilities.test.ts +70 -0
  122. package/tests/seller-metadata-cache.test.ts +89 -0
  123. package/tests/seller-route-planner.test.ts +150 -0
  124. package/tests/seller-routing-config.test.ts +111 -0
  125. package/tests/seller-routing-strategy.test.ts +166 -0
  126. package/tests/tokenbuddy.test.ts +447 -33
  127. /package/{src → tests}/credit-tracker.test.ts +0 -0
  128. /package/{src → tests}/model-index.test.ts +0 -0
  129. /package/{src → tests}/prewarm-cache.test.ts +0 -0
  130. /package/{src → tests}/prewarm-scheduler.test.ts +0 -0
  131. /package/{src → tests}/route-failover.test.ts +0 -0
  132. /package/{src → tests}/seller-catalog-413.test.ts +0 -0
  133. /package/{src → tests}/seller-pool.test.ts +0 -0
  134. /package/{src → tests}/stream-failover.test.ts +0 -0
  135. /package/{src → tests}/thousand-seller.test.ts +0 -0
@@ -10,36 +10,96 @@ const logger = createModuleLogger("tb-proxyd:prewarm-cache");
10
10
  */
11
11
  export const DEFAULT_PREWARM_TTL_MS = 10 * 60 * 1000;
12
12
 
13
+ /**
14
+ * State machine for a cache entry.
15
+ * - `warming`: scheduling in progress; the candidates are not yet stable
16
+ * - `warm`: the last commit succeeded and the entry is still within its TTL
17
+ * - `stale`: the TTL expired or consecutive commits failed
18
+ * - `empty`: the commit returned 0 candidates (no seller serves this (model, protocol, payment) in the current registry)
19
+ */
13
20
  export type PrewarmState = "warming" | "warm" | "stale" | "empty";
14
21
 
22
+ /**
23
+ * Health profile of a single seller after a given prewarm commit.
24
+ * Written by `PrewarmCache.commitWarm()` after field normalisation (score 0-100, non-negative latency).
25
+ */
15
26
  export interface PrewarmCandidate {
27
+ /** Globally unique seller ID */
16
28
  sellerId: string;
29
+ /** Seller URL with any trailing slash removed */
17
30
  url: string;
31
+ /** Composite health score, 0-100; 0 means completely broken */
18
32
  healthScore: number; // 0-100
33
+ /** Unix timestamp (ms) of the last success; 0 means no success yet */
19
34
  lastSuccessAt: number;
35
+ /** Unix timestamp (ms) of the last failure; 0 means no failure yet */
20
36
  lastFailAt: number;
37
+ /** Average latency (ms); used as the fallback metric when sorting */
21
38
  avgLatencyMs: number;
39
+ /** Latency of the health probe (ms); optional */
40
+ healthProbeLatencyMs?: number;
41
+ /** Time to first token (ms); optional; the preferred metric for speed ordering */
42
+ ttftMs?: number;
43
+ /** Average inference latency (ms); optional */
44
+ avgInferenceMs?: number;
45
+ /** Upstream status (aligned with the semantics reported by the seller) */
46
+ upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
47
+ /** Upstream error class (HTTP status / error code); present only on failure */
48
+ upstreamErrorClass?: string;
22
49
  }
23
50
 
51
+ /**
52
+ * Cache entry: keyed by `(modelId, protocol, paymentMethod)`, it stores the health profiles of a set of candidate sellers.
53
+ * `warmedAt` is the start of the TTL window; `consecutiveWarmingFailures` drives exponential backoff.
54
+ */
24
55
  export interface PrewarmEntry {
56
+ /** Model ID (normalised) */
25
57
  modelId: string;
58
+ /** Protocol name (normalised) */
26
59
  protocol: string;
60
+ /** Payment method (normalised) */
27
61
  paymentMethod: string;
62
+ /** Current state of this entry */
28
63
  state: PrewarmState;
64
+ /** Candidate sellers matched for this (model, protocol, payment) */
29
65
  candidates: PrewarmCandidate[];
66
+ /** Timestamp of the latest successful commit; start of the TTL window */
30
67
  warmedAt: number;
68
+ /** TTL of this entry (ms); can be overridden explicitly at commit time */
31
69
  ttlMs: number;
70
+ /** Number of consecutive warming failures; the doctor panel uses it to flag "persistently broken" entries */
32
71
  consecutiveWarmingFailures: number;
72
+ /** Timestamp of the most recent transition into warming; for debugging */
33
73
  lastInFlightAt?: number;
34
74
  }
35
75
 
76
+ /**
77
+ * "Raw" candidate data handed over by the scheduler at commit time: every field except `sellerId` and `url` may be omitted,
78
+ * and `toCandidate()` performs the normalisation (score clamping, negative latency clipped to 0, etc.).
79
+ */
36
80
  export interface PrewarmCandidateInput {
81
+ /** Seller ID */
37
82
  sellerId: string;
83
+ /** Seller URL */
38
84
  url: string;
85
+ /** Health score (optional; defaults to 50 during normalisation) */
39
86
  healthScore?: number;
87
+ /** Timestamp of the last success (ms); optional */
40
88
  lastSuccessAt?: number;
89
+ /** Timestamp of the last failure (ms); optional */
41
90
  lastFailAt?: number;
91
+ /** Average latency (ms); optional */
42
92
  avgLatencyMs?: number;
93
+ /** Health probe latency (ms); optional */
94
+ healthProbeLatencyMs?: number;
95
+ /** Time to first token, TTFT (ms); optional */
96
+ ttftMs?: number;
97
+ /** Average inference latency (ms); optional */
98
+ avgInferenceMs?: number;
99
+ /** Upstream status; optional */
100
+ upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
101
+ /** Upstream error class; optional */
102
+ upstreamErrorClass?: string;
43
103
  }
44
104
 
45
105
  /**
@@ -69,6 +129,11 @@ interface PrewarmCacheOptions {
69
129
  now?: () => number;
70
130
  }
71
131
 
132
+ /**
133
+ * 进程内的 (model, protocol, payment) → 候选 seller 健康画像缓存。
134
+ * 单线程访问(Node JS 主线程),无内部锁;`commitWarm` 是写路径,
135
+ * `get/freshness` 是热路径读,TTL 由 `warmedAt + ttlMs` 决定。
136
+ */
72
137
  export class PrewarmCache {
73
138
  private readonly entries = new Map<string, PrewarmEntry>();
74
139
  private readonly defaultTtlMs: number;
@@ -327,24 +392,48 @@ export class PrewarmCache {
327
392
  }
328
393
  }
329
394
 
395
+ /**
396
+ * Return value of `PrewarmCache.freshness()`: a quick verdict on whether the existing prewarm can still be trusted.
397
+ * Used on the hot path of every inference request so callers do not walk the cache directly.
398
+ */
330
399
  export interface PrewarmFreshness {
400
+ /** Whether a matching entry exists (false is equivalent to the cold path) */
331
401
  present: boolean;
402
+ /** Whether the TTL has already elapsed */
332
403
  expired: boolean;
404
+ /** True when ≤ 10% of the TTL remains ("about to expire"); the scheduler uses this to trigger idle prewarming */
333
405
  expiringSoon: boolean;
406
+ /** Remaining TTL (ms); omitted once expired */
334
407
  remainingMs?: number;
408
+ /** Current entry state (forced to `stale` once expired) */
335
409
  state: PrewarmState;
410
+ /** The associated cache entry, if any */
336
411
  entry?: PrewarmEntry;
337
412
  }
338
413
 
414
+ /**
415
+ * Return value of `PrewarmCache.beginWarming()`: marks one prewarm as in flight.
416
+ * `hadPrevious` lets the caller decide whether to keep exposing the old candidates while the entry is `warming`.
417
+ */
339
418
  export interface PrewarmBeginResult {
419
+ /** Cache key (equivalent to the output of `prewarmKey()`) */
340
420
  key: string;
421
+ /** The entry just written back to the cache (state=`warming`) */
341
422
  entry: PrewarmEntry;
423
+ /** Whether the cache already held an entry before the call; lets the caller decide whether to keep the old candidates */
342
424
  hadPrevious: boolean;
343
425
  }
344
426
 
427
+ /**
428
+ * Return value of `PrewarmCache.commitWarm()`: one candidate set was committed successfully.
429
+ * `replacedSellers` lets the caller detect candidate churn.
430
+ */
345
431
  export interface PrewarmCommitResult {
432
+ /** Cache key */
346
433
  key: string;
434
+ /** The new entry after the commit (state=`warm` or `empty`) */
347
435
  entry: PrewarmEntry;
436
+ /** Candidate sellers from the previous entry (IDs of sellers that are no longer cached) */
348
437
  replacedSellers: string[];
349
438
  }
350
439
 
@@ -355,10 +444,19 @@ function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
355
444
  healthScore: clampScore(input.healthScore ?? 50),
356
445
  lastSuccessAt: input.lastSuccessAt ?? 0,
357
446
  lastFailAt: input.lastFailAt ?? 0,
358
- avgLatencyMs: Math.max(0, input.avgLatencyMs ?? 0)
447
+ avgLatencyMs: Math.max(0, input.avgLatencyMs ?? 0),
448
+ healthProbeLatencyMs: finiteNonNegative(input.healthProbeLatencyMs),
449
+ ttftMs: finiteNonNegative(input.ttftMs),
450
+ avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
451
+ upstreamStatus: input.upstreamStatus,
452
+ upstreamErrorClass: input.upstreamErrorClass
359
453
  };
360
454
  }
361
455
 
456
+ function finiteNonNegative(value: number | undefined): number | undefined {
  // Normalises an optional numeric metric: anything that is not a finite
  // number (undefined, NaN, +/-Infinity) becomes undefined, and a finite
  // negative value is raised to 0. `toCandidate()` applies this to
  // healthProbeLatencyMs, ttftMs and avgInferenceMs.
457
+ return Number.isFinite(value) ? Math.max(0, value as number) : undefined;
458
+ }
459
+
362
460
  function clampScore(score: number): number {
363
461
  if (!Number.isFinite(score)) {
364
462
  return 50;
@@ -5,45 +5,87 @@ import type { PrewarmCache, PrewarmCandidate } from "./prewarm-cache.js";
5
5
 
6
6
  const logger = createModuleLogger("tb-proxyd:prewarm-scheduler");
7
7
 
8
+ /**
9
+ * Why a prewarm was triggered; the scheduler uses it to decide concurrency and log grouping.
10
+ * - `startup`: batch prewarm at daemon startup; subject to startup jitter
11
+ * - `lazy`: prewarm triggered by the first user request for a given (model, protocol, payment)
12
+ * - `idle`: refresh triggered by the background idle tick
13
+ * - `explicit`: an explicit trigger such as `tb doctor --prewarm`
14
+ */
8
15
  export type PrewarmReason = "startup" | "lazy" | "idle" | "explicit";
9
16
 
17
+ /**
18
+ * Result of a single health probe, returned by a `SellerProber`.
19
+ * The scheduler derives `healthScore` from `ok`, `latencyMs` and `upstreamStatus` (see `scoreProbeResult`), and also passes `upstreamStatus` through to the cached candidate unchanged.
20
+ */
10
21
  export interface ProbeResult {
22
+ /** Whether the probe succeeded (HTTP 2xx and semantically "healthy") */
11
23
  ok: boolean;
24
+ /** Total probe duration (ms); used in the healthScore calculation */
12
25
  latencyMs: number;
26
+ /** HTTP status (if the prober was able to obtain one) */
13
27
  httpStatus?: number;
28
+ /** Error description (present only when `ok=false`; carries no sensitive fields) */
14
29
  errorMessage?: string;
30
+ /** Status reported by the upstream; aligned with the semantics of the health probe endpoint or of the fallback inference */
31
+ upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
32
+ /** Upstream error class (status code / error code) */
33
+ upstreamErrorClass?: string;
34
+ /** Time to first token (ms); optional; preferred when ordering by speed */
35
+ ttftMs?: number;
36
+ /** Average inference latency (ms); optional */
37
+ avgInferenceMs?: number;
15
38
  }
16
39
 
17
40
  /**
18
41
  * The probe function used by the scheduler. Decoupled so the scheduler can
19
42
  * be unit-tested without spinning up HTTP servers. The default
20
- * implementation in `health-probe.ts` (PR-2/PR-3) calls
21
- * `GET <seller.url>/healthz` with a 3s `AbortSignal.timeout`. Probers must
43
+ * implementation in `daemon.ts` calls `GET <seller.url>/health` with a 3s
44
+ * timeout. Probers must
22
45
  * observe the provided `AbortSignal` and reject when it aborts so the
23
46
  * scheduler can short-circuit in-flight probes on `stop()`.
  * Signature: receives the seller to probe plus the abort signal and
  * resolves to a `ProbeResult`.
24
47
  */
25
48
  export type SellerProber = (seller: RegistrySeller, signal: AbortSignal) => Promise<ProbeResult>;
26
49
 
50
+ /**
51
+ * Dependencies and tunables needed to construct a `PrewarmScheduler`. For the defaults see the design document
52
+ * buyer-driven-fallback-design.md §18.5-§18.6: concurrency 4, 30s per seller,
53
+ * 30/min globally, startup jitter 5-10s.
54
+ */
27
55
  export interface PrewarmSchedulerOptions {
56
+ /** Shared model index; used to resolve a modelId into a list of sellers */
28
57
  modelIndex: ModelIndex;
58
+ /** Shared prewarm cache; written by the scheduler and read by the controller */
29
59
  cache: PrewarmCache;
60
+ /** Injected health prober; the scheduler never issues HTTP requests itself */
30
61
  prober: SellerProber;
31
62
  // Limits (defaults match buyer-driven-fallback-design.md §18.6).
63
+ /** Concurrency limit; default 4 */
32
64
  concurrency?: number;
65
+ /** Minimum interval between two probes of the same seller (ms); default 30000 */
33
66
  perSellerMinIntervalMs?: number;
67
+ /** Maximum number of probes per minute (global throttle); default 30 */
34
68
  maxPrewarmPerMinute?: number;
35
69
  // Idle loop cadence; the scheduler can also be driven externally
36
70
  // (PR-2.1 wires `tickIdle` into the existing registry-loop heartbeat).
71
+ /** Interval of the idle loop (ms); default 60000 */
37
72
  idleIntervalMs?: number;
38
73
  // Startup jitter (5-10s by default per §18.5.1).
74
+ /** Lower bound of the startup jitter (ms); default 5000 */
39
75
  startupJitterMinMs?: number;
76
+ /** Upper bound of the startup jitter (ms); default 10000 */
40
77
  startupJitterMaxMs?: number;
41
78
  // Hooks for testing; defaults to Node's setTimeout / setImmediate.
79
+ /** Injectable sleep (abort-aware); defaults to Node setTimeout */
42
80
  sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;
81
+ /** Injectable randomness source; defaults to `Math.random` */
43
82
  random?: () => number;
83
+ /** Injectable clock; defaults to `Date.now` */
44
84
  now?: () => number;
45
85
  // Optional filter applied to every probe (e.g. preferred protocol).
86
+ /** Global default protocol filter; can be overridden by `schedulePrewarm` */
46
87
  protocol?: string;
88
+ /** Global default payment-method filter; can be overridden by `schedulePrewarm` */
47
89
  paymentMethod?: string;
48
90
  }
49
91
 
@@ -61,15 +103,28 @@ interface PrewarmTask {
61
103
  errorMessage?: string;
62
104
  }
63
105
 
106
+ /**
107
+ * Return value of `PrewarmScheduler.stats()`: a snapshot of the scheduler's current runtime metrics.
108
+ * `tb doctor` uses it to judge whether scheduling is overloaded or has been rate-limited for a long time.
109
+ */
64
110
  export interface PrewarmSchedulerStats {
111
+ /** Number of tasks in the queue that have not started yet */
65
112
  queueDepth: number;
113
+ /** Number of tasks currently probing */
66
114
  inFlight: number;
115
+ /** Cumulative number of enqueued tasks (including rate_limited ones) */
67
116
  totalScheduled: number;
117
+ /** Cumulative number of tasks that succeeded */
68
118
  totalSucceeded: number;
119
+ /** Cumulative number of tasks that failed (every candidate probe failed) */
69
120
  totalFailed: number;
121
+ /** Cumulative number of tasks skipped because of the global throttle */
70
122
  totalRateLimited: number;
123
+ /** Total probes issued in the last 60 seconds (used to tell whether maxPrewarmPerMinute was hit) */
71
124
  recentProbesInLastMinute: number;
125
+ /** Currently configured concurrency limit */
72
126
  concurrency: number;
127
+ /** Currently configured per-minute probe limit */
73
128
  maxPrewarmPerMinute: number;
74
129
  }
75
130
 
@@ -231,14 +286,15 @@ export class PrewarmScheduler {
231
286
  * the configured jitter window. Resolves once every scheduled task has
232
287
  * reached a terminal state.
233
288
  */
234
- async runStartupPrewarm(modelIds: string[]): Promise<void> {
289
+ async runStartupPrewarm(inputs: Array<string | { modelId: string; protocol?: string; paymentMethod?: string }>): Promise<void> {
  // The added signature widens the parameter: each element is either a bare
  // model ID (the only form the removed signature accepted) or an object
  // that can also carry `protocol` / `paymentMethod` for that task.
235
290
  await this.sleep(this.jitterMs(), this.abortController?.signal);
236
291
  if (this.abortController?.signal.aborted) {
237
292
  return;
238
293
  }
239
- const tasks = modelIds.map((modelId) =>
240
- this.schedulePrewarm({ modelId, reason: "startup" })
241
- );
294
+ const tasks = inputs.map((input) => {
  // A bare string is shorthand for `{ modelId }`; the object form is used as given.
295
+ const task = typeof input === "string" ? { modelId: input } : input;
  // `reason` is spread last, so every task scheduled here is tagged "startup".
296
+ return this.schedulePrewarm({ ...task, reason: "startup" });
297
+ });
242
298
  await Promise.all(tasks);
243
299
  }
244
300
 
@@ -429,21 +485,32 @@ export class PrewarmScheduler {
429
485
  }
430
486
  this.lastProbeAtBySeller.set(seller.id, this.now());
431
487
  if (result.ok) {
432
- anyOk = true;
488
+ const healthScore = scoreProbeResult(result);
489
+ anyOk = anyOk || healthScore > 0;
433
490
  candidates.push({
434
491
  sellerId: seller.id,
435
492
  url: seller.url,
436
- healthScore: scoreFromLatency(result.latencyMs),
493
+ healthScore,
437
494
  lastSuccessAt: this.now(),
438
495
  lastFailAt: 0,
439
- avgLatencyMs: result.latencyMs
496
+ avgLatencyMs: result.latencyMs,
497
+ healthProbeLatencyMs: result.latencyMs,
498
+ ttftMs: result.ttftMs,
499
+ avgInferenceMs: result.avgInferenceMs,
500
+ upstreamStatus: result.upstreamStatus,
501
+ upstreamErrorClass: result.upstreamErrorClass
440
502
  });
441
503
  logger.info("prewarm.succeeded", "seller probe succeeded", {
442
504
  taskId: task.id,
443
505
  sellerId: seller.id,
444
506
  modelId: task.modelId,
445
507
  latencyMs: result.latencyMs,
446
- httpStatus: result.httpStatus
508
+ httpStatus: result.httpStatus,
509
+ healthScore,
510
+ upstreamStatus: result.upstreamStatus,
511
+ upstreamErrorClass: result.upstreamErrorClass,
512
+ ttftMs: result.ttftMs,
513
+ avgInferenceMs: result.avgInferenceMs
447
514
  });
448
515
  } else {
449
516
  candidates.push({
@@ -452,14 +519,21 @@ export class PrewarmScheduler {
452
519
  healthScore: 0,
453
520
  lastSuccessAt: 0,
454
521
  lastFailAt: this.now(),
455
- avgLatencyMs: result.latencyMs
522
+ avgLatencyMs: result.latencyMs,
523
+ healthProbeLatencyMs: result.latencyMs,
524
+ ttftMs: result.ttftMs,
525
+ avgInferenceMs: result.avgInferenceMs,
526
+ upstreamStatus: result.upstreamStatus,
527
+ upstreamErrorClass: result.upstreamErrorClass
456
528
  });
457
529
  logger.warn("prewarm.failed", "seller probe failed", {
458
530
  taskId: task.id,
459
531
  sellerId: seller.id,
460
532
  modelId: task.modelId,
461
533
  errorMessage: result.errorMessage,
462
- httpStatus: result.httpStatus
534
+ httpStatus: result.httpStatus,
535
+ upstreamStatus: result.upstreamStatus,
536
+ upstreamErrorClass: result.upstreamErrorClass
463
537
  });
464
538
  }
465
539
  }
@@ -579,3 +653,14 @@ function scoreFromLatency(latencyMs: number): number {
579
653
  if (latencyMs <= 3000) return 40;
580
654
  return 20;
581
655
  }
656
+
657
+ function scoreProbeResult(result: ProbeResult): number {
  // Turns a probe result into a health score by combining the latency-based
  // score from `scoreFromLatency` with the upstream-reported status.
  // Returns 0 for "unhealthy", at most 40 for "degraded", and the plain
  // latency score for any other status (including a missing one).
658
+ if (result.upstreamStatus === "unhealthy") {
  // Latency is ignored here; the probe loop only counts a probe as a
  // success when the score is > 0, so this seller does not count.
659
+ return 0;
660
+ }
661
+ const base = scoreFromLatency(result.latencyMs);
662
+ if (result.upstreamStatus === "degraded") {
  // Cap, never raise: a slow degraded seller keeps its lower latency score.
663
+ return Math.min(base, 40);
664
+ }
665
+ return base;
666
+ }