@tokenbuddy/tokenbuddy 1.0.36 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/dist/src/buyer-store.d.ts +7 -2
  2. package/dist/src/buyer-store.js +46 -7
  3. package/dist/src/cli.d.ts +1 -0
  4. package/dist/src/cli.js +15 -7
  5. package/dist/src/daemon.d.ts +12 -0
  6. package/dist/src/daemon.js +791 -61
  7. package/dist/src/doctor-diagnostics.js +1 -6
  8. package/dist/src/provider-install.d.ts +2 -2
  9. package/dist/src/provider-install.js +248 -2
  10. package/dist/src/seller-catalog.d.ts +21 -0
  11. package/dist/src/seller-catalog.js +17 -0
  12. package/dist/src/seller-route-planner.d.ts +4 -1
  13. package/dist/src/seller-route-planner.js +3 -0
  14. package/dist/src/seller-routing-strategy.d.ts +3 -0
  15. package/dist/src/terminal-detect.d.ts +1 -1
  16. package/dist/src/terminal-detect.js +3 -2
  17. package/dist/src/workdir.d.ts +10 -0
  18. package/dist/src/workdir.js +26 -0
  19. package/package.json +15 -2
  20. package/static/ui/assets/index-Djfl9tw5.js +271 -0
  21. package/static/ui/assets/index-DkfztCkn.css +1 -0
  22. package/static/ui/index.html +2 -2
  23. package/dist/src/buyer-store.d.ts.map +0 -1
  24. package/dist/src/buyer-store.js.map +0 -1
  25. package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
  26. package/dist/src/clawtip-bootstrap.js.map +0 -1
  27. package/dist/src/cli.d.ts.map +0 -1
  28. package/dist/src/cli.js.map +0 -1
  29. package/dist/src/credit-tracker.d.ts.map +0 -1
  30. package/dist/src/credit-tracker.js.map +0 -1
  31. package/dist/src/daemon.d.ts.map +0 -1
  32. package/dist/src/daemon.js.map +0 -1
  33. package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
  34. package/dist/src/doctor-clawtip-wallet.js.map +0 -1
  35. package/dist/src/doctor-diagnostics.d.ts.map +0 -1
  36. package/dist/src/doctor-diagnostics.js.map +0 -1
  37. package/dist/src/index.d.ts.map +0 -1
  38. package/dist/src/index.js.map +0 -1
  39. package/dist/src/init-clawtip-activation.d.ts.map +0 -1
  40. package/dist/src/init-clawtip-activation.js.map +0 -1
  41. package/dist/src/init-payment-options.d.ts.map +0 -1
  42. package/dist/src/init-payment-options.js.map +0 -1
  43. package/dist/src/init-setup.d.ts.map +0 -1
  44. package/dist/src/init-setup.js.map +0 -1
  45. package/dist/src/model-index.d.ts.map +0 -1
  46. package/dist/src/model-index.js.map +0 -1
  47. package/dist/src/package-update.d.ts.map +0 -1
  48. package/dist/src/package-update.js.map +0 -1
  49. package/dist/src/prewarm-cache.d.ts.map +0 -1
  50. package/dist/src/prewarm-cache.js.map +0 -1
  51. package/dist/src/prewarm-scheduler.d.ts.map +0 -1
  52. package/dist/src/prewarm-scheduler.js.map +0 -1
  53. package/dist/src/provider-install.d.ts.map +0 -1
  54. package/dist/src/provider-install.js.map +0 -1
  55. package/dist/src/provider-routing-config.d.ts.map +0 -1
  56. package/dist/src/provider-routing-config.js.map +0 -1
  57. package/dist/src/registry-trust.d.ts.map +0 -1
  58. package/dist/src/registry-trust.js.map +0 -1
  59. package/dist/src/route-failover.d.ts.map +0 -1
  60. package/dist/src/route-failover.js.map +0 -1
  61. package/dist/src/seller-catalog.d.ts.map +0 -1
  62. package/dist/src/seller-catalog.js.map +0 -1
  63. package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
  64. package/dist/src/seller-concurrency-limiter.js.map +0 -1
  65. package/dist/src/seller-metadata-cache.d.ts.map +0 -1
  66. package/dist/src/seller-metadata-cache.js.map +0 -1
  67. package/dist/src/seller-pool.d.ts.map +0 -1
  68. package/dist/src/seller-pool.js.map +0 -1
  69. package/dist/src/seller-route-planner.d.ts.map +0 -1
  70. package/dist/src/seller-route-planner.js.map +0 -1
  71. package/dist/src/seller-routing-config.d.ts.map +0 -1
  72. package/dist/src/seller-routing-config.js.map +0 -1
  73. package/dist/src/seller-routing-strategy.d.ts.map +0 -1
  74. package/dist/src/seller-routing-strategy.js.map +0 -1
  75. package/dist/src/stream-failover.d.ts.map +0 -1
  76. package/dist/src/stream-failover.js.map +0 -1
  77. package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
  78. package/dist/src/tb-clawtip-proof.js.map +0 -1
  79. package/dist/src/tb-proxyd.d.ts.map +0 -1
  80. package/dist/src/tb-proxyd.js.map +0 -1
  81. package/dist/src/terminal-detect.d.ts.map +0 -1
  82. package/dist/src/terminal-detect.js.map +0 -1
  83. package/dist/src/terminal-image.d.ts.map +0 -1
  84. package/dist/src/terminal-image.js.map +0 -1
  85. package/src/buyer-store.ts +0 -1090
  86. package/src/clawtip-bootstrap.ts +0 -65
  87. package/src/cli.ts +0 -2243
  88. package/src/credit-tracker.ts +0 -295
  89. package/src/daemon.ts +0 -5475
  90. package/src/doctor-clawtip-wallet.ts +0 -95
  91. package/src/doctor-diagnostics.ts +0 -1026
  92. package/src/index.ts +0 -16
  93. package/src/init-clawtip-activation.ts +0 -695
  94. package/src/init-payment-options.ts +0 -373
  95. package/src/init-setup.ts +0 -165
  96. package/src/model-index.ts +0 -278
  97. package/src/package-update.ts +0 -311
  98. package/src/prewarm-cache.ts +0 -485
  99. package/src/prewarm-scheduler.ts +0 -675
  100. package/src/provider-install.ts +0 -1006
  101. package/src/provider-routing-config.ts +0 -410
  102. package/src/registry-trust.ts +0 -51
  103. package/src/route-failover.ts +0 -304
  104. package/src/seller-catalog.ts +0 -505
  105. package/src/seller-concurrency-limiter.ts +0 -161
  106. package/src/seller-metadata-cache.ts +0 -91
  107. package/src/seller-pool.ts +0 -557
  108. package/src/seller-route-planner.ts +0 -513
  109. package/src/seller-routing-config.ts +0 -211
  110. package/src/seller-routing-strategy.ts +0 -362
  111. package/src/stream-failover.ts +0 -152
  112. package/src/tb-clawtip-proof.ts +0 -28
  113. package/src/tb-proxyd.ts +0 -101
  114. package/src/terminal-detect.ts +0 -333
  115. package/src/terminal-image.ts +0 -228
  116. package/static/ui/assets/index-0MVXD7bH.css +0 -1
  117. package/static/ui/assets/index-BVbeDEwq.js +0 -271
  118. package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
  119. package/tests/cli-routing.test.ts +0 -363
  120. package/tests/control-plane-ui-endpoints.test.ts +0 -1630
  121. package/tests/credit-tracker.test.ts +0 -165
  122. package/tests/daemon-413-fallback.test.ts +0 -92
  123. package/tests/daemon-classify.test.ts +0 -452
  124. package/tests/daemon-roles.test.ts +0 -92
  125. package/tests/daemon-trusted-registry-cache.test.ts +0 -132
  126. package/tests/e2e.test.ts +0 -366
  127. package/tests/image-generation-e2e.test.ts +0 -230
  128. package/tests/model-index.test.ts +0 -198
  129. package/tests/package-update.test.ts +0 -147
  130. package/tests/prewarm-cache.test.ts +0 -296
  131. package/tests/prewarm-scheduler.test.ts +0 -367
  132. package/tests/provider-routing-config.test.ts +0 -150
  133. package/tests/registry-trust.test.ts +0 -28
  134. package/tests/route-failover.test.ts +0 -222
  135. package/tests/seller-catalog-413.test.ts +0 -120
  136. package/tests/seller-catalog-utilities.test.ts +0 -124
  137. package/tests/seller-concurrency-limiter.test.ts +0 -83
  138. package/tests/seller-metadata-cache.test.ts +0 -89
  139. package/tests/seller-pool.test.ts +0 -365
  140. package/tests/seller-route-planner.test.ts +0 -312
  141. package/tests/seller-routing-config.test.ts +0 -124
  142. package/tests/seller-routing-strategy.test.ts +0 -167
  143. package/tests/stream-failover.test.ts +0 -52
  144. package/tests/thousand-seller.test.ts +0 -151
  145. package/tests/tokenbuddy.test.ts +0 -4043
  146. package/tsconfig.json +0 -8
@@ -1,675 +0,0 @@
1
- import { createModuleLogger } from "@tokenbuddy/logging";
2
- import type { RegistrySeller } from "./seller-catalog.js";
3
- import type { ModelIndex } from "./model-index.js";
4
- import type { PrewarmCache, PrewarmCandidate } from "./prewarm-cache.js";
5
-
6
- const logger = createModuleLogger("tb-proxyd:prewarm-scheduler");
7
-
8
- /**
9
- * 触发 prewarm 的原因,用于调度器决定并发与日志分组。
10
- * - `startup`:daemon 启动时的批量预热,会受 startup jitter 影响
11
- * - `lazy`:用户首次请求某 (model, protocol, payment) 时触发的预热
12
- * - `idle`:后台 idle tick 触发的刷新
13
- * - `explicit`:`tb doctor --prewarm` 之类的显式触发
14
- */
15
- export type PrewarmReason = "startup" | "lazy" | "idle" | "explicit";
16
-
17
- /**
18
- * 单次 health probe 的结果,由 `SellerProber` 返回。
19
- * 调度器会基于 `ok` + `latencyMs` 推算 `healthScore`,并把 `upstreamStatus` 直接透传。
20
- */
21
- export interface ProbeResult {
22
- /** probe 是否成功(HTTP 2xx 且语义上代表"健康") */
23
- ok: boolean;
24
- /** probe 总耗时(毫秒),用于 healthScore 计算 */
25
- latencyMs: number;
26
- /** HTTP status(如果 prober 能拿到) */
27
- httpStatus?: number;
28
- /** 错误描述(仅当 `ok=false` 时存在,不携带敏感字段) */
29
- errorMessage?: string;
30
- /** 上游报告的状态,与 health probe 端点或 fallback 推断的语义对齐 */
31
- upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
32
- /** 上游错误类名(status code / error code) */
33
- upstreamErrorClass?: string;
34
- /** 首 token 延迟(毫秒),可选;speed 排序时优先使用 */
35
- ttftMs?: number;
36
- /** 平均推理延迟(毫秒),可选 */
37
- avgInferenceMs?: number;
38
- /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
39
- avgTokensPerSecond?: number;
40
- /** 临时容量避让截止时间;大于当前时间时不参与路由 */
41
- capacityBlockedUntil?: number;
42
- }
43
-
44
- /**
45
- * The probe function used by the scheduler. Decoupled so the scheduler can
46
- * be unit-tested without spinning up HTTP servers. The default
47
- * implementation in `daemon.ts` calls `GET <seller.url>/health` with a 3s
48
- * timeout. Probers must
49
- * observe the provided `AbortSignal` and reject when it aborts so the
50
- * scheduler can short-circuit in-flight probes on `stop()`.
51
- */
52
- export type SellerProber = (seller: RegistrySeller, signal: AbortSignal) => Promise<ProbeResult>;
53
-
54
- /**
55
- * 构造 `PrewarmScheduler` 所需的依赖与可调参数。默认值见设计文档
56
- * buyer-driven-fallback-design.md §18.5-§18.6:并发 4、per-seller 30s、
57
- * 全局 30/min、startup jitter 5-10s。
58
- */
59
- export interface PrewarmSchedulerOptions {
60
- /** 共享的 model index,用于把 modelId 解析为 seller 列表 */
61
- modelIndex: ModelIndex;
62
- /** 共享的 prewarm 缓存,调度器写入并由控制器读取 */
63
- cache: PrewarmCache;
64
- /** 注入的 health prober,调度器不直接发 HTTP */
65
- prober: SellerProber;
66
- // Limits (defaults match buyer-driven-fallback-design.md §18.6).
67
- /** 并发上限,默认 4 */
68
- concurrency?: number;
69
- /** 同一 seller 两次 probe 之间的最小间隔(毫秒),默认 30000 */
70
- perSellerMinIntervalMs?: number;
71
- /** 每分钟最多 probe 多少次(全局节流),默认 30 */
72
- maxPrewarmPerMinute?: number;
73
- // Idle loop cadence; the scheduler can also be driven externally
74
- // (PR-2.1 wires `tickIdle` into the existing registry-loop heartbeat).
75
- /** idle 循环的间隔(毫秒),默认 60000 */
76
- idleIntervalMs?: number;
77
- // Startup jitter (5-10s by default per §18.5.1).
78
- /** startup 抖动的下界(毫秒),默认 5000 */
79
- startupJitterMinMs?: number;
80
- /** startup 抖动的上界(毫秒),默认 10000 */
81
- startupJitterMaxMs?: number;
82
- // Hooks for testing; defaults to Node's setTimeout / setImmediate.
83
- /** 可注入的 sleep(支持 abort),默认 Node setTimeout */
84
- sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;
85
- /** 可注入的随机源,默认 `Math.random` */
86
- random?: () => number;
87
- /** 可注入的时钟,默认 `Date.now` */
88
- now?: () => number;
89
- // Optional filter applied to every probe (e.g. preferred protocol).
90
- /** 全局默认协议 filter,可被 `schedulePrewarm` 覆盖 */
91
- protocol?: string;
92
- /** 全局默认支付方式 filter,可被 `schedulePrewarm` 覆盖 */
93
- paymentMethod?: string;
94
- }
95
-
96
- interface PrewarmTask {
97
- id: number;
98
- modelId: string;
99
- reason: PrewarmReason;
100
- protocol: string;
101
- paymentMethod: string;
102
- enqueuedAt: number;
103
- sellerIds: string[];
104
- startedAt?: number;
105
- completedAt?: number;
106
- status: "queued" | "running" | "succeeded" | "failed" | "canceled" | "rate_limited";
107
- errorMessage?: string;
108
- }
109
-
110
- /**
111
- * `PrewarmScheduler.stats()` 的返回:调度器当前的运行指标快照。
112
- * `tb doctor` 据此判断调度是否过载、是否长期被限流。
113
- */
114
- export interface PrewarmSchedulerStats {
115
- /** 当前队列里尚未开始的 task 数 */
116
- queueDepth: number;
117
- /** 正在 probe 的 task 数 */
118
- inFlight: number;
119
- /** 累计入队的 task 数(包含 rate_limited) */
120
- totalScheduled: number;
121
- /** 累计成功的 task 数 */
122
- totalSucceeded: number;
123
- /** 累计失败的 task 数(所有候选 probe 都失败) */
124
- totalFailed: number;
125
- /** 累计因全局节流被跳过的 task 数 */
126
- totalRateLimited: number;
127
- /** 最近 60 秒内发起的 probe 总数(用于判断是否撞到 maxPrewarmPerMinute) */
128
- recentProbesInLastMinute: number;
129
- /** 当前配置的并发上限 */
130
- concurrency: number;
131
- /** 当前配置的每分钟 probe 上限 */
132
- maxPrewarmPerMinute: number;
133
- }
134
-
135
- /**
136
- * Background scheduler that warms up sellers for a (model, protocol,
137
- * payment) triple on demand. The scheduler owns:
138
- * - queue management with bounded concurrency (default 4)
139
- * - per-seller rate limiting (default 30s between probes to the same
140
- * seller, even across different models)
141
- * - global rate limiting (default 30 probes/minute)
142
- * - jitter on startup and between probes to avoid thundering herds
143
- *
144
- * The scheduler does NOT own HTTP I/O; that lives in the injected
145
- * `prober` so tests can swap in a deterministic stub.
146
- */
147
- export class PrewarmScheduler {
148
- private readonly modelIndex: ModelIndex;
149
- private readonly cache: PrewarmCache;
150
- private readonly prober: SellerProber;
151
-
152
- private readonly concurrency: number;
153
- private readonly perSellerMinIntervalMs: number;
154
- private readonly maxPrewarmPerMinute: number;
155
- private readonly idleIntervalMs: number;
156
- private readonly startupJitterMinMs: number;
157
- private readonly startupJitterMaxMs: number;
158
- private readonly sleep: (ms: number, signal?: AbortSignal) => Promise<void>;
159
- private readonly random: () => number;
160
- private readonly now: () => number;
161
- private readonly protocol: string | undefined;
162
- private readonly paymentMethod: string | undefined;
163
-
164
- private readonly queue: PrewarmTask[] = [];
165
- private inFlight = 0;
166
- private recentProbes: number[] = [];
167
- private lastProbeAtBySeller = new Map<string, number>();
168
- private nextTaskId = 1;
169
-
170
- private totalScheduled = 0;
171
- private totalSucceeded = 0;
172
- private totalFailed = 0;
173
- private totalRateLimited = 0;
174
-
175
- private abortController: AbortController | null = null;
176
- private idleLoopPromise: Promise<void> | null = null;
177
-
178
- constructor(options: PrewarmSchedulerOptions) {
179
- this.modelIndex = options.modelIndex;
180
- this.cache = options.cache;
181
- this.prober = options.prober;
182
- this.concurrency = options.concurrency ?? 4;
183
- this.perSellerMinIntervalMs = options.perSellerMinIntervalMs ?? 30_000;
184
- this.maxPrewarmPerMinute = options.maxPrewarmPerMinute ?? 30;
185
- this.idleIntervalMs = options.idleIntervalMs ?? 60_000;
186
- this.startupJitterMinMs = options.startupJitterMinMs ?? 5_000;
187
- this.startupJitterMaxMs = options.startupJitterMaxMs ?? 10_000;
188
- this.sleep = options.sleep ?? defaultSleep;
189
- this.random = options.random ?? Math.random;
190
- this.now = options.now ?? Date.now;
191
- this.protocol = options.protocol;
192
- this.paymentMethod = options.paymentMethod;
193
- }
194
-
195
- /**
196
- * Start the background idle loop. Safe to call once per scheduler
197
- * instance; subsequent calls are no-ops. The idle loop probes any cached
198
- * entry whose TTL is within 10% of expiry (`isExpiringSoon`).
199
- */
200
- start(): void {
201
- if (this.abortController) {
202
- return;
203
- }
204
- this.abortController = new AbortController();
205
- this.idleLoopPromise = this.runIdleLoop(this.abortController.signal);
206
- }
207
-
208
- /**
209
- * Cancel the idle loop and any pending tasks. Existing `inFlight` probes
210
- * are not aborted (the prober owns its own timeout) but will not be
211
- * dispatched to the cache.
212
- */
213
- async stop(): Promise<void> {
214
- if (!this.abortController) {
215
- return;
216
- }
217
- this.abortController.abort();
218
- this.abortController = null;
219
- if (this.idleLoopPromise) {
220
- await this.idleLoopPromise.catch(() => undefined);
221
- this.idleLoopPromise = null;
222
- }
223
- // Mark queued tasks as canceled so callers awaiting them can short-circuit.
224
- for (const task of this.queue) {
225
- task.status = "canceled";
226
- }
227
- this.queue.length = 0;
228
- }
229
-
230
- /**
231
- * Enqueue a prewarm for a (model, protocol, payment) triple. The
232
- * `reason` controls how aggressively the scheduler resolves candidates
233
- * (e.g. `startup` defers; `lazy` waits on the returned promise). The
234
- * returned promise resolves with the final task status once the queue
235
- * drains or the scheduler is stopped.
236
- */
237
- schedulePrewarm(input: {
238
- modelId: string;
239
- reason: PrewarmReason;
240
- protocol?: string;
241
- paymentMethod?: string;
242
- blockOnFirst?: boolean;
243
- }): Promise<PrewarmTask> {
244
- const protocol = input.protocol ?? this.protocol ?? "chat_completions";
245
- const paymentMethod = input.paymentMethod ?? this.paymentMethod ?? "clawtip";
246
- const task: PrewarmTask = {
247
- id: this.nextTaskId++,
248
- modelId: input.modelId,
249
- reason: input.reason,
250
- protocol,
251
- paymentMethod,
252
- enqueuedAt: this.now(),
253
- sellerIds: [],
254
- status: "queued"
255
- };
256
- this.queue.push(task);
257
- this.totalScheduled += 1;
258
- logger.info("prewarm.scheduled", "prewarm task enqueued", {
259
- taskId: task.id,
260
- modelId: task.modelId,
261
- reason: task.reason,
262
- protocol,
263
- paymentMethod,
264
- queueDepth: this.queue.length
265
- });
266
-
267
- // Fire-and-forget dispatch; the awaiter observes `task.status` via
268
- // `taskResolved` rather than blocking the queue.
269
- this.dispatch().catch((err) => {
270
- logger.error("prewarm.dispatch.unexpected", "dispatcher threw unexpectedly", {
271
- errorMessage: err instanceof Error ? err.message : String(err)
272
- });
273
- });
274
-
275
- return new Promise<PrewarmTask>((resolve) => {
276
- const check = () => {
277
- if (task.status === "succeeded" || task.status === "failed" || task.status === "canceled" || task.status === "rate_limited") {
278
- resolve(task);
279
- } else {
280
- setImmediate(check);
281
- }
282
- };
283
- check();
284
- });
285
- }
286
-
287
- /**
288
- * Run a one-shot sweep that probes every focus-set model. Used by the
289
- * `tb doctor --prewarm` explicit trigger and by the startup hook after
290
- * the configured jitter window. Resolves once every scheduled task has
291
- * reached a terminal state.
292
- */
293
- async runStartupPrewarm(inputs: Array<string | { modelId: string; protocol?: string; paymentMethod?: string }>): Promise<void> {
294
- await this.sleep(this.jitterMs(), this.abortController?.signal);
295
- if (this.abortController?.signal.aborted) {
296
- return;
297
- }
298
- const tasks = inputs.map((input) => {
299
- const task = typeof input === "string" ? { modelId: input } : input;
300
- return this.schedulePrewarm({ ...task, reason: "startup" });
301
- });
302
- await Promise.all(tasks);
303
- }
304
-
305
- /**
306
- * Force a sweep of any cache key whose TTL is about to expire. Returns
307
- * the number of tasks that were enqueued. Intended to be called from
308
- * the registry loop's heartbeat (replaces the v1 "all sellers" probe
309
- * cycle with "only the ones we are about to forget").
310
- */
311
- tickIdle(): number {
312
- const expiring = this.cache
313
- .snapshot()
314
- .filter((entry) => this.cache.isExpiringSoon(entry.modelId, entry.protocol, entry.paymentMethod, 60_000));
315
- if (expiring.length === 0) {
316
- return 0;
317
- }
318
- for (const entry of expiring) {
319
- this.schedulePrewarm({
320
- modelId: entry.modelId,
321
- protocol: entry.protocol,
322
- paymentMethod: entry.paymentMethod,
323
- reason: "idle"
324
- });
325
- }
326
- return expiring.length;
327
- }
328
-
329
- stats(): PrewarmSchedulerStats {
330
- const now = this.now();
331
- const cutoff = now - 60_000;
332
- const recentProbesInLastMinute = this.recentProbes.filter((ts) => ts >= cutoff).length;
333
- return {
334
- queueDepth: this.queue.length,
335
- inFlight: this.inFlight,
336
- totalScheduled: this.totalScheduled,
337
- totalSucceeded: this.totalSucceeded,
338
- totalFailed: this.totalFailed,
339
- totalRateLimited: this.totalRateLimited,
340
- recentProbesInLastMinute,
341
- concurrency: this.concurrency,
342
- maxPrewarmPerMinute: this.maxPrewarmPerMinute
343
- };
344
- }
345
-
346
- private jitterMs(): number {
347
- const span = Math.max(0, this.startupJitterMaxMs - this.startupJitterMinMs);
348
- return this.startupJitterMinMs + Math.floor(this.random() * span);
349
- }
350
-
351
- private async runIdleLoop(signal: AbortSignal): Promise<void> {
352
- while (!signal.aborted) {
353
- try {
354
- await this.sleep(this.idleIntervalMs, signal);
355
- } catch {
356
- return;
357
- }
358
- if (signal.aborted) {
359
- return;
360
- }
361
- try {
362
- this.tickIdle();
363
- } catch (err) {
364
- logger.error("prewarm.idle.failed", "idle tick threw unexpectedly", {
365
- errorMessage: err instanceof Error ? err.message : String(err)
366
- });
367
- }
368
- }
369
- }
370
-
371
- private async dispatch(): Promise<void> {
372
- while (this.queue.length > 0 && this.inFlight < this.concurrency) {
373
- const task = this.queue.shift();
374
- if (!task) {
375
- break;
376
- }
377
- if (task.status === "canceled") {
378
- continue;
379
- }
380
- if (this.isOverBudget()) {
381
- task.status = "rate_limited";
382
- this.totalRateLimited += 1;
383
- logger.warn("prewarm.rate_limited", "global per-minute probe budget exhausted", {
384
- taskId: task.id,
385
- modelId: task.modelId,
386
- recentProbes: this.recentProbesInLastMinute()
387
- });
388
- continue;
389
- }
390
- const sellers = this.modelIndex.sellersFor(task.modelId, {
391
- protocol: task.protocol,
392
- paymentMethod: task.paymentMethod
393
- });
394
- task.sellerIds = sellers.map((s) => s.id);
395
- if (sellers.length === 0) {
396
- task.status = "failed";
397
- task.errorMessage = "no sellers for model";
398
- task.completedAt = this.now();
399
- this.totalFailed += 1;
400
- logger.warn("prewarm.no_sellers", "no registry sellers match model", {
401
- taskId: task.id,
402
- modelId: task.modelId,
403
- protocol: task.protocol,
404
- paymentMethod: task.paymentMethod
405
- });
406
- continue;
407
- }
408
- // Ensure an abort controller exists so `stop()` works even when the
409
- // caller never invoked `start()`. `start()` is otherwise responsible
410
- // for the idle loop; dispatch only borrows the controller for
411
- // short-lived abort propagation.
412
- if (!this.abortController) {
413
- this.abortController = new AbortController();
414
- }
415
- this.inFlight += 1;
416
- task.status = "running";
417
- task.startedAt = this.now();
418
- // Capture the abort signal so an in-flight task can still observe
419
- // `stop()` even after the controller reference is cleared.
420
- const signal = this.abortController.signal;
421
- // Run async without awaiting; the loop continues to dispatch.
422
- void this.runTask(task, sellers, signal);
423
- }
424
- }
425
-
426
- private async runTask(task: PrewarmTask, sellers: RegistrySeller[], signal: AbortSignal | undefined): Promise<void> {
427
- const begin = this.cache.beginWarming(task.modelId, task.protocol, task.paymentMethod);
428
- const candidates: PrewarmCandidate[] = [];
429
- let anyOk = false;
430
- let probedAny = false;
431
-
432
- for (const seller of sellers) {
433
- if (signal?.aborted) {
434
- task.status = "canceled";
435
- task.completedAt = this.now();
436
- this.inFlight -= 1;
437
- this.dispatch().catch(() => undefined);
438
- return;
439
- }
440
- if (this.isSellerRateLimited(seller.id)) {
441
- logger.debug("prewarm.seller_skipped", "seller probe skipped due to per-seller rate limit", {
442
- taskId: task.id,
443
- sellerId: seller.id
444
- });
445
- continue;
446
- }
447
- probedAny = true;
448
- this.recordProbeAttempt();
449
- const probeSignal = composeProbeSignal(signal);
450
- let result: ProbeResult;
451
- try {
452
- result = await this.prober(seller, probeSignal.signal);
453
- } catch (err) {
454
- // Prober rejected (typically because of `stop()` aborting the probe
455
- // signal). Treat the rejection as a canceled run and exit early.
456
- if (signal?.aborted) {
457
- task.status = "canceled";
458
- task.errorMessage = err instanceof Error ? err.message : String(err);
459
- task.completedAt = this.now();
460
- this.inFlight -= 1;
461
- this.dispatch().catch(() => undefined);
462
- return;
463
- }
464
- // An unexpected prober error is recorded as a per-seller failure
465
- // and the loop continues with the next seller.
466
- logger.error("prewarm.probe.threw", "seller prober threw unexpectedly", {
467
- taskId: task.id,
468
- sellerId: seller.id,
469
- modelId: task.modelId,
470
- errorMessage: err instanceof Error ? err.message : String(err)
471
- });
472
- candidates.push({
473
- sellerId: seller.id,
474
- url: seller.url,
475
- healthScore: 0,
476
- lastSuccessAt: 0,
477
- lastFailAt: this.now(),
478
- avgLatencyMs: 0
479
- });
480
- continue;
481
- }
482
- if (signal?.aborted) {
483
- task.status = "canceled";
484
- task.completedAt = this.now();
485
- this.lastProbeAtBySeller.set(seller.id, this.now());
486
- this.inFlight -= 1;
487
- this.dispatch().catch(() => undefined);
488
- return;
489
- }
490
- this.lastProbeAtBySeller.set(seller.id, this.now());
491
- if (result.ok) {
492
- const healthScore = scoreProbeResult(result);
493
- anyOk = anyOk || healthScore > 0;
494
- candidates.push({
495
- sellerId: seller.id,
496
- url: seller.url,
497
- healthScore,
498
- lastSuccessAt: this.now(),
499
- lastFailAt: 0,
500
- avgLatencyMs: result.latencyMs,
501
- healthProbeLatencyMs: result.latencyMs,
502
- ttftMs: result.ttftMs,
503
- avgInferenceMs: result.avgInferenceMs,
504
- avgTokensPerSecond: result.avgTokensPerSecond,
505
- upstreamStatus: result.upstreamStatus,
506
- upstreamErrorClass: result.upstreamErrorClass,
507
- capacityBlockedUntil: result.capacityBlockedUntil
508
- });
509
- logger.info("prewarm.succeeded", "seller probe succeeded", {
510
- taskId: task.id,
511
- sellerId: seller.id,
512
- modelId: task.modelId,
513
- latencyMs: result.latencyMs,
514
- httpStatus: result.httpStatus,
515
- healthScore,
516
- upstreamStatus: result.upstreamStatus,
517
- upstreamErrorClass: result.upstreamErrorClass,
518
- ttftMs: result.ttftMs,
519
- avgInferenceMs: result.avgInferenceMs,
520
- avgTokensPerSecond: result.avgTokensPerSecond
521
- });
522
- } else {
523
- candidates.push({
524
- sellerId: seller.id,
525
- url: seller.url,
526
- healthScore: 0,
527
- lastSuccessAt: 0,
528
- lastFailAt: this.now(),
529
- avgLatencyMs: result.latencyMs,
530
- healthProbeLatencyMs: result.latencyMs,
531
- ttftMs: result.ttftMs,
532
- avgInferenceMs: result.avgInferenceMs,
533
- avgTokensPerSecond: result.avgTokensPerSecond,
534
- upstreamStatus: result.upstreamStatus,
535
- upstreamErrorClass: result.upstreamErrorClass,
536
- capacityBlockedUntil: result.capacityBlockedUntil
537
- });
538
- logger.warn("prewarm.failed", "seller probe failed", {
539
- taskId: task.id,
540
- sellerId: seller.id,
541
- modelId: task.modelId,
542
- errorMessage: result.errorMessage,
543
- httpStatus: result.httpStatus,
544
- upstreamStatus: result.upstreamStatus,
545
- upstreamErrorClass: result.upstreamErrorClass
546
- });
547
- }
548
- }
549
-
550
- if (!probedAny) {
551
- // Every seller was rate-limited; the task is a no-op. Preserve the
552
- // prior cache entry untouched and report the task as a no-op success
553
- // so callers do not see a transient failure.
554
- task.status = "succeeded";
555
- task.completedAt = this.now();
556
- this.totalSucceeded += 1;
557
- this.inFlight -= 1;
558
- this.dispatch().catch(() => undefined);
559
- return;
560
- }
561
-
562
- // Filter out hard failures so the cache only stores reachable sellers.
563
- const viable = candidates.filter((c) => c.healthScore > 0);
564
- this.cache.commitWarm({
565
- modelId: task.modelId,
566
- protocol: task.protocol,
567
- paymentMethod: task.paymentMethod,
568
- candidates: viable
569
- });
570
-
571
- if (!anyOk) {
572
- // No seller responded; record failure for the (model, protocol, payment)
573
- // entry so the scheduler can back off.
574
- this.cache.recordFailure(task.modelId, task.protocol, task.paymentMethod, "all probes failed");
575
- task.status = "failed";
576
- task.errorMessage = "all probes failed";
577
- this.totalFailed += 1;
578
- } else {
579
- task.status = "succeeded";
580
- this.totalSucceeded += 1;
581
- }
582
- task.completedAt = this.now();
583
-
584
- // Free a slot and keep dispatching.
585
- this.inFlight -= 1;
586
- this.dispatch().catch(() => undefined);
587
- }
588
-
589
- private isOverBudget(): boolean {
590
- return this.recentProbesInLastMinute() >= this.maxPrewarmPerMinute;
591
- }
592
-
593
- private recentProbesInLastMinute(): number {
594
- const cutoff = this.now() - 60_000;
595
- while (this.recentProbes.length > 0 && this.recentProbes[0] < cutoff) {
596
- this.recentProbes.shift();
597
- }
598
- return this.recentProbes.length;
599
- }
600
-
601
- private recordProbeAttempt(): void {
602
- this.recentProbes.push(this.now());
603
- }
604
-
605
- private isSellerRateLimited(sellerId: string): boolean {
606
- const last = this.lastProbeAtBySeller.get(sellerId);
607
- if (last === undefined) {
608
- return false;
609
- }
610
- return this.now() - last < this.perSellerMinIntervalMs;
611
- }
612
- }
613
-
614
- function defaultSleep(ms: number, signal?: AbortSignal): Promise<void> {
615
- return new Promise<void>((resolve, reject) => {
616
- if (signal?.aborted) {
617
- reject(new Error("aborted"));
618
- return;
619
- }
620
- const timer = setTimeout(() => {
621
- cleanup();
622
- resolve();
623
- }, ms);
624
- const onAbort = () => {
625
- cleanup();
626
- reject(new Error("aborted"));
627
- };
628
- const cleanup = () => {
629
- clearTimeout(timer);
630
- signal?.removeEventListener("abort", onAbort);
631
- };
632
- signal?.addEventListener("abort", onAbort, { once: true });
633
- });
634
- }
635
-
636
- /**
637
- * Build a per-probe abort signal that mirrors the scheduler's overall abort
638
- * signal. Probers receive this scoped signal so aborting the scheduler
639
- * propagates into any in-flight HTTP request (typically wired through
640
- * `fetch(..., { signal })`).
641
- */
642
- function composeProbeSignal(parent: AbortSignal | undefined): { signal: AbortSignal; abort(reason?: unknown): void } {
643
- const controller = new AbortController();
644
- if (parent) {
645
- if (parent.aborted) {
646
- controller.abort(parent.reason);
647
- } else {
648
- parent.addEventListener("abort", () => controller.abort(parent.reason), { once: true });
649
- }
650
- }
651
- return controller;
652
- }
653
-
654
- function scoreFromLatency(latencyMs: number): number {
655
- if (!Number.isFinite(latencyMs) || latencyMs < 0) {
656
- return 30;
657
- }
658
- if (latencyMs <= 100) return 100;
659
- if (latencyMs <= 300) return 90;
660
- if (latencyMs <= 800) return 75;
661
- if (latencyMs <= 1500) return 60;
662
- if (latencyMs <= 3000) return 40;
663
- return 20;
664
- }
665
-
666
- function scoreProbeResult(result: ProbeResult): number {
667
- if (result.upstreamStatus === "unhealthy") {
668
- return 0;
669
- }
670
- const base = scoreFromLatency(result.latencyMs);
671
- if (result.upstreamStatus === "degraded") {
672
- return Math.min(base, 40);
673
- }
674
- return base;
675
- }