@tokenbuddy/tokenbuddy 1.0.36 → 1.0.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +7 -2
- package/dist/src/buyer-store.js +46 -7
- package/dist/src/cli.d.ts +1 -0
- package/dist/src/cli.js +15 -7
- package/dist/src/daemon.d.ts +12 -0
- package/dist/src/daemon.js +791 -61
- package/dist/src/doctor-diagnostics.js +1 -6
- package/dist/src/provider-install.d.ts +2 -2
- package/dist/src/provider-install.js +248 -2
- package/dist/src/seller-catalog.d.ts +21 -0
- package/dist/src/seller-catalog.js +17 -0
- package/dist/src/seller-route-planner.d.ts +4 -1
- package/dist/src/seller-route-planner.js +3 -0
- package/dist/src/seller-routing-strategy.d.ts +3 -0
- package/dist/src/terminal-detect.d.ts +1 -1
- package/dist/src/terminal-detect.js +3 -2
- package/dist/src/workdir.d.ts +10 -0
- package/dist/src/workdir.js +26 -0
- package/package.json +15 -2
- package/static/ui/assets/index-Djfl9tw5.js +271 -0
- package/static/ui/assets/index-DkfztCkn.css +1 -0
- package/static/ui/index.html +2 -2
- package/dist/src/buyer-store.d.ts.map +0 -1
- package/dist/src/buyer-store.js.map +0 -1
- package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
- package/dist/src/clawtip-bootstrap.js.map +0 -1
- package/dist/src/cli.d.ts.map +0 -1
- package/dist/src/cli.js.map +0 -1
- package/dist/src/credit-tracker.d.ts.map +0 -1
- package/dist/src/credit-tracker.js.map +0 -1
- package/dist/src/daemon.d.ts.map +0 -1
- package/dist/src/daemon.js.map +0 -1
- package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
- package/dist/src/doctor-clawtip-wallet.js.map +0 -1
- package/dist/src/doctor-diagnostics.d.ts.map +0 -1
- package/dist/src/doctor-diagnostics.js.map +0 -1
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js.map +0 -1
- package/dist/src/init-clawtip-activation.d.ts.map +0 -1
- package/dist/src/init-clawtip-activation.js.map +0 -1
- package/dist/src/init-payment-options.d.ts.map +0 -1
- package/dist/src/init-payment-options.js.map +0 -1
- package/dist/src/init-setup.d.ts.map +0 -1
- package/dist/src/init-setup.js.map +0 -1
- package/dist/src/model-index.d.ts.map +0 -1
- package/dist/src/model-index.js.map +0 -1
- package/dist/src/package-update.d.ts.map +0 -1
- package/dist/src/package-update.js.map +0 -1
- package/dist/src/prewarm-cache.d.ts.map +0 -1
- package/dist/src/prewarm-cache.js.map +0 -1
- package/dist/src/prewarm-scheduler.d.ts.map +0 -1
- package/dist/src/prewarm-scheduler.js.map +0 -1
- package/dist/src/provider-install.d.ts.map +0 -1
- package/dist/src/provider-install.js.map +0 -1
- package/dist/src/provider-routing-config.d.ts.map +0 -1
- package/dist/src/provider-routing-config.js.map +0 -1
- package/dist/src/registry-trust.d.ts.map +0 -1
- package/dist/src/registry-trust.js.map +0 -1
- package/dist/src/route-failover.d.ts.map +0 -1
- package/dist/src/route-failover.js.map +0 -1
- package/dist/src/seller-catalog.d.ts.map +0 -1
- package/dist/src/seller-catalog.js.map +0 -1
- package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
- package/dist/src/seller-concurrency-limiter.js.map +0 -1
- package/dist/src/seller-metadata-cache.d.ts.map +0 -1
- package/dist/src/seller-metadata-cache.js.map +0 -1
- package/dist/src/seller-pool.d.ts.map +0 -1
- package/dist/src/seller-pool.js.map +0 -1
- package/dist/src/seller-route-planner.d.ts.map +0 -1
- package/dist/src/seller-route-planner.js.map +0 -1
- package/dist/src/seller-routing-config.d.ts.map +0 -1
- package/dist/src/seller-routing-config.js.map +0 -1
- package/dist/src/seller-routing-strategy.d.ts.map +0 -1
- package/dist/src/seller-routing-strategy.js.map +0 -1
- package/dist/src/stream-failover.d.ts.map +0 -1
- package/dist/src/stream-failover.js.map +0 -1
- package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
- package/dist/src/tb-clawtip-proof.js.map +0 -1
- package/dist/src/tb-proxyd.d.ts.map +0 -1
- package/dist/src/tb-proxyd.js.map +0 -1
- package/dist/src/terminal-detect.d.ts.map +0 -1
- package/dist/src/terminal-detect.js.map +0 -1
- package/dist/src/terminal-image.d.ts.map +0 -1
- package/dist/src/terminal-image.js.map +0 -1
- package/src/buyer-store.ts +0 -1090
- package/src/clawtip-bootstrap.ts +0 -65
- package/src/cli.ts +0 -2243
- package/src/credit-tracker.ts +0 -295
- package/src/daemon.ts +0 -5475
- package/src/doctor-clawtip-wallet.ts +0 -95
- package/src/doctor-diagnostics.ts +0 -1026
- package/src/index.ts +0 -16
- package/src/init-clawtip-activation.ts +0 -695
- package/src/init-payment-options.ts +0 -373
- package/src/init-setup.ts +0 -165
- package/src/model-index.ts +0 -278
- package/src/package-update.ts +0 -311
- package/src/prewarm-cache.ts +0 -485
- package/src/prewarm-scheduler.ts +0 -675
- package/src/provider-install.ts +0 -1006
- package/src/provider-routing-config.ts +0 -410
- package/src/registry-trust.ts +0 -51
- package/src/route-failover.ts +0 -304
- package/src/seller-catalog.ts +0 -505
- package/src/seller-concurrency-limiter.ts +0 -161
- package/src/seller-metadata-cache.ts +0 -91
- package/src/seller-pool.ts +0 -557
- package/src/seller-route-planner.ts +0 -513
- package/src/seller-routing-config.ts +0 -211
- package/src/seller-routing-strategy.ts +0 -362
- package/src/stream-failover.ts +0 -152
- package/src/tb-clawtip-proof.ts +0 -28
- package/src/tb-proxyd.ts +0 -101
- package/src/terminal-detect.ts +0 -333
- package/src/terminal-image.ts +0 -228
- package/static/ui/assets/index-0MVXD7bH.css +0 -1
- package/static/ui/assets/index-BVbeDEwq.js +0 -271
- package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
- package/tests/cli-routing.test.ts +0 -363
- package/tests/control-plane-ui-endpoints.test.ts +0 -1630
- package/tests/credit-tracker.test.ts +0 -165
- package/tests/daemon-413-fallback.test.ts +0 -92
- package/tests/daemon-classify.test.ts +0 -452
- package/tests/daemon-roles.test.ts +0 -92
- package/tests/daemon-trusted-registry-cache.test.ts +0 -132
- package/tests/e2e.test.ts +0 -366
- package/tests/image-generation-e2e.test.ts +0 -230
- package/tests/model-index.test.ts +0 -198
- package/tests/package-update.test.ts +0 -147
- package/tests/prewarm-cache.test.ts +0 -296
- package/tests/prewarm-scheduler.test.ts +0 -367
- package/tests/provider-routing-config.test.ts +0 -150
- package/tests/registry-trust.test.ts +0 -28
- package/tests/route-failover.test.ts +0 -222
- package/tests/seller-catalog-413.test.ts +0 -120
- package/tests/seller-catalog-utilities.test.ts +0 -124
- package/tests/seller-concurrency-limiter.test.ts +0 -83
- package/tests/seller-metadata-cache.test.ts +0 -89
- package/tests/seller-pool.test.ts +0 -365
- package/tests/seller-route-planner.test.ts +0 -312
- package/tests/seller-routing-config.test.ts +0 -124
- package/tests/seller-routing-strategy.test.ts +0 -167
- package/tests/stream-failover.test.ts +0 -52
- package/tests/thousand-seller.test.ts +0 -151
- package/tests/tokenbuddy.test.ts +0 -4043
- package/tsconfig.json +0 -8
package/src/prewarm-scheduler.ts
DELETED
|
@@ -1,675 +0,0 @@
|
|
|
1
|
-
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
2
|
-
import type { RegistrySeller } from "./seller-catalog.js";
|
|
3
|
-
import type { ModelIndex } from "./model-index.js";
|
|
4
|
-
import type { PrewarmCache, PrewarmCandidate } from "./prewarm-cache.js";
|
|
5
|
-
|
|
6
|
-
const logger = createModuleLogger("tb-proxyd:prewarm-scheduler");
|
|
7
|
-
|
|
8
|
-
/**
 * Why a prewarm was triggered. The scheduler records it on the task and in
 * its log output.
 * - `startup`: bulk prewarm at daemon startup, subject to the startup jitter
 * - `lazy`: prewarm triggered by the first user request for a
 *   (model, protocol, payment) triple
 * - `idle`: refresh triggered by the background idle tick
 * - `explicit`: explicit triggers such as `tb doctor --prewarm`
 */
export type PrewarmReason =
  | "startup"
  | "lazy"
  | "idle"
  | "explicit";
|
|
16
|
-
|
|
17
|
-
/**
 * Outcome of a single health probe, as returned by a `SellerProber`.
 * The scheduler derives a `healthScore` from `ok`, `latencyMs` and
 * `upstreamStatus`, and passes `upstreamStatus` through to the cache unchanged.
 */
export interface ProbeResult {
  /** Whether the probe succeeded (HTTP 2xx and semantically "healthy"). */
  ok: boolean;

  /** Total probe duration in milliseconds; feeds the healthScore calculation. */
  latencyMs: number;

  /** HTTP status, when the prober was able to obtain one. */
  httpStatus?: number;

  /** Error description; only present when `ok=false` and must not carry sensitive fields. */
  errorMessage?: string;

  /**
   * Status reported by the upstream, aligned with the semantics of the health
   * probe endpoint or of the fallback inference.
   */
  upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";

  /** Upstream error class name (status code / error code). */
  upstreamErrorClass?: string;

  /** Time to first token in milliseconds (optional); preferred when ranking by speed. */
  ttftMs?: number;

  /** Average inference latency in milliseconds (optional). */
  avgInferenceMs?: number;

  /** Average output throughput (tokens/s) over the most recent 10-minute window (optional). */
  avgTokensPerSecond?: number;

  /**
   * Deadline of a temporary capacity back-off; while it is later than the
   * current time the seller does not take part in routing.
   */
  capacityBlockedUntil?: number;
}
|
|
43
|
-
|
|
44
|
-
/**
 * Probe callback injected into the scheduler. Keeping HTTP out of the
 * scheduler lets it be unit-tested without starting any server. According to
 * the original note, the default implementation lives in `daemon.ts` and
 * issues `GET <seller.url>/health` with a 3s timeout (not visible from this
 * file).
 *
 * Implementations must honour the supplied `AbortSignal` and reject once it
 * aborts, so that `stop()` can cut in-flight probes short.
 */
export type SellerProber = (
  seller: RegistrySeller,
  signal: AbortSignal
) => Promise<ProbeResult>;
|
|
53
|
-
|
|
54
|
-
/**
 * Dependencies and tunables needed to construct a `PrewarmScheduler`.
 * Defaults follow buyer-driven-fallback-design.md §18.5-§18.6: concurrency 4,
 * 30s per seller, 30 probes/minute globally, startup jitter of 5-10s.
 */
export interface PrewarmSchedulerOptions {
  /** Shared model index used to resolve a modelId into a list of sellers. */
  modelIndex: ModelIndex;

  /** Shared prewarm cache; written by the scheduler and read by the controller. */
  cache: PrewarmCache;

  /** Injected health prober; the scheduler never issues HTTP itself. */
  prober: SellerProber;

  // Limits (defaults match buyer-driven-fallback-design.md §18.6).

  /** Concurrency cap; defaults to 4. */
  concurrency?: number;

  /** Minimum gap in milliseconds between two probes of the same seller; defaults to 30000. */
  perSellerMinIntervalMs?: number;

  /** Maximum number of probes per minute (global throttle); defaults to 30. */
  maxPrewarmPerMinute?: number;

  // Idle loop cadence; the scheduler can also be driven externally
  // (PR-2.1 wires `tickIdle` into the existing registry-loop heartbeat).

  /** Interval of the idle loop in milliseconds; defaults to 60000. */
  idleIntervalMs?: number;

  // Startup jitter (5-10s by default per §18.5.1).

  /** Lower bound of the startup jitter in milliseconds; defaults to 5000. */
  startupJitterMinMs?: number;

  /** Upper bound of the startup jitter in milliseconds; defaults to 10000. */
  startupJitterMaxMs?: number;

  // Hooks for testing.

  /** Injectable abort-aware sleep; defaults to a `setTimeout`-based implementation. */
  sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;

  /** Injectable randomness source; defaults to `Math.random`. */
  random?: () => number;

  /** Injectable clock; defaults to `Date.now`. */
  now?: () => number;

  // Optional filter applied to every probe (e.g. preferred protocol).

  /** Global default protocol filter; `schedulePrewarm` may override it per call. */
  protocol?: string;

  /** Global default payment-method filter; `schedulePrewarm` may override it per call. */
  paymentMethod?: string;
}
|
|
95
|
-
|
|
96
|
-
/**
 * Internal bookkeeping record for one queued prewarm of a
 * (model, protocol, payment) triple.
 */
interface PrewarmTask {
  /** Sequential id assigned by the scheduler when the task is enqueued. */
  id: number;

  /** Model whose sellers should be probed. */
  modelId: string;

  /** Trigger recorded for this task. */
  reason: PrewarmReason;

  /** Protocol filter after defaults have been applied. */
  protocol: string;

  /** Payment-method filter after defaults have been applied. */
  paymentMethod: string;

  /** Scheduler clock reading at enqueue time. */
  enqueuedAt: number;

  /** Ids of the sellers resolved for the task at dispatch time; empty until then. */
  sellerIds: Array<string>;

  /** Scheduler clock reading when the task started running. */
  startedAt?: number;

  /** Scheduler clock reading when the task finished, when recorded. */
  completedAt?: number;

  /** Lifecycle state; everything except `queued` and `running` is terminal. */
  status:
    | "queued"
    | "running"
    | "succeeded"
    | "failed"
    | "canceled"
    | "rate_limited";

  /** Human-readable failure or cancellation detail, when available. */
  errorMessage?: string;
}
|
|
109
|
-
|
|
110
|
-
/**
 * Return value of `PrewarmScheduler.stats()`: a snapshot of the scheduler's
 * current runtime metrics. `tb doctor` uses it to judge whether scheduling is
 * overloaded or has been throttled for a long time.
 */
export interface PrewarmSchedulerStats {
  /** Number of tasks in the queue that have not started yet. */
  queueDepth: number;

  /** Number of tasks currently probing. */
  inFlight: number;

  /** Cumulative number of enqueued tasks (includes rate_limited ones). */
  totalScheduled: number;

  /** Cumulative number of tasks that succeeded. */
  totalSucceeded: number;

  /** Cumulative number of tasks that failed (every candidate probe failed). */
  totalFailed: number;

  /** Cumulative number of tasks skipped because of the global throttle. */
  totalRateLimited: number;

  /** Probes started within the last 60 seconds (shows whether maxPrewarmPerMinute is being hit). */
  recentProbesInLastMinute: number;

  /** Currently configured concurrency cap. */
  concurrency: number;

  /** Currently configured per-minute probe cap. */
  maxPrewarmPerMinute: number;
}
|
|
134
|
-
|
|
135
|
-
/**
 * Background scheduler that warms up sellers for a (model, protocol,
 * payment) triple on demand. The scheduler owns:
 *   - queue management with bounded concurrency (default 4)
 *   - per-seller rate limiting (default 30s between probes to the same
 *     seller, even across different models)
 *   - global rate limiting (default 30 probes/minute, checked once per task
 *     at dispatch time)
 *   - a random delay before the startup sweep to avoid thundering herds
 *
 * The scheduler does NOT own HTTP I/O; that lives in the injected
 * `prober` so tests can swap in a deterministic stub.
 */
export class PrewarmScheduler {
  private readonly modelIndex: ModelIndex;
  private readonly cache: PrewarmCache;
  private readonly prober: SellerProber;

  private readonly concurrency: number;
  private readonly perSellerMinIntervalMs: number;
  private readonly maxPrewarmPerMinute: number;
  private readonly idleIntervalMs: number;
  private readonly startupJitterMinMs: number;
  private readonly startupJitterMaxMs: number;
  private readonly sleep: (ms: number, signal?: AbortSignal) => Promise<void>;
  private readonly random: () => number;
  private readonly now: () => number;
  private readonly protocol: string | undefined;
  private readonly paymentMethod: string | undefined;

  private readonly queue: PrewarmTask[] = [];
  private inFlight = 0;
  /** Clock readings of recent probe attempts, oldest first. */
  private recentProbes: number[] = [];
  /** Clock reading of the latest probe (start or completion) per seller id. */
  private lastProbeAtBySeller = new Map<string, number>();
  private nextTaskId = 1;
  /** Resolvers of the promises handed out by `schedulePrewarm`, keyed by task id. */
  private readonly waiters = new Map<number, (task: PrewarmTask) => void>();

  private totalScheduled = 0;
  private totalSucceeded = 0;
  private totalFailed = 0;
  private totalRateLimited = 0;

  private abortController: AbortController | null = null;
  private idleLoopPromise: Promise<void> | null = null;

  /**
   * @param options Dependencies plus optional tunables; every omitted tunable
   *   falls back to the default listed on `PrewarmSchedulerOptions`.
   */
  constructor(options: PrewarmSchedulerOptions) {
    this.modelIndex = options.modelIndex;
    this.cache = options.cache;
    this.prober = options.prober;
    this.concurrency = options.concurrency ?? 4;
    this.perSellerMinIntervalMs = options.perSellerMinIntervalMs ?? 30_000;
    this.maxPrewarmPerMinute = options.maxPrewarmPerMinute ?? 30;
    this.idleIntervalMs = options.idleIntervalMs ?? 60_000;
    this.startupJitterMinMs = options.startupJitterMinMs ?? 5_000;
    this.startupJitterMaxMs = options.startupJitterMaxMs ?? 10_000;
    this.sleep = options.sleep ?? defaultSleep;
    this.random = options.random ?? Math.random;
    this.now = options.now ?? Date.now;
    this.protocol = options.protocol;
    this.paymentMethod = options.paymentMethod;
  }

  /**
   * Start the background idle loop. Calling it again while the loop is
   * running is a no-op. Every `idleIntervalMs` the loop runs `tickIdle()`,
   * which re-queues the cache entries that `cache.isExpiringSoon` flags.
   */
  start(): void {
    // Key off the loop itself, not the controller: `dispatch()` may already
    // have created a controller lazily, and that must not block the loop.
    if (this.idleLoopPromise) {
      return;
    }
    if (!this.abortController) {
      this.abortController = new AbortController();
    }
    this.idleLoopPromise = this.runIdleLoop(this.abortController.signal);
  }

  /**
   * Cancel queued tasks, abort the shared signal and wait for the idle loop
   * to exit. In-flight probes receive the abort through their scoped signal;
   * their tasks finish as `canceled` and nothing is committed to the cache.
   */
  async stop(): Promise<void> {
    // Drain the queue first so an in-flight task that winds down after the
    // abort cannot pull queued work into a fresh run.
    for (const task of this.queue.splice(0)) {
      this.settle(task, "canceled");
    }
    const controller = this.abortController;
    if (!controller) {
      return;
    }
    this.abortController = null;
    controller.abort();
    const idleLoop = this.idleLoopPromise;
    this.idleLoopPromise = null;
    if (idleLoop) {
      await idleLoop.catch(() => undefined);
    }
  }

  /**
   * Enqueue a prewarm for a (model, protocol, payment) triple.
   *
   * @param input `modelId` and `reason` are required. `protocol` and
   *   `paymentMethod` fall back to the scheduler-wide defaults and then to
   *   `"chat_completions"` / `"clawtip"`. `reason` is recorded on the task and
   *   logged; `blockOnFirst` is accepted but not read by this class.
   * @returns A promise that never rejects; it resolves with the task once the
   *   task reaches a terminal status (`succeeded`, `failed`, `canceled` or
   *   `rate_limited`).
   */
  schedulePrewarm(input: {
    modelId: string;
    reason: PrewarmReason;
    protocol?: string;
    paymentMethod?: string;
    blockOnFirst?: boolean;
  }): Promise<PrewarmTask> {
    const protocol = input.protocol ?? this.protocol ?? "chat_completions";
    const paymentMethod = input.paymentMethod ?? this.paymentMethod ?? "clawtip";
    const task: PrewarmTask = {
      id: this.nextTaskId++,
      modelId: input.modelId,
      reason: input.reason,
      protocol,
      paymentMethod,
      enqueuedAt: this.now(),
      sellerIds: [],
      status: "queued"
    };

    // Register the resolver before dispatching: `dispatch()` can settle the
    // task synchronously (rate limit, no sellers). The executor runs
    // synchronously, so the waiter is in place by the time we dispatch.
    const settled = new Promise<PrewarmTask>((resolve) => {
      this.waiters.set(task.id, resolve);
    });

    this.queue.push(task);
    this.totalScheduled += 1;
    logger.info("prewarm.scheduled", "prewarm task enqueued", {
      taskId: task.id,
      modelId: task.modelId,
      reason: task.reason,
      protocol,
      paymentMethod,
      queueDepth: this.queue.length
    });

    // Fire-and-forget dispatch; the caller waits on `settled`, not the queue.
    this.dispatch().catch((err) => {
      logger.error("prewarm.dispatch.unexpected", "dispatcher threw unexpectedly", {
        errorMessage: err instanceof Error ? err.message : String(err)
      });
    });

    return settled;
  }

  /**
   * Wait for the startup jitter, then enqueue one `startup` task per input
   * and resolve once every one of them has reached a terminal state.
   * Returns without scheduling anything when the scheduler is stopped during
   * the jitter wait.
   *
   * @param inputs Model ids, or objects that also carry per-model
   *   protocol / payment-method overrides.
   */
  async runStartupPrewarm(inputs: Array<string | { modelId: string; protocol?: string; paymentMethod?: string }>): Promise<void> {
    // Capture the signal up front: `stop()` clears `this.abortController`,
    // so re-reading it after the sleep would miss the abort.
    const signal = this.abortController?.signal;
    try {
      await this.sleep(this.jitterMs(), signal);
    } catch (err) {
      if (signal?.aborted) {
        return;
      }
      throw err;
    }
    if (signal?.aborted) {
      return;
    }
    const pending = inputs.map((input) => {
      const target = typeof input === "string" ? { modelId: input } : input;
      return this.schedulePrewarm({ ...target, reason: "startup" });
    });
    await Promise.all(pending);
  }

  /**
   * Enqueue an `idle` prewarm for every cache entry for which
   * `cache.isExpiringSoon(modelId, protocol, paymentMethod, 60_000)` is true.
   * Intended to be called from the registry loop's heartbeat (replaces the
   * v1 "all sellers" probe cycle with "only the ones we are about to
   * forget").
   *
   * @returns The number of tasks that were enqueued.
   */
  tickIdle(): number {
    // NOTE(review): the 60_000 argument is presumably a look-ahead window in
    // milliseconds; confirm against PrewarmCache.isExpiringSoon.
    const expiring = this.cache
      .snapshot()
      .filter((entry) => this.cache.isExpiringSoon(entry.modelId, entry.protocol, entry.paymentMethod, 60_000));
    for (const entry of expiring) {
      // The returned promise never rejects, so it is safe to ignore.
      void this.schedulePrewarm({
        modelId: entry.modelId,
        protocol: entry.protocol,
        paymentMethod: entry.paymentMethod,
        reason: "idle"
      });
    }
    return expiring.length;
  }

  /** Snapshot of the queue, counters and limits; does not mutate any state. */
  stats(): PrewarmSchedulerStats {
    const cutoff = this.now() - 60_000;
    const recentProbesInLastMinute = this.recentProbes.filter((ts) => ts >= cutoff).length;
    return {
      queueDepth: this.queue.length,
      inFlight: this.inFlight,
      totalScheduled: this.totalScheduled,
      totalSucceeded: this.totalSucceeded,
      totalFailed: this.totalFailed,
      totalRateLimited: this.totalRateLimited,
      recentProbesInLastMinute,
      concurrency: this.concurrency,
      maxPrewarmPerMinute: this.maxPrewarmPerMinute
    };
  }

  /** Random delay in `[startupJitterMinMs, startupJitterMaxMs)`; a negative span is treated as 0. */
  private jitterMs(): number {
    const span = Math.max(0, this.startupJitterMaxMs - this.startupJitterMinMs);
    return this.startupJitterMinMs + Math.floor(this.random() * span);
  }

  /** Sleep / `tickIdle()` cycle that exits as soon as `signal` aborts. */
  private async runIdleLoop(signal: AbortSignal): Promise<void> {
    while (!signal.aborted) {
      try {
        await this.sleep(this.idleIntervalMs, signal);
      } catch {
        // The sleep rejects on abort; treat any rejection as "stop looping".
        return;
      }
      if (signal.aborted) {
        return;
      }
      try {
        this.tickIdle();
      } catch (err) {
        logger.error("prewarm.idle.failed", "idle tick threw unexpectedly", {
          errorMessage: err instanceof Error ? err.message : String(err)
        });
      }
    }
  }

  /**
   * Move a task into a terminal state: record status, optional error text
   * and completion time, bump the matching counter, and resolve the promise
   * returned by `schedulePrewarm`.
   */
  private settle(
    task: PrewarmTask,
    status: "succeeded" | "failed" | "canceled" | "rate_limited",
    errorMessage?: string
  ): void {
    task.status = status;
    if (errorMessage !== undefined) {
      task.errorMessage = errorMessage;
    }
    task.completedAt = this.now();
    if (status === "succeeded") {
      this.totalSucceeded += 1;
    } else if (status === "failed") {
      this.totalFailed += 1;
    } else if (status === "rate_limited") {
      this.totalRateLimited += 1;
    }
    const resolve = this.waiters.get(task.id);
    if (resolve) {
      this.waiters.delete(task.id);
      resolve(task);
    }
  }

  /**
   * Start queued tasks while there is spare concurrency. Tasks that hit the
   * global budget or resolve to no sellers are settled immediately without
   * occupying a slot.
   */
  private async dispatch(): Promise<void> {
    while (this.queue.length > 0 && this.inFlight < this.concurrency) {
      const task = this.queue.shift();
      if (!task) {
        break;
      }
      if (task.status === "canceled") {
        continue;
      }
      if (this.isOverBudget()) {
        this.settle(task, "rate_limited");
        logger.warn("prewarm.rate_limited", "global per-minute probe budget exhausted", {
          taskId: task.id,
          modelId: task.modelId,
          recentProbes: this.recentProbesInLastMinute()
        });
        continue;
      }
      const sellers = this.modelIndex.sellersFor(task.modelId, {
        protocol: task.protocol,
        paymentMethod: task.paymentMethod
      });
      task.sellerIds = sellers.map((s) => s.id);
      if (sellers.length === 0) {
        this.settle(task, "failed", "no sellers for model");
        logger.warn("prewarm.no_sellers", "no registry sellers match model", {
          taskId: task.id,
          modelId: task.modelId,
          protocol: task.protocol,
          paymentMethod: task.paymentMethod
        });
        continue;
      }
      // Ensure an abort controller exists so `stop()` works even when the
      // caller never invoked `start()`.
      if (!this.abortController) {
        this.abortController = new AbortController();
      }
      this.inFlight += 1;
      task.status = "running";
      task.startedAt = this.now();
      // Capture the signal so the task still observes `stop()` after the
      // controller reference has been cleared.
      const signal = this.abortController.signal;
      // Not awaited: the loop keeps dispatching. `runTask` never rejects.
      void this.runTask(task, sellers, signal);
    }
  }

  /**
   * Run one task to completion, then free its slot, settle it and resume
   * dispatching. Never rejects: an unexpected error from the cache or clock
   * is logged and turns the task into a failure, so a slot is never leaked
   * and no unhandled rejection is raised.
   */
  private async runTask(task: PrewarmTask, sellers: RegistrySeller[], signal: AbortSignal | undefined): Promise<void> {
    let outcome: { status: "succeeded" | "failed" | "canceled"; errorMessage?: string };
    try {
      outcome = await this.probeSellers(task, sellers, signal);
    } catch (err) {
      const errorMessage = err instanceof Error ? err.message : String(err);
      logger.error("prewarm.task.unexpected", "prewarm task threw unexpectedly", {
        taskId: task.id,
        modelId: task.modelId,
        errorMessage
      });
      outcome = { status: "failed", errorMessage };
    }
    // Free the slot before settling so anyone awaiting the task already sees
    // the updated `inFlight` when their continuation runs.
    this.inFlight -= 1;
    this.settle(task, outcome.status, outcome.errorMessage);
    this.dispatch().catch(() => undefined);
  }

  /**
   * Probe the task's sellers one after another and commit the reachable ones
   * to the cache.
   *
   * @returns The terminal status for the task: `canceled` when `signal`
   *   aborted, `failed` when no probe produced a positive health score,
   *   otherwise `succeeded` (including the case where every seller was
   *   skipped by the per-seller rate limit).
   */
  private async probeSellers(
    task: PrewarmTask,
    sellers: RegistrySeller[],
    signal: AbortSignal | undefined
  ): Promise<{ status: "succeeded" | "failed" | "canceled"; errorMessage?: string }> {
    // NOTE(review): the canceled and all-skipped paths below return without a
    // matching commitWarm/recordFailure; confirm the cache tolerates that.
    this.cache.beginWarming(task.modelId, task.protocol, task.paymentMethod);
    const candidates: PrewarmCandidate[] = [];
    let anyOk = false;
    let probedAny = false;

    for (const seller of sellers) {
      if (signal?.aborted) {
        return { status: "canceled" };
      }
      if (this.isSellerRateLimited(seller.id)) {
        logger.debug("prewarm.seller_skipped", "seller probe skipped due to per-seller rate limit", {
          taskId: task.id,
          sellerId: seller.id
        });
        continue;
      }
      probedAny = true;
      this.recordProbeAttempt();
      // Claim the seller before awaiting so concurrent tasks (and sellers
      // whose prober throws) are covered by the per-seller interval too.
      this.lastProbeAtBySeller.set(seller.id, this.now());
      const probeSignal = composeProbeSignal(signal);
      let result: ProbeResult;
      try {
        result = await this.prober(seller, probeSignal.signal);
      } catch (err) {
        const errorMessage = err instanceof Error ? err.message : String(err);
        // A rejection after `stop()` is the expected abort path.
        if (signal?.aborted) {
          return { status: "canceled", errorMessage };
        }
        // Any other rejection counts as a failure of this seller only.
        logger.error("prewarm.probe.threw", "seller prober threw unexpectedly", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          errorMessage
        });
        candidates.push({
          sellerId: seller.id,
          url: seller.url,
          healthScore: 0,
          lastSuccessAt: 0,
          lastFailAt: this.now(),
          avgLatencyMs: 0
        });
        continue;
      }
      // Measure the interval from probe completion, as before.
      this.lastProbeAtBySeller.set(seller.id, this.now());
      if (signal?.aborted) {
        return { status: "canceled" };
      }

      const healthScore = result.ok ? scoreProbeResult(result) : 0;
      anyOk = anyOk || healthScore > 0;
      const observedAt = this.now();
      candidates.push({
        sellerId: seller.id,
        url: seller.url,
        healthScore,
        lastSuccessAt: result.ok ? observedAt : 0,
        lastFailAt: result.ok ? 0 : observedAt,
        avgLatencyMs: result.latencyMs,
        healthProbeLatencyMs: result.latencyMs,
        ttftMs: result.ttftMs,
        avgInferenceMs: result.avgInferenceMs,
        avgTokensPerSecond: result.avgTokensPerSecond,
        upstreamStatus: result.upstreamStatus,
        upstreamErrorClass: result.upstreamErrorClass,
        capacityBlockedUntil: result.capacityBlockedUntil
      });
      if (result.ok) {
        logger.info("prewarm.succeeded", "seller probe succeeded", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          latencyMs: result.latencyMs,
          httpStatus: result.httpStatus,
          healthScore,
          upstreamStatus: result.upstreamStatus,
          upstreamErrorClass: result.upstreamErrorClass,
          ttftMs: result.ttftMs,
          avgInferenceMs: result.avgInferenceMs,
          avgTokensPerSecond: result.avgTokensPerSecond
        });
      } else {
        logger.warn("prewarm.failed", "seller probe failed", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          errorMessage: result.errorMessage,
          httpStatus: result.httpStatus,
          upstreamStatus: result.upstreamStatus,
          upstreamErrorClass: result.upstreamErrorClass
        });
      }
    }

    if (!probedAny) {
      // Every seller was rate-limited: leave the previous cache entry alone
      // and report a no-op success so callers see no transient failure.
      return { status: "succeeded" };
    }

    // Only sellers with a positive score are stored as candidates.
    this.cache.commitWarm({
      modelId: task.modelId,
      protocol: task.protocol,
      paymentMethod: task.paymentMethod,
      candidates: candidates.filter((c) => c.healthScore > 0)
    });

    if (!anyOk) {
      // Record the failure on the (model, protocol, payment) entry.
      this.cache.recordFailure(task.modelId, task.protocol, task.paymentMethod, "all probes failed");
      return { status: "failed", errorMessage: "all probes failed" };
    }
    return { status: "succeeded" };
  }

  /** True once the probes of the last minute have reached `maxPrewarmPerMinute`. */
  private isOverBudget(): boolean {
    return this.recentProbesInLastMinute() >= this.maxPrewarmPerMinute;
  }

  /** Drop leading timestamps older than 60s and return how many remain. */
  private recentProbesInLastMinute(): number {
    const cutoff = this.now() - 60_000;
    const firstFresh = this.recentProbes.findIndex((ts) => ts >= cutoff);
    this.recentProbes.splice(0, firstFresh === -1 ? this.recentProbes.length : firstFresh);
    return this.recentProbes.length;
  }

  /** Record one probe attempt against the global per-minute budget. */
  private recordProbeAttempt(): void {
    this.recentProbes.push(this.now());
  }

  /** True when `sellerId` was probed less than `perSellerMinIntervalMs` ago. */
  private isSellerRateLimited(sellerId: string): boolean {
    const last = this.lastProbeAtBySeller.get(sellerId);
    if (last === undefined) {
      return false;
    }
    return this.now() - last < this.perSellerMinIntervalMs;
  }
}
|
|
613
|
-
|
|
614
|
-
/**
 * Abort-aware sleep built on `setTimeout`.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional abort signal; when it is already aborted, or aborts
 *   while waiting, the promise rejects with `Error("aborted")` and the timer
 *   is cleared.
 * @returns A promise that resolves after `ms` milliseconds.
 */
function defaultSleep(ms: number, signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) {
    return Promise.reject(new Error("aborted"));
  }
  return new Promise<void>((resolve, reject) => {
    // Abort path: cancel the pending timer, then reject.
    function handleAbort(): void {
      clearTimeout(pending);
      reject(new Error("aborted"));
    }
    // Timer path: detach the abort listener, then resolve.
    const pending = setTimeout(() => {
      signal?.removeEventListener("abort", handleAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", handleAbort, { once: true });
  });
}
|
|
635
|
-
|
|
636
|
-
/**
 * Create a per-probe `AbortController` that follows the scheduler-wide abort
 * signal: if `parent` is already aborted the new controller is aborted right
 * away, otherwise it aborts (with the parent's reason) when `parent` does.
 * Probers receive the scoped signal, typically passing it on through
 * `fetch(..., { signal })`.
 *
 * @param parent Scheduler-wide signal, or `undefined` for a standalone controller.
 * @returns The controller, exposed as its `signal` plus `abort`.
 */
function composeProbeSignal(parent: AbortSignal | undefined): { signal: AbortSignal; abort(reason?: unknown): void } {
  const scoped = new AbortController();
  if (!parent) {
    return scoped;
  }
  const forwardAbort = (): void => scoped.abort(parent.reason);
  if (parent.aborted) {
    forwardAbort();
  } else {
    // NOTE(review): this listener is only detached when `parent` aborts, so a
    // long-lived parent keeps one listener per probe; confirm that is acceptable.
    parent.addEventListener("abort", forwardAbort, { once: true });
  }
  return scoped;
}
|
|
653
|
-
|
|
654
|
-
/**
 * Map a probe latency to a 20-100 health score using fixed tiers with
 * inclusive upper bounds: <=100ms -> 100, <=300ms -> 90, <=800ms -> 75,
 * <=1500ms -> 60, <=3000ms -> 40, anything slower -> 20.
 *
 * @param latencyMs Probe latency in milliseconds.
 * @returns The tier score, or 30 when the latency is negative or not finite.
 */
function scoreFromLatency(latencyMs: number): number {
  const usable = Number.isFinite(latencyMs) && latencyMs >= 0;
  if (!usable) {
    return 30;
  }
  // [inclusive upper bound in ms, score], fastest tier first.
  const tiers: ReadonlyArray<readonly [number, number]> = [
    [100, 100],
    [300, 90],
    [800, 75],
    [1500, 60],
    [3000, 40]
  ];
  const match = tiers.find(([ceilingMs]) => latencyMs <= ceilingMs);
  return match ? match[1] : 20;
}
|
|
665
|
-
|
|
666
|
-
/**
 * Turn a probe result into a health score.
 *
 * @param result Probe result; only `upstreamStatus` and `latencyMs` are read.
 * @returns 0 for an `unhealthy` upstream, the latency score capped at 40 for
 *   a `degraded` one, and the plain latency score otherwise.
 */
function scoreProbeResult(result: ProbeResult): number {
  switch (result.upstreamStatus) {
    case "unhealthy":
      return 0;
    case "degraded":
      return Math.min(scoreFromLatency(result.latencyMs), 40);
    default:
      return scoreFromLatency(result.latencyMs);
  }
}
|