@tokenbuddy/tokenbuddy 1.0.12 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +61 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +12 -0
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/cli.d.ts +47 -0
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +287 -63
- package/dist/src/cli.js.map +1 -1
- package/dist/src/credit-tracker.d.ts +26 -0
- package/dist/src/credit-tracker.d.ts.map +1 -1
- package/dist/src/credit-tracker.js +8 -0
- package/dist/src/credit-tracker.js.map +1 -1
- package/dist/src/daemon.d.ts +29 -3
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +292 -65
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/doctor-clawtip-wallet.d.ts +25 -0
- package/dist/src/doctor-clawtip-wallet.d.ts.map +1 -1
- package/dist/src/doctor-clawtip-wallet.js +13 -0
- package/dist/src/doctor-clawtip-wallet.js.map +1 -1
- package/dist/src/doctor-diagnostics.d.ts +63 -0
- package/dist/src/doctor-diagnostics.d.ts.map +1 -1
- package/dist/src/doctor-diagnostics.js +39 -1
- package/dist/src/doctor-diagnostics.js.map +1 -1
- package/dist/src/index.d.ts +4 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +4 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/init-clawtip-activation.d.ts +103 -0
- package/dist/src/init-clawtip-activation.d.ts.map +1 -1
- package/dist/src/init-clawtip-activation.js +60 -0
- package/dist/src/init-clawtip-activation.js.map +1 -1
- package/dist/src/init-payment-options.d.ts +124 -0
- package/dist/src/init-payment-options.d.ts.map +1 -1
- package/dist/src/init-payment-options.js +68 -0
- package/dist/src/init-payment-options.js.map +1 -1
- package/dist/src/model-index.d.ts +9 -0
- package/dist/src/model-index.d.ts.map +1 -1
- package/dist/src/model-index.js.map +1 -1
- package/dist/src/prewarm-cache.d.ts +89 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -1
- package/dist/src/prewarm-cache.js +14 -1
- package/dist/src/prewarm-cache.js.map +1 -1
- package/dist/src/prewarm-scheduler.d.ts +62 -3
- package/dist/src/prewarm-scheduler.d.ts.map +1 -1
- package/dist/src/prewarm-scheduler.js +39 -8
- package/dist/src/prewarm-scheduler.js.map +1 -1
- package/dist/src/provider-install.d.ts +89 -3
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +77 -19
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/route-failover.d.ts +48 -0
- package/dist/src/route-failover.d.ts.map +1 -1
- package/dist/src/route-failover.js.map +1 -1
- package/dist/src/seller-catalog.d.ts +158 -10
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js +79 -5
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-metadata-cache.d.ts +29 -0
- package/dist/src/seller-metadata-cache.d.ts.map +1 -0
- package/dist/src/seller-metadata-cache.js +71 -0
- package/dist/src/seller-metadata-cache.js.map +1 -0
- package/dist/src/seller-pool.d.ts +71 -0
- package/dist/src/seller-pool.d.ts.map +1 -1
- package/dist/src/seller-pool.js +6 -1
- package/dist/src/seller-pool.js.map +1 -1
- package/dist/src/seller-route-planner.d.ts +118 -0
- package/dist/src/seller-route-planner.d.ts.map +1 -0
- package/dist/src/seller-route-planner.js +160 -0
- package/dist/src/seller-route-planner.js.map +1 -0
- package/dist/src/seller-routing-config.d.ts +69 -0
- package/dist/src/seller-routing-config.d.ts.map +1 -0
- package/dist/src/seller-routing-config.js +164 -0
- package/dist/src/seller-routing-config.js.map +1 -0
- package/dist/src/seller-routing-strategy.d.ts +118 -0
- package/dist/src/seller-routing-strategy.d.ts.map +1 -0
- package/dist/src/seller-routing-strategy.js +183 -0
- package/dist/src/seller-routing-strategy.js.map +1 -0
- package/dist/src/stream-failover.d.ts +23 -0
- package/dist/src/stream-failover.d.ts.map +1 -1
- package/dist/src/stream-failover.js +4 -0
- package/dist/src/stream-failover.js.map +1 -1
- package/dist/src/tb-proxyd.js +7 -21
- package/dist/src/tb-proxyd.js.map +1 -1
- package/dist/src/terminal-detect.d.ts +51 -0
- package/dist/src/terminal-detect.d.ts.map +1 -1
- package/dist/src/terminal-detect.js +42 -0
- package/dist/src/terminal-detect.js.map +1 -1
- package/dist/src/terminal-image.d.ts +41 -0
- package/dist/src/terminal-image.d.ts.map +1 -1
- package/dist/src/terminal-image.js +15 -0
- package/dist/src/terminal-image.js.map +1 -1
- package/package.json +1 -1
- package/src/buyer-store.ts +61 -0
- package/src/cli.ts +330 -68
- package/src/credit-tracker.ts +26 -0
- package/src/daemon.ts +363 -72
- package/src/doctor-clawtip-wallet.ts +25 -0
- package/src/doctor-diagnostics.ts +63 -1
- package/src/index.ts +4 -0
- package/src/init-clawtip-activation.ts +103 -0
- package/src/init-payment-options.ts +124 -0
- package/src/model-index.ts +9 -0
- package/src/prewarm-cache.ts +99 -1
- package/src/prewarm-scheduler.ts +97 -12
- package/src/provider-install.ts +125 -27
- package/src/route-failover.ts +48 -0
- package/src/seller-catalog.ts +158 -12
- package/src/seller-metadata-cache.ts +91 -0
- package/src/seller-pool.ts +77 -1
- package/src/seller-route-planner.ts +323 -0
- package/src/seller-routing-config.ts +198 -0
- package/src/seller-routing-strategy.ts +316 -0
- package/src/stream-failover.ts +23 -0
- package/src/tb-proxyd.ts +7 -23
- package/src/terminal-detect.ts +51 -0
- package/src/terminal-image.ts +41 -0
- package/tests/cli-routing.test.ts +287 -0
- package/tests/daemon-classify.test.ts +431 -0
- package/tests/daemon-roles.test.ts +92 -0
- package/tests/seller-catalog-utilities.test.ts +70 -0
- package/tests/seller-metadata-cache.test.ts +89 -0
- package/tests/seller-route-planner.test.ts +150 -0
- package/tests/seller-routing-config.test.ts +111 -0
- package/tests/seller-routing-strategy.test.ts +166 -0
- package/tests/tokenbuddy.test.ts +446 -34
- /package/{src → tests}/credit-tracker.test.ts +0 -0
- /package/{src → tests}/model-index.test.ts +0 -0
- /package/{src → tests}/prewarm-cache.test.ts +0 -0
- /package/{src → tests}/prewarm-scheduler.test.ts +0 -0
- /package/{src → tests}/route-failover.test.ts +0 -0
- /package/{src → tests}/seller-catalog-413.test.ts +0 -0
- /package/{src → tests}/seller-pool.test.ts +0 -0
- /package/{src → tests}/stream-failover.test.ts +0 -0
- /package/{src → tests}/thousand-seller.test.ts +0 -0
package/src/prewarm-cache.ts
CHANGED
|
@@ -10,36 +10,96 @@ const logger = createModuleLogger("tb-proxyd:prewarm-cache");
|
|
|
10
10
|
*/
|
|
11
11
|
export const DEFAULT_PREWARM_TTL_MS = 10 * 60 * 1000;
|
|
12
12
|
|
|
13
|
+
/**
|
|
14
|
+
* State machine for a cache entry.
|
|
15
|
+
* - `warming`: scheduling is in progress; the candidates are not yet stable
|
|
16
|
+
* - `warm`: the last commit succeeded and the entry is within its TTL
|
|
17
|
+
* - `stale`: the TTL has expired or commits failed consecutively
|
|
18
|
+
* - `empty`: the commit returned 0 candidates (no seller for this (model, protocol, payment) in the current registry)
|
|
19
|
+
*/
|
|
13
20
|
export type PrewarmState = "warming" | "warm" | "stale" | "empty";
|
|
14
21
|
|
|
22
|
+
/**
|
|
23
|
+
* Health profile of a single seller after a prewarm commit.
|
|
24
|
+
* Written by `PrewarmCache.commitWarm()` after normalizing the fields (score 0-100, non-negative latency).
|
|
25
|
+
*/
|
|
15
26
|
export interface PrewarmCandidate {
|
|
27
|
+
/** Global seller ID */
|
|
16
28
|
sellerId: string;
|
|
29
|
+
/** Seller URL with the trailing slash removed */
|
|
17
30
|
url: string;
|
|
31
|
+
/** Overall health score, 0-100; 0 means completely broken */
|
|
18
32
|
healthScore: number; // 0-100
|
|
33
|
+
/** Unix timestamp (ms) of the last success; 0 means no success yet */
|
|
19
34
|
lastSuccessAt: number;
|
|
35
|
+
/** Unix timestamp (ms) of the last failure; 0 means no failure yet */
|
|
20
36
|
lastFailAt: number;
|
|
37
|
+
/** Average latency (ms); fallback metric when sorting */
|
|
21
38
|
avgLatencyMs: number;
|
|
39
|
+
/** Health probe latency (ms), optional */
|
|
40
|
+
healthProbeLatencyMs?: number;
|
|
41
|
+
/** Time to first token (ms), optional; preferred metric for speed sorting */
|
|
42
|
+
ttftMs?: number;
|
|
43
|
+
/** Average inference latency (ms), optional */
|
|
44
|
+
avgInferenceMs?: number;
|
|
45
|
+
/** Upstream status (aligned with the semantics reported by the seller) */
|
|
46
|
+
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
47
|
+
/** Upstream error class name (HTTP status / error code); present only on failure */
|
|
48
|
+
upstreamErrorClass?: string;
|
|
22
49
|
}
|
|
23
50
|
|
|
51
|
+
/**
|
|
52
|
+
* Cache entry keyed by `(modelId, protocol, paymentMethod)`; stores the health profiles of a set of candidate sellers.
|
|
53
|
+
* `warmedAt` is the start of the TTL; `consecutiveWarmingFailures` triggers exponential backoff.
|
|
54
|
+
*/
|
|
24
55
|
export interface PrewarmEntry {
|
|
56
|
+
/** Model ID (normalized) */
|
|
25
57
|
modelId: string;
|
|
58
|
+
/** Protocol name (normalized) */
|
|
26
59
|
protocol: string;
|
|
60
|
+
/** Payment method (normalized) */
|
|
27
61
|
paymentMethod: string;
|
|
62
|
+
/** Current state of the entry */
|
|
28
63
|
state: PrewarmState;
|
|
64
|
+
/** Candidate sellers matched for this (model, protocol, payment) */
|
|
29
65
|
candidates: PrewarmCandidate[];
|
|
66
|
+
/** Timestamp of this successful commit; start of the TTL */
|
|
30
67
|
warmedAt: number;
|
|
68
|
+
/** TTL of this entry (ms); can be explicitly overridden at commit time */
|
|
31
69
|
ttlMs: number;
|
|
70
|
+
/** Number of consecutive warming failures; the doctor panel uses it to detect "long-term broken" entries */
|
|
32
71
|
consecutiveWarmingFailures: number;
|
|
72
|
+
/** Timestamp of the most recent transition into warming; for debugging */
|
|
33
73
|
lastInFlightAt?: number;
|
|
34
74
|
}
|
|
35
75
|
|
|
76
|
+
/**
|
|
77
|
+
* "Raw" candidate data passed in by the scheduler at commit time: every field except `sellerId` and `url` is optional,
|
|
78
|
+
* and is normalized by `toCandidate()` (score clamping, negative latency truncation, etc.).
|
|
79
|
+
*/
|
|
36
80
|
export interface PrewarmCandidateInput {
|
|
81
|
+
/** Seller ID */
|
|
37
82
|
sellerId: string;
|
|
83
|
+
/** Seller URL */
|
|
38
84
|
url: string;
|
|
85
|
+
/** Health score (optional; defaults to 50 during normalization) */
|
|
39
86
|
healthScore?: number;
|
|
87
|
+
/** Timestamp of the last success (ms), optional */
|
|
40
88
|
lastSuccessAt?: number;
|
|
89
|
+
/** Timestamp of the last failure (ms), optional */
|
|
41
90
|
lastFailAt?: number;
|
|
91
|
+
/** Average latency (ms), optional */
|
|
42
92
|
avgLatencyMs?: number;
|
|
93
|
+
/** Health probe latency (ms), optional */
|
|
94
|
+
healthProbeLatencyMs?: number;
|
|
95
|
+
/** TTFT (ms), optional */
|
|
96
|
+
ttftMs?: number;
|
|
97
|
+
/** Average inference latency (ms), optional */
|
|
98
|
+
avgInferenceMs?: number;
|
|
99
|
+
/** Upstream status, optional */
|
|
100
|
+
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
101
|
+
/** Upstream error class name, optional */
|
|
102
|
+
upstreamErrorClass?: string;
|
|
43
103
|
}
|
|
44
104
|
|
|
45
105
|
/**
|
|
@@ -69,6 +129,11 @@ interface PrewarmCacheOptions {
|
|
|
69
129
|
now?: () => number;
|
|
70
130
|
}
|
|
71
131
|
|
|
132
|
+
/**
|
|
133
|
+
* 进程内的 (model, protocol, payment) → 候选 seller 健康画像缓存。
|
|
134
|
+
* 单线程访问(Node JS 主线程),无内部锁;`commitWarm` 是写路径,
|
|
135
|
+
* `get/freshness` 是热路径读,TTL 由 `warmedAt + ttlMs` 决定。
|
|
136
|
+
*/
|
|
72
137
|
export class PrewarmCache {
|
|
73
138
|
private readonly entries = new Map<string, PrewarmEntry>();
|
|
74
139
|
private readonly defaultTtlMs: number;
|
|
@@ -327,24 +392,48 @@ export class PrewarmCache {
|
|
|
327
392
|
}
|
|
328
393
|
}
|
|
329
394
|
|
|
395
|
+
/**
|
|
396
|
+
* Return value of `PrewarmCache.freshness()`: a quick verdict on whether the existing prewarm can still be trusted.
|
|
397
|
+
* Used on the hot path of every inference request to avoid iterating over the cache directly.
|
|
398
|
+
*/
|
|
330
399
|
export interface PrewarmFreshness {
|
|
400
|
+
/** Whether a matching entry exists (false is equivalent to the cold path) */
|
|
331
401
|
present: boolean;
|
|
402
|
+
/** Whether the TTL has already been exceeded */
|
|
332
403
|
expired: boolean;
|
|
404
|
+
/** Treated as "expiring soon" when ≤ 10% of the TTL remains; the scheduler uses this to trigger idle prewarming */
|
|
333
405
|
expiringSoon: boolean;
|
|
406
|
+
/** Remaining TTL (ms); omitted when expired */
|
|
334
407
|
remainingMs?: number;
|
|
408
|
+
/** Current state of the entry (forced to `stale` when expired) */
|
|
335
409
|
state: PrewarmState;
|
|
410
|
+
/** The associated cache entry, if any */
|
|
336
411
|
entry?: PrewarmEntry;
|
|
337
412
|
}
|
|
338
413
|
|
|
414
|
+
/**
|
|
415
|
+
* Return value of `PrewarmCache.beginWarming()`: marks a prewarm as in-flight.
|
|
416
|
+
* `hadPrevious` lets the caller decide whether to keep exposing the old candidates while in `warming`.
|
|
417
|
+
*/
|
|
339
418
|
export interface PrewarmBeginResult {
|
|
419
|
+
/** Cache key (equivalent to the output of `prewarmKey()`) */
|
|
340
420
|
key: string;
|
|
421
|
+
/** The entry currently written back to the cache (state=`warming`) */
|
|
341
422
|
entry: PrewarmEntry;
|
|
423
|
+
/** Whether the cache already had an entry before the call; lets the caller decide whether to "keep the old candidates" */
|
|
342
424
|
hadPrevious: boolean;
|
|
343
425
|
}
|
|
344
426
|
|
|
427
|
+
/**
|
|
428
|
+
* Return value of `PrewarmCache.commitWarm()`: a candidate set was committed successfully.
|
|
429
|
+
* `replacedSellers` lets the caller detect candidate churn.
|
|
430
|
+
*/
|
|
345
431
|
export interface PrewarmCommitResult {
|
|
432
|
+
/** Cache key */
|
|
346
433
|
key: string;
|
|
434
|
+
/** The new entry after the commit (state=`warm` or `empty`) */
|
|
347
435
|
entry: PrewarmEntry;
|
|
436
|
+
/** Candidate sellers of the previous entry (IDs of sellers that are no longer cached) */
|
|
348
437
|
replacedSellers: string[];
|
|
349
438
|
}
|
|
350
439
|
|
|
@@ -355,10 +444,19 @@ function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
|
|
|
355
444
|
healthScore: clampScore(input.healthScore ?? 50),
|
|
356
445
|
lastSuccessAt: input.lastSuccessAt ?? 0,
|
|
357
446
|
lastFailAt: input.lastFailAt ?? 0,
|
|
358
|
-
avgLatencyMs: Math.max(0, input.avgLatencyMs ?? 0)
|
|
447
|
+
avgLatencyMs: Math.max(0, input.avgLatencyMs ?? 0),
|
|
448
|
+
healthProbeLatencyMs: finiteNonNegative(input.healthProbeLatencyMs),
|
|
449
|
+
ttftMs: finiteNonNegative(input.ttftMs),
|
|
450
|
+
avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
|
|
451
|
+
upstreamStatus: input.upstreamStatus,
|
|
452
|
+
upstreamErrorClass: input.upstreamErrorClass
|
|
359
453
|
};
|
|
360
454
|
}
|
|
361
455
|
|
|
456
|
+
/**
 * Normalizes an optional metric to a non-negative finite number.
 *
 * @param value - Raw metric; may be missing or non-finite.
 * @returns `value` with negatives raised to 0, or `undefined` when `value`
 *   is `undefined`, `NaN`, or infinite.
 */
function finiteNonNegative(value: number | undefined): number | undefined {
  // The `typeof` guard lets the compiler narrow `value` to `number`, so no
  // type assertion is needed before calling `Math.max`.
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return Math.max(0, value);
}
|
|
459
|
+
|
|
362
460
|
function clampScore(score: number): number {
|
|
363
461
|
if (!Number.isFinite(score)) {
|
|
364
462
|
return 50;
|
package/src/prewarm-scheduler.ts
CHANGED
|
@@ -5,45 +5,87 @@ import type { PrewarmCache, PrewarmCandidate } from "./prewarm-cache.js";
|
|
|
5
5
|
|
|
6
6
|
const logger = createModuleLogger("tb-proxyd:prewarm-scheduler");
|
|
7
7
|
|
|
8
|
+
/**
|
|
9
|
+
* Reason a prewarm was triggered; used by the scheduler to decide concurrency and log grouping.
|
|
10
|
+
* - `startup`: batch prewarm at daemon startup; subject to the startup jitter
|
|
11
|
+
* - `lazy`: prewarm triggered by the user's first request for a given (model, protocol, payment)
|
|
12
|
+
* - `idle`: refresh triggered by the background idle tick
|
|
13
|
+
* - `explicit`: explicit triggers such as `tb doctor --prewarm`
|
|
14
|
+
*/
|
|
8
15
|
export type PrewarmReason = "startup" | "lazy" | "idle" | "explicit";
|
|
9
16
|
|
|
17
|
+
/**
|
|
18
|
+
* Result of a single health probe, returned by a `SellerProber`.
|
|
19
|
+
* The scheduler derives `healthScore` from `ok` + `latencyMs` and passes `upstreamStatus` straight through.
|
|
20
|
+
*/
|
|
10
21
|
export interface ProbeResult {
|
|
22
|
+
/** Whether the probe succeeded (HTTP 2xx and semantically "healthy") */
|
|
11
23
|
ok: boolean;
|
|
24
|
+
/** Total probe duration (ms); used to compute healthScore */
|
|
12
25
|
latencyMs: number;
|
|
26
|
+
/** HTTP status (if the prober can obtain it) */
|
|
13
27
|
httpStatus?: number;
|
|
28
|
+
/** Error description (present only when `ok=false`; carries no sensitive fields) */
|
|
14
29
|
errorMessage?: string;
|
|
30
|
+
/** Status reported by the upstream; aligned with the semantics of the health probe endpoint or the fallback inference */
|
|
31
|
+
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
32
|
+
/** Upstream error class name (status code / error code) */
|
|
33
|
+
upstreamErrorClass?: string;
|
|
34
|
+
/** Time to first token (ms), optional; preferred when sorting by speed */
|
|
35
|
+
ttftMs?: number;
|
|
36
|
+
/** Average inference latency (ms), optional */
|
|
37
|
+
avgInferenceMs?: number;
|
|
15
38
|
}
|
|
16
39
|
|
|
17
40
|
/**
|
|
18
41
|
* The probe function used by the scheduler. Decoupled so the scheduler can
|
|
19
42
|
* be unit-tested without spinning up HTTP servers. The default
|
|
20
|
-
* implementation in `
|
|
21
|
-
*
|
|
43
|
+
* implementation in `daemon.ts` calls `GET <seller.url>/health` with a 3s
|
|
44
|
+
* timeout. Probers must
|
|
22
45
|
* observe the provided `AbortSignal` and reject when it aborts so the
|
|
23
46
|
* scheduler can short-circuit in-flight probes on `stop()`.
|
|
24
47
|
*/
|
|
25
48
|
export type SellerProber = (seller: RegistrySeller, signal: AbortSignal) => Promise<ProbeResult>;
|
|
26
49
|
|
|
50
|
+
/**
|
|
51
|
+
* Dependencies and tunables required to construct a `PrewarmScheduler`. For the defaults see the design document
|
|
52
|
+
* buyer-driven-fallback-design.md §18.5-§18.6: concurrency 4, per-seller 30s,
|
|
53
|
+
* global 30/min, startup jitter 5-10s.
|
|
54
|
+
*/
|
|
27
55
|
export interface PrewarmSchedulerOptions {
|
|
56
|
+
/** Shared model index; used to resolve a modelId into a list of sellers */
|
|
28
57
|
modelIndex: ModelIndex;
|
|
58
|
+
/** Shared prewarm cache; written by the scheduler and read by the controller */
|
|
29
59
|
cache: PrewarmCache;
|
|
60
|
+
/** Injected health prober; the scheduler does not issue HTTP requests directly */
|
|
30
61
|
prober: SellerProber;
|
|
31
62
|
// Limits (defaults match buyer-driven-fallback-design.md §18.6).
|
|
63
|
+
/** Concurrency limit; defaults to 4 */
|
|
32
64
|
concurrency?: number;
|
|
65
|
+
/** Minimum interval between two probes of the same seller (ms); defaults to 30000 */
|
|
33
66
|
perSellerMinIntervalMs?: number;
|
|
67
|
+
/** Maximum number of probes per minute (global throttle); defaults to 30 */
|
|
34
68
|
maxPrewarmPerMinute?: number;
|
|
35
69
|
// Idle loop cadence; the scheduler can also be driven externally
|
|
36
70
|
// (PR-2.1 wires `tickIdle` into the existing registry-loop heartbeat).
|
|
71
|
+
/** Idle loop interval (ms); defaults to 60000 */
|
|
37
72
|
idleIntervalMs?: number;
|
|
38
73
|
// Startup jitter (5-10s by default per §18.5.1).
|
|
74
|
+
/** Lower bound of the startup jitter (ms); defaults to 5000 */
|
|
39
75
|
startupJitterMinMs?: number;
|
|
76
|
+
/** Upper bound of the startup jitter (ms); defaults to 10000 */
|
|
40
77
|
startupJitterMaxMs?: number;
|
|
41
78
|
// Hooks for testing; defaults to Node's setTimeout / setImmediate.
|
|
79
|
+
/** Injectable sleep (abort-aware); defaults to Node setTimeout */
|
|
42
80
|
sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;
|
|
81
|
+
/** Injectable randomness source; defaults to `Math.random` */
|
|
43
82
|
random?: () => number;
|
|
83
|
+
/** Injectable clock; defaults to `Date.now` */
|
|
44
84
|
now?: () => number;
|
|
45
85
|
// Optional filter applied to every probe (e.g. preferred protocol).
|
|
86
|
+
/** Global default protocol filter; can be overridden by `schedulePrewarm` */
|
|
46
87
|
protocol?: string;
|
|
88
|
+
/** Global default payment method filter; can be overridden by `schedulePrewarm` */
|
|
47
89
|
paymentMethod?: string;
|
|
48
90
|
}
|
|
49
91
|
|
|
@@ -61,15 +103,28 @@ interface PrewarmTask {
|
|
|
61
103
|
errorMessage?: string;
|
|
62
104
|
}
|
|
63
105
|
|
|
106
|
+
/**
|
|
107
|
+
* Return value of `PrewarmScheduler.stats()`: a snapshot of the scheduler's current runtime metrics.
|
|
108
|
+
* `tb doctor` uses it to judge whether scheduling is overloaded or has been throttled for a long time.
|
|
109
|
+
*/
|
|
64
110
|
export interface PrewarmSchedulerStats {
|
|
111
|
+
/** Number of tasks in the queue that have not started yet */
|
|
65
112
|
queueDepth: number;
|
|
113
|
+
/** Number of tasks currently probing */
|
|
66
114
|
inFlight: number;
|
|
115
|
+
/** Cumulative number of enqueued tasks (including rate_limited ones) */
|
|
67
116
|
totalScheduled: number;
|
|
117
|
+
/** Cumulative number of successful tasks */
|
|
68
118
|
totalSucceeded: number;
|
|
119
|
+
/** Cumulative number of failed tasks (every candidate probe failed) */
|
|
69
120
|
totalFailed: number;
|
|
121
|
+
/** Cumulative number of tasks skipped because of the global throttle */
|
|
70
122
|
totalRateLimited: number;
|
|
123
|
+
/** Total probes started within the last 60 seconds (used to tell whether maxPrewarmPerMinute was hit) */
|
|
71
124
|
recentProbesInLastMinute: number;
|
|
125
|
+
/** Currently configured concurrency limit */
|
|
72
126
|
concurrency: number;
|
|
127
|
+
/** Currently configured per-minute probe limit */
|
|
73
128
|
maxPrewarmPerMinute: number;
|
|
74
129
|
}
|
|
75
130
|
|
|
@@ -231,14 +286,15 @@ export class PrewarmScheduler {
|
|
|
231
286
|
* the configured jitter window. Resolves once every scheduled task has
|
|
232
287
|
* reached a terminal state.
|
|
233
288
|
*/
|
|
234
|
-
async runStartupPrewarm(
|
|
289
|
+
async runStartupPrewarm(inputs: Array<string | { modelId: string; protocol?: string; paymentMethod?: string }>): Promise<void> {
|
|
235
290
|
await this.sleep(this.jitterMs(), this.abortController?.signal);
|
|
236
291
|
if (this.abortController?.signal.aborted) {
|
|
237
292
|
return;
|
|
238
293
|
}
|
|
239
|
-
const tasks =
|
|
240
|
-
|
|
241
|
-
|
|
294
|
+
const tasks = inputs.map((input) => {
|
|
295
|
+
const task = typeof input === "string" ? { modelId: input } : input;
|
|
296
|
+
return this.schedulePrewarm({ ...task, reason: "startup" });
|
|
297
|
+
});
|
|
242
298
|
await Promise.all(tasks);
|
|
243
299
|
}
|
|
244
300
|
|
|
@@ -429,21 +485,32 @@ export class PrewarmScheduler {
|
|
|
429
485
|
}
|
|
430
486
|
this.lastProbeAtBySeller.set(seller.id, this.now());
|
|
431
487
|
if (result.ok) {
|
|
432
|
-
|
|
488
|
+
const healthScore = scoreProbeResult(result);
|
|
489
|
+
anyOk = anyOk || healthScore > 0;
|
|
433
490
|
candidates.push({
|
|
434
491
|
sellerId: seller.id,
|
|
435
492
|
url: seller.url,
|
|
436
|
-
healthScore
|
|
493
|
+
healthScore,
|
|
437
494
|
lastSuccessAt: this.now(),
|
|
438
495
|
lastFailAt: 0,
|
|
439
|
-
avgLatencyMs: result.latencyMs
|
|
496
|
+
avgLatencyMs: result.latencyMs,
|
|
497
|
+
healthProbeLatencyMs: result.latencyMs,
|
|
498
|
+
ttftMs: result.ttftMs,
|
|
499
|
+
avgInferenceMs: result.avgInferenceMs,
|
|
500
|
+
upstreamStatus: result.upstreamStatus,
|
|
501
|
+
upstreamErrorClass: result.upstreamErrorClass
|
|
440
502
|
});
|
|
441
503
|
logger.info("prewarm.succeeded", "seller probe succeeded", {
|
|
442
504
|
taskId: task.id,
|
|
443
505
|
sellerId: seller.id,
|
|
444
506
|
modelId: task.modelId,
|
|
445
507
|
latencyMs: result.latencyMs,
|
|
446
|
-
httpStatus: result.httpStatus
|
|
508
|
+
httpStatus: result.httpStatus,
|
|
509
|
+
healthScore,
|
|
510
|
+
upstreamStatus: result.upstreamStatus,
|
|
511
|
+
upstreamErrorClass: result.upstreamErrorClass,
|
|
512
|
+
ttftMs: result.ttftMs,
|
|
513
|
+
avgInferenceMs: result.avgInferenceMs
|
|
447
514
|
});
|
|
448
515
|
} else {
|
|
449
516
|
candidates.push({
|
|
@@ -452,14 +519,21 @@ export class PrewarmScheduler {
|
|
|
452
519
|
healthScore: 0,
|
|
453
520
|
lastSuccessAt: 0,
|
|
454
521
|
lastFailAt: this.now(),
|
|
455
|
-
avgLatencyMs: result.latencyMs
|
|
522
|
+
avgLatencyMs: result.latencyMs,
|
|
523
|
+
healthProbeLatencyMs: result.latencyMs,
|
|
524
|
+
ttftMs: result.ttftMs,
|
|
525
|
+
avgInferenceMs: result.avgInferenceMs,
|
|
526
|
+
upstreamStatus: result.upstreamStatus,
|
|
527
|
+
upstreamErrorClass: result.upstreamErrorClass
|
|
456
528
|
});
|
|
457
529
|
logger.warn("prewarm.failed", "seller probe failed", {
|
|
458
530
|
taskId: task.id,
|
|
459
531
|
sellerId: seller.id,
|
|
460
532
|
modelId: task.modelId,
|
|
461
533
|
errorMessage: result.errorMessage,
|
|
462
|
-
httpStatus: result.httpStatus
|
|
534
|
+
httpStatus: result.httpStatus,
|
|
535
|
+
upstreamStatus: result.upstreamStatus,
|
|
536
|
+
upstreamErrorClass: result.upstreamErrorClass
|
|
463
537
|
});
|
|
464
538
|
}
|
|
465
539
|
}
|
|
@@ -579,3 +653,14 @@ function scoreFromLatency(latencyMs: number): number {
|
|
|
579
653
|
if (latencyMs <= 3000) return 40;
|
|
580
654
|
return 20;
|
|
581
655
|
}
|
|
656
|
+
|
|
657
|
+
/**
 * Converts a probe result into a health score.
 *
 * An `unhealthy` upstream always scores 0. A `degraded` upstream gets the
 * latency-derived score from `scoreFromLatency`, capped at 40. Any other
 * status (or none) gets the latency-derived score unchanged.
 *
 * @param result - Probe outcome; only `upstreamStatus` and `latencyMs` are read.
 * @returns The health score for the probed seller.
 */
function scoreProbeResult(result: ProbeResult): number {
  switch (result.upstreamStatus) {
    case "unhealthy":
      // Reported as unhealthy: latency is irrelevant.
      return 0;
    case "degraded":
      // A fast but degraded upstream must not outrank the cap.
      return Math.min(scoreFromLatency(result.latencyMs), 40);
    default:
      return scoreFromLatency(result.latencyMs);
  }
}
|