@tokenbuddy/tokenbuddy 1.0.35 → 1.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +6 -1
- package/dist/src/buyer-store.js +43 -4
- package/dist/src/cli.js +2 -2
- package/dist/src/daemon.d.ts +12 -0
- package/dist/src/daemon.js +791 -61
- package/dist/src/doctor-diagnostics.js +1 -6
- package/dist/src/provider-install.d.ts +2 -2
- package/dist/src/provider-install.js +248 -2
- package/dist/src/seller-catalog.d.ts +21 -0
- package/dist/src/seller-catalog.js +17 -0
- package/dist/src/seller-route-planner.d.ts +4 -1
- package/dist/src/seller-route-planner.js +3 -0
- package/dist/src/seller-routing-strategy.d.ts +3 -0
- package/dist/src/terminal-detect.d.ts +1 -1
- package/dist/src/terminal-detect.js +3 -2
- package/package.json +15 -2
- package/static/ui/assets/index-Djfl9tw5.js +271 -0
- package/static/ui/assets/index-DkfztCkn.css +1 -0
- package/static/ui/index.html +2 -2
- package/dist/src/buyer-store.d.ts.map +0 -1
- package/dist/src/buyer-store.js.map +0 -1
- package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
- package/dist/src/clawtip-bootstrap.js.map +0 -1
- package/dist/src/cli.d.ts.map +0 -1
- package/dist/src/cli.js.map +0 -1
- package/dist/src/credit-tracker.d.ts.map +0 -1
- package/dist/src/credit-tracker.js.map +0 -1
- package/dist/src/daemon.d.ts.map +0 -1
- package/dist/src/daemon.js.map +0 -1
- package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
- package/dist/src/doctor-clawtip-wallet.js.map +0 -1
- package/dist/src/doctor-diagnostics.d.ts.map +0 -1
- package/dist/src/doctor-diagnostics.js.map +0 -1
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js.map +0 -1
- package/dist/src/init-clawtip-activation.d.ts.map +0 -1
- package/dist/src/init-clawtip-activation.js.map +0 -1
- package/dist/src/init-payment-options.d.ts.map +0 -1
- package/dist/src/init-payment-options.js.map +0 -1
- package/dist/src/init-setup.d.ts.map +0 -1
- package/dist/src/init-setup.js.map +0 -1
- package/dist/src/model-index.d.ts.map +0 -1
- package/dist/src/model-index.js.map +0 -1
- package/dist/src/package-update.d.ts.map +0 -1
- package/dist/src/package-update.js.map +0 -1
- package/dist/src/prewarm-cache.d.ts.map +0 -1
- package/dist/src/prewarm-cache.js.map +0 -1
- package/dist/src/prewarm-scheduler.d.ts.map +0 -1
- package/dist/src/prewarm-scheduler.js.map +0 -1
- package/dist/src/provider-install.d.ts.map +0 -1
- package/dist/src/provider-install.js.map +0 -1
- package/dist/src/provider-routing-config.d.ts.map +0 -1
- package/dist/src/provider-routing-config.js.map +0 -1
- package/dist/src/registry-trust.d.ts.map +0 -1
- package/dist/src/registry-trust.js.map +0 -1
- package/dist/src/route-failover.d.ts.map +0 -1
- package/dist/src/route-failover.js.map +0 -1
- package/dist/src/seller-catalog.d.ts.map +0 -1
- package/dist/src/seller-catalog.js.map +0 -1
- package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
- package/dist/src/seller-concurrency-limiter.js.map +0 -1
- package/dist/src/seller-metadata-cache.d.ts.map +0 -1
- package/dist/src/seller-metadata-cache.js.map +0 -1
- package/dist/src/seller-pool.d.ts.map +0 -1
- package/dist/src/seller-pool.js.map +0 -1
- package/dist/src/seller-route-planner.d.ts.map +0 -1
- package/dist/src/seller-route-planner.js.map +0 -1
- package/dist/src/seller-routing-config.d.ts.map +0 -1
- package/dist/src/seller-routing-config.js.map +0 -1
- package/dist/src/seller-routing-strategy.d.ts.map +0 -1
- package/dist/src/seller-routing-strategy.js.map +0 -1
- package/dist/src/stream-failover.d.ts.map +0 -1
- package/dist/src/stream-failover.js.map +0 -1
- package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
- package/dist/src/tb-clawtip-proof.js.map +0 -1
- package/dist/src/tb-proxyd.d.ts.map +0 -1
- package/dist/src/tb-proxyd.js.map +0 -1
- package/dist/src/terminal-detect.d.ts.map +0 -1
- package/dist/src/terminal-detect.js.map +0 -1
- package/dist/src/terminal-image.d.ts.map +0 -1
- package/dist/src/terminal-image.js.map +0 -1
- package/src/buyer-store.ts +0 -1090
- package/src/clawtip-bootstrap.ts +0 -65
- package/src/cli.ts +0 -2243
- package/src/credit-tracker.ts +0 -295
- package/src/daemon.ts +0 -5475
- package/src/doctor-clawtip-wallet.ts +0 -95
- package/src/doctor-diagnostics.ts +0 -1026
- package/src/index.ts +0 -16
- package/src/init-clawtip-activation.ts +0 -695
- package/src/init-payment-options.ts +0 -373
- package/src/init-setup.ts +0 -165
- package/src/model-index.ts +0 -278
- package/src/package-update.ts +0 -311
- package/src/prewarm-cache.ts +0 -485
- package/src/prewarm-scheduler.ts +0 -675
- package/src/provider-install.ts +0 -1006
- package/src/provider-routing-config.ts +0 -410
- package/src/registry-trust.ts +0 -51
- package/src/route-failover.ts +0 -304
- package/src/seller-catalog.ts +0 -505
- package/src/seller-concurrency-limiter.ts +0 -161
- package/src/seller-metadata-cache.ts +0 -91
- package/src/seller-pool.ts +0 -557
- package/src/seller-route-planner.ts +0 -513
- package/src/seller-routing-config.ts +0 -211
- package/src/seller-routing-strategy.ts +0 -362
- package/src/stream-failover.ts +0 -152
- package/src/tb-clawtip-proof.ts +0 -28
- package/src/tb-proxyd.ts +0 -101
- package/src/terminal-detect.ts +0 -333
- package/src/terminal-image.ts +0 -228
- package/static/ui/assets/index-0MVXD7bH.css +0 -1
- package/static/ui/assets/index-BVbeDEwq.js +0 -271
- package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
- package/tests/cli-routing.test.ts +0 -363
- package/tests/control-plane-ui-endpoints.test.ts +0 -1630
- package/tests/credit-tracker.test.ts +0 -165
- package/tests/daemon-413-fallback.test.ts +0 -92
- package/tests/daemon-classify.test.ts +0 -452
- package/tests/daemon-roles.test.ts +0 -92
- package/tests/daemon-trusted-registry-cache.test.ts +0 -132
- package/tests/e2e.test.ts +0 -366
- package/tests/image-generation-e2e.test.ts +0 -230
- package/tests/model-index.test.ts +0 -198
- package/tests/package-update.test.ts +0 -147
- package/tests/prewarm-cache.test.ts +0 -296
- package/tests/prewarm-scheduler.test.ts +0 -367
- package/tests/provider-routing-config.test.ts +0 -150
- package/tests/registry-trust.test.ts +0 -28
- package/tests/route-failover.test.ts +0 -222
- package/tests/seller-catalog-413.test.ts +0 -120
- package/tests/seller-catalog-utilities.test.ts +0 -124
- package/tests/seller-concurrency-limiter.test.ts +0 -83
- package/tests/seller-metadata-cache.test.ts +0 -89
- package/tests/seller-pool.test.ts +0 -365
- package/tests/seller-route-planner.test.ts +0 -312
- package/tests/seller-routing-config.test.ts +0 -124
- package/tests/seller-routing-strategy.test.ts +0 -167
- package/tests/stream-failover.test.ts +0 -52
- package/tests/thousand-seller.test.ts +0 -151
- package/tests/tokenbuddy.test.ts +0 -4043
- package/tsconfig.json +0 -8
package/src/prewarm-cache.ts
DELETED
|
@@ -1,485 +0,0 @@
|
|
|
1
|
-
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
2
|
-
|
|
3
|
-
const logger = createModuleLogger("tb-proxyd:prewarm-cache");
|
|
4
|
-
|
|
5
|
-
/**
 * TTL applied to a successfully warmed entry when nothing more specific is
 * supplied: ten minutes, expressed in milliseconds. This is the v1.2 starting
 * value (the trade-off is discussed in buyer-driven-fallback-design.md
 * §18.13). The cache constructor accepts an override, so tests and the
 * planned PR-E config loader can change it without re-architecting.
 */
export const DEFAULT_PREWARM_TTL_MS = 600_000;
|
|
12
|
-
|
|
13
|
-
/**
 * State machine of a cache entry.
 * - `warming`: a probe is being scheduled; the candidates have not settled yet
 * - `warm`: the last commit succeeded and the entry is still within its TTL
 * - `stale`: the TTL has elapsed or commits keep failing
 * - `empty`: the commit returned zero candidates (no seller serves this
 *   (model, protocol, payment) triple under the current registry)
 */
export type PrewarmState =
  | "warming"
  | "warm"
  | "stale"
  | "empty";
|
|
21
|
-
|
|
22
|
-
/**
 * Health profile of a single seller as recorded by one prewarm commit.
 * `PrewarmCache.commitWarm()` normalizes the fields (score into 0-100,
 * latencies non-negative) before storing it.
 */
export interface PrewarmCandidate {
  /** Globally unique seller ID. */
  sellerId: string;

  /**
   * Seller URL. Documented upstream as "trailing slash removed"; this module
   * stores the value exactly as it receives it.
   */
  url: string;

  /** Aggregate health score in the range 0-100; 0 means completely broken. */
  healthScore: number;

  /** Unix timestamp (ms) of the most recent success; 0 when there has been none. */
  lastSuccessAt: number;

  /** Unix timestamp (ms) of the most recent failure; 0 when there has been none. */
  lastFailAt: number;

  /** Average latency in milliseconds; the fallback metric when sorting. */
  avgLatencyMs: number;

  /** Latency of the health probe in milliseconds (optional). */
  healthProbeLatencyMs?: number;

  /** Time to first token in milliseconds (optional); preferred metric for speed sorting. */
  ttftMs?: number;

  /** Average inference latency in milliseconds (optional). */
  avgInferenceMs?: number;

  /** Average output throughput (tokens/s) over the most recent 10-minute window (optional). */
  avgTokensPerSecond?: number;

  /** Upstream status, aligned with the semantics the seller reports. */
  upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";

  /** Upstream error class (HTTP status / error code); only present on failure. */
  upstreamErrorClass?: string;

  /** End of a temporary capacity back-off; while later than "now" the seller is not routed to. */
  capacityBlockedUntil?: number;
}
|
|
54
|
-
|
|
55
|
-
/**
 * One cache entry: keyed by `(modelId, protocol, paymentMethod)`, it holds the
 * health profiles of a set of candidate sellers. `warmedAt` is where the TTL
 * window starts; `consecutiveWarmingFailures` drives exponential backoff.
 */
export interface PrewarmEntry {
  /** Model ID (documented as already normalized). */
  modelId: string;

  /** Protocol name (documented as already normalized). */
  protocol: string;

  /** Payment method (documented as already normalized). */
  paymentMethod: string;

  /** Current state of this entry. */
  state: PrewarmState;

  /** Candidate sellers that matched this (model, protocol, payment) triple. */
  candidates: PrewarmCandidate[];

  /** Timestamp of the successful commit; the starting point of the TTL. */
  warmedAt: number;

  /** TTL of this entry in milliseconds; may be overridden explicitly at commit time. */
  ttlMs: number;

  /** Number of consecutive warming failures; the doctor panel uses it to spot long-term breakage. */
  consecutiveWarmingFailures: number;

  /** Timestamp of the most recent transition into warming; for debugging. */
  lastInFlightAt?: number;
}
|
|
79
|
-
|
|
80
|
-
/**
 * "Raw" candidate data handed over by the scheduler at commit time. Every
 * field except the identity pair may be absent; `toCandidate()` normalizes
 * the values (clamping the score, cutting off negative latencies, etc.).
 */
export interface PrewarmCandidateInput {
  /** Seller ID. */
  sellerId: string;

  /** Seller URL. */
  url: string;

  /** Health score (optional; normalization falls back to 50 when missing). */
  healthScore?: number;

  /** Timestamp (ms) of the last success (optional). */
  lastSuccessAt?: number;

  /** Timestamp (ms) of the last failure (optional). */
  lastFailAt?: number;

  /** Average latency in milliseconds (optional). */
  avgLatencyMs?: number;

  /** Health-probe latency in milliseconds (optional). */
  healthProbeLatencyMs?: number;

  /** Time to first token in milliseconds (optional). */
  ttftMs?: number;

  /** Average inference latency in milliseconds (optional). */
  avgInferenceMs?: number;

  /** Average output throughput (tokens/s) over the most recent 10-minute window (optional). */
  avgTokensPerSecond?: number;

  /** Upstream status (optional). */
  upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";

  /** Upstream error class (optional). */
  upstreamErrorClass?: string;

  /** End of a temporary capacity back-off; while later than "now" the seller is not routed to. */
  capacityBlockedUntil?: number;
}
|
|
112
|
-
|
|
113
|
-
/**
 * Build the cache key for a (model, protocol, payment) triple.
 *
 * Each part is trimmed and lower-cased, then the three parts are joined with
 * the U+0001 control character. The key is therefore case- and
 * padding-insensitive, and it stays unambiguous as long as none of the parts
 * contains U+0001 itself.
 *
 * NOTE(review): an earlier version of this comment described a colon
 * separator and relied on v1.2 forbidding `:` inside model ids; the code does
 * not use a colon.
 *
 * @param modelId - model identifier
 * @param protocol - protocol name
 * @param paymentMethod - payment method name
 * @returns the normalized, U+0001-delimited cache key
 */
export function prewarmKey(modelId: string, protocol: string, paymentMethod: string): string {
  const parts = [modelId, protocol, paymentMethod];
  return parts.map((part) => part.trim().toLowerCase()).join("\u0001");
}
|
|
122
|
-
|
|
123
|
-
/**
 * Decode a U+0001-delimited cache key back into its three parts.
 *
 * @param key - a cache key in the format produced by joining three parts with U+0001
 * @returns the decoded triple, or `undefined` unless the key consists of
 *          exactly three non-empty parts
 */
function parseKey(key: string): { modelId: string; protocol: string; paymentMethod: string } | undefined {
  const [modelId, protocol, paymentMethod, ...surplus] = key.split("\u0001");
  // More than three segments: not a key this module produced.
  if (surplus.length > 0) {
    return undefined;
  }
  // Fewer than three segments leaves a part undefined; an empty segment is "".
  // Both are falsy and both are rejected.
  if (modelId && protocol && paymentMethod) {
    return { modelId, protocol, paymentMethod };
  }
  return undefined;
}
|
|
134
|
-
|
|
135
|
-
/** Optional construction-time settings for `PrewarmCache`. */
interface PrewarmCacheOptions {
  /** Clock returning the current time in milliseconds; the cache uses `Date.now` when omitted. */
  now?: () => number;
  /** TTL (ms) used when no more specific TTL is available; `DEFAULT_PREWARM_TTL_MS` when omitted. */
  defaultTtlMs?: number;
}
|
|
139
|
-
|
|
140
|
-
/**
 * In-process cache mapping a (model, protocol, payment) triple to the health
 * profiles of its candidate sellers.
 *
 * Intended for single-threaded access (the Node.js main thread); there is no
 * internal locking. `commitWarm` is the write path, `get` / `freshness` are
 * the hot-path reads, and an entry is expired once `warmedAt + ttlMs` has
 * been reached.
 *
 * TTL values that are not finite, non-negative numbers are ignored wherever a
 * TTL can be supplied (constructor, `beginWarming`, `commitWarm`): a NaN TTL
 * would make `age >= ttlMs` permanently false (the entry would never expire
 * nor be evicted), an infinite TTL would report `expiringSoon` forever, and a
 * negative TTL would expire the entry the moment it is written.
 */
export class PrewarmCache {
  private readonly entries = new Map<string, PrewarmEntry>();
  private readonly defaultTtlMs: number;
  private readonly now: () => number;

  /**
   * @param options - optional default TTL and clock; an unusable
   *   `defaultTtlMs` falls back to `DEFAULT_PREWARM_TTL_MS`
   */
  constructor(options: PrewarmCacheOptions = {}) {
    this.defaultTtlMs = PrewarmCache.isUsableTtl(options.defaultTtlMs)
      ? options.defaultTtlMs
      : DEFAULT_PREWARM_TTL_MS;
    this.now = options.now ?? Date.now;
  }

  /** True when `value` is a finite number >= 0, i.e. a TTL the expiry arithmetic can work with. */
  private static isUsableTtl(value: number | undefined): value is number {
    return value !== undefined && Number.isFinite(value) && value >= 0;
  }

  /**
   * Pick the TTL for a new entry: the explicit request if usable, otherwise
   * the previous entry's TTL if usable, otherwise the cache default.
   */
  private resolveTtlMs(requested: number | undefined, inherited: number | undefined): number {
    if (PrewarmCache.isUsableTtl(requested)) {
      return requested;
    }
    if (PrewarmCache.isUsableTtl(inherited)) {
      return inherited;
    }
    return this.defaultTtlMs;
  }

  /**
   * Read an entry without mutating state. Returns `undefined` when the key is
   * unknown; the caller decides whether "absent" should be treated as a miss
   * (i.e. trigger a fresh prewarm) or as a known empty model.
   */
  get(modelId: string, protocol: string, paymentMethod: string): PrewarmEntry | undefined {
    return this.entries.get(prewarmKey(modelId, protocol, paymentMethod));
  }

  /**
   * Look up an entry and return a `PrewarmFreshness` descriptor. This is the
   * cheap path used on every inference request to decide whether a prewarm is
   * still authoritative, expiring soon, or already stale.
   *
   * An absent entry is reported as `present: false`, expired, expiring soon
   * and in state `"empty"`. For a present entry, `expiringSoon` means at most
   * 10% of the TTL remains, and an expired entry is always reported as
   * `"stale"` regardless of its stored state.
   */
  freshness(modelId: string, protocol: string, paymentMethod: string): PrewarmFreshness {
    const entry = this.get(modelId, protocol, paymentMethod);
    if (!entry) {
      return { present: false, expired: true, expiringSoon: true, state: "empty" };
    }
    const now = this.now();
    const ageMs = now - entry.warmedAt;
    const expired = ageMs >= entry.ttlMs;
    const remainingMs = Math.max(0, entry.ttlMs - ageMs);
    return {
      present: true,
      expired,
      expiringSoon: !expired && remainingMs <= entry.ttlMs * 0.1,
      remainingMs,
      state: expired ? "stale" : entry.state,
      entry
    };
  }

  /**
   * Mark a (model, protocol, payment) triple as currently being warmed. The
   * stored entry switches to state `warming`; candidates, `warmedAt` and the
   * failure counter of an existing entry are carried over so the old data
   * stays readable until the new probe commits.
   *
   * @param ttlMs - optional TTL override; ignored unless finite and >= 0
   * @returns the key, the entry now stored, and whether an entry existed before
   */
  beginWarming(modelId: string, protocol: string, paymentMethod: string, ttlMs?: number): PrewarmBeginResult {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const previous = this.entries.get(key);
    const now = this.now();
    const entry: PrewarmEntry = {
      modelId,
      protocol,
      paymentMethod,
      state: "warming",
      candidates: previous?.candidates ?? [],
      warmedAt: previous?.warmedAt ?? now,
      ttlMs: this.resolveTtlMs(ttlMs, previous?.ttlMs),
      consecutiveWarmingFailures: previous?.consecutiveWarmingFailures ?? 0,
      lastInFlightAt: now
    };
    this.entries.set(key, entry);
    logger.debug("prewarm.cache.warming_started", "prewarm probe in flight", {
      modelId,
      protocol,
      paymentMethod,
      ttlMs: entry.ttlMs,
      previousState: previous?.state
    });
    return { key, entry, hadPrevious: Boolean(previous) };
  }

  /**
   * Commit a successful warm. `warmedAt` is reset to the current time so the
   * TTL window starts fresh, the failure counter is cleared, and the stored
   * candidates are replaced by the normalized probe results. An empty
   * candidate list yields state `empty`, anything else `warm`.
   *
   * @param input - the triple, the raw candidates and an optional TTL
   *   override (ignored unless finite and >= 0)
   * @returns the key, the new entry, and the seller IDs of the previous
   *   entry's candidates for caller-side telemetry (e.g. detecting churn)
   */
  commitWarm(input: {
    modelId: string;
    protocol: string;
    paymentMethod: string;
    candidates: PrewarmCandidateInput[];
    ttlMs?: number;
  }): PrewarmCommitResult {
    const key = prewarmKey(input.modelId, input.protocol, input.paymentMethod);
    const previous = this.entries.get(key);
    const now = this.now();
    const next: PrewarmEntry = {
      modelId: input.modelId,
      protocol: input.protocol,
      paymentMethod: input.paymentMethod,
      state: input.candidates.length > 0 ? "warm" : "empty",
      candidates: input.candidates.map(toCandidate),
      warmedAt: now,
      ttlMs: this.resolveTtlMs(input.ttlMs, previous?.ttlMs),
      consecutiveWarmingFailures: 0,
      lastInFlightAt: now
    };
    this.entries.set(key, next);

    if (input.candidates.length === 0) {
      logger.warn("prewarm.cache.commit_empty", "prewarm commit returned no candidates", {
        modelId: input.modelId,
        protocol: input.protocol,
        paymentMethod: input.paymentMethod
      });
    } else {
      logger.info("prewarm.cache.committed", "prewarm commit updated candidates", {
        modelId: input.modelId,
        protocol: input.protocol,
        paymentMethod: input.paymentMethod,
        candidateCount: next.candidates.length,
        ttlMs: next.ttlMs
      });
    }

    return {
      key,
      entry: next,
      replacedSellers: previous?.candidates.map((c) => c.sellerId) ?? []
    };
  }

  /**
   * Mark a warm as failed. Consecutive failures are tracked so the scheduler
   * can apply exponential backoff and so `tb doctor` can surface persistently
   * broken models.
   *
   * @param errorMessage - optional detail, only used for the log record
   * @returns the updated entry (state `stale`, counter incremented), or
   *   `undefined` when no entry exists for the triple — nothing is created
   */
  recordFailure(modelId: string, protocol: string, paymentMethod: string, errorMessage?: string): PrewarmEntry | undefined {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const previous = this.entries.get(key);
    if (!previous) {
      return undefined;
    }
    const next: PrewarmEntry = {
      ...previous,
      state: "stale",
      consecutiveWarmingFailures: previous.consecutiveWarmingFailures + 1,
      lastInFlightAt: this.now()
    };
    this.entries.set(key, next);
    logger.warn("prewarm.cache.failure_recorded", "prewarm commit failed; entry marked stale", {
      modelId,
      protocol,
      paymentMethod,
      consecutiveFailures: next.consecutiveWarmingFailures,
      errorMessage
    });
    return next;
  }

  /**
   * Remove the given seller from every entry that references it. Used when
   * the registry signals a seller is gone (grace period expires) or when a
   * hard failure (e.g. 5xx storm) should drop the seller from the cache
   * immediately. An entry left without candidates switches to state `empty`.
   *
   * @returns the number of entries that contained the seller
   */
  invalidateSeller(sellerId: string): number {
    let removed = 0;
    // Only existing keys are overwritten below, so the Map iteration does not
    // pick up new entries.
    for (const [key, entry] of this.entries.entries()) {
      const filtered = entry.candidates.filter((candidate) => candidate.sellerId !== sellerId);
      if (filtered.length !== entry.candidates.length) {
        removed += 1;
        this.entries.set(key, {
          ...entry,
          candidates: filtered,
          state: filtered.length > 0 ? entry.state : "empty"
        });
      }
    }
    if (removed > 0) {
      logger.info("prewarm.cache.seller_invalidated", "seller dropped from all prewarm entries", {
        sellerId,
        entriesAffected: removed
      });
    }
    return removed;
  }

  /**
   * Invalidate a specific cache key. Used by `tb doctor --refresh <model>`
   * and by the registry loop when a model is removed from the focus set.
   *
   * @returns `true` when an entry existed and was removed
   */
  invalidateKey(modelId: string, protocol: string, paymentMethod: string): boolean {
    return this.entries.delete(prewarmKey(modelId, protocol, paymentMethod));
  }

  /**
   * Drop every entry whose TTL has expired. Returns the number of removed
   * entries so the caller can log it.
   *
   * @param now - reference time in ms; defaults to the cache clock
   */
  evictExpired(now: number = this.now()): number {
    let removed = 0;
    for (const [key, entry] of this.entries.entries()) {
      if (now - entry.warmedAt >= entry.ttlMs) {
        this.entries.delete(key);
        removed += 1;
      }
    }
    if (removed > 0) {
      logger.info("prewarm.cache.evicted", "expired prewarm entries evicted", { removed });
    }
    return removed;
  }

  /**
   * Returns `true` when the entry's TTL is within `withinMs` of expiring but
   * has not expired yet. The scheduler uses this to schedule idle-cycle
   * prewarms just-in-time rather than at fixed wall-clock intervals.
   * An absent entry yields `false`.
   */
  isExpiringSoon(modelId: string, protocol: string, paymentMethod: string, withinMs: number, now: number = this.now()): boolean {
    const entry = this.get(modelId, protocol, paymentMethod);
    if (!entry) {
      return false;
    }
    const age = now - entry.warmedAt;
    return age >= entry.ttlMs - withinMs && age < entry.ttlMs;
  }

  /**
   * Snapshot all entries for diagnostics. Each entry and each of its
   * candidates is copied, so callers can serialize or modify the result
   * without touching cache state.
   */
  snapshot(): PrewarmEntry[] {
    return Array.from(this.entries.values()).map((entry) => ({
      ...entry,
      candidates: entry.candidates.map((candidate) => ({ ...candidate }))
    }));
  }

  /**
   * List every cached key, decoded back into its (model, protocol, payment)
   * triple. Used by `tb doctor` to render the prewarm table. Keys that do not
   * decode into three non-empty parts are skipped. The parts come from the
   * key, so they are in the trimmed, lower-cased form.
   */
  keys(): Array<{ modelId: string; protocol: string; paymentMethod: string }> {
    const out: Array<{ modelId: string; protocol: string; paymentMethod: string }> = [];
    for (const key of this.entries.keys()) {
      const parsed = parseKey(key);
      if (parsed) {
        out.push(parsed);
      }
    }
    return out;
  }

  /** Number of entries currently cached. */
  size(): number {
    return this.entries.size;
  }

  /** Remove every entry. */
  clear(): void {
    this.entries.clear();
  }
}
|
|
402
|
-
|
|
403
|
-
/**
 * Result of `PrewarmCache.freshness()`: a quick verdict on whether the
 * existing prewarm can still be trusted. It is used on the hot path of every
 * inference request so callers do not have to walk the cache themselves.
 */
export interface PrewarmFreshness {
  /** Whether a matching entry exists (`false` is equivalent to the cold path). */
  present: boolean;

  /** Whether the TTL has already been exceeded; also `true` when no entry exists. */
  expired: boolean;

  /**
   * `true` when at most 10% of the TTL remains; the scheduler uses it to
   * trigger idle prewarms. Also `true` when no entry exists.
   */
  expiringSoon: boolean;

  /**
   * Remaining TTL in milliseconds, floored at 0 once the TTL has elapsed;
   * left unset when no entry exists.
   */
  remainingMs?: number;

  /** Current state of the entry (forced to `stale` once expired; `empty` when absent). */
  state: PrewarmState;

  /** The associated cache entry, if there is one. */
  entry?: PrewarmEntry;
}
|
|
421
|
-
|
|
422
|
-
/**
 * Return value of `PrewarmCache.beginWarming()`: records that a prewarm has
 * gone in flight. Upper layers use `hadPrevious` to decide whether the old
 * candidates should stay visible while the entry is `warming`.
 */
export interface PrewarmBeginResult {
  /** Cache key (equivalent to the output of `prewarmKey()`). */
  key: string;

  /** The entry that was just written back to the cache (state = `warming`). */
  entry: PrewarmEntry;

  /** Whether the cache already held an entry before the call. */
  hadPrevious: boolean;
}
|
|
434
|
-
|
|
435
|
-
/**
 * Return value of `PrewarmCache.commitWarm()`: one candidate set was
 * committed successfully. Upper layers use `replacedSellers` for candidate
 * churn detection.
 */
export interface PrewarmCommitResult {
  /** Cache key. */
  key: string;

  /** The entry as stored after the commit (state = `warm` or `empty`). */
  entry: PrewarmEntry;

  /**
   * Seller IDs of all candidates held by the previous version of the entry
   * (empty when there was none). A seller listed here may appear again in the
   * new entry.
   */
  replacedSellers: string[];
}
|
|
447
|
-
|
|
448
|
-
/**
 * Normalize one raw scheduler candidate into the shape stored in the cache.
 *
 * - `healthScore` defaults to 50 and is clamped by `clampScore`.
 * - `lastSuccessAt`, `lastFailAt` and `avgLatencyMs` become 0 when missing or
 *   not finite, and are never negative. Sending them through
 *   `finiteNonNegative` keeps NaN / Infinity out of the cache; the previous
 *   `Math.max(0, value ?? 0)` returned NaN for a NaN latency.
 * - The optional latency / capacity fields are kept only when finite (and
 *   clamped to >= 0); `avgTokensPerSecond` only when finite and > 0.
 * - `sellerId`, `url`, `upstreamStatus` and `upstreamErrorClass` are copied
 *   unchanged.
 *
 * @param input - raw candidate as supplied at commit time
 * @returns the normalized candidate
 */
function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
  return {
    sellerId: input.sellerId,
    url: input.url,
    healthScore: clampScore(input.healthScore ?? 50),
    lastSuccessAt: finiteNonNegative(input.lastSuccessAt) ?? 0,
    lastFailAt: finiteNonNegative(input.lastFailAt) ?? 0,
    avgLatencyMs: finiteNonNegative(input.avgLatencyMs) ?? 0,
    healthProbeLatencyMs: finiteNonNegative(input.healthProbeLatencyMs),
    ttftMs: finiteNonNegative(input.ttftMs),
    avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
    avgTokensPerSecond: finitePositive(input.avgTokensPerSecond),
    upstreamStatus: input.upstreamStatus,
    upstreamErrorClass: input.upstreamErrorClass,
    capacityBlockedUntil: finiteNonNegative(input.capacityBlockedUntil)
  };
}
|
|
465
|
-
|
|
466
|
-
/**
 * Keep a value only if it is a finite number, raising negatives to 0.
 *
 * @param value - candidate number, possibly missing
 * @returns `Math.max(0, value)` for finite input, otherwise `undefined`
 */
function finiteNonNegative(value: number | undefined): number | undefined {
  if (value === undefined || !Number.isFinite(value)) {
    return undefined;
  }
  return Math.max(0, value);
}
|
|
469
|
-
|
|
470
|
-
/**
 * Keep a value only if it is a finite number strictly greater than zero.
 *
 * @param value - candidate number, possibly missing
 * @returns the value itself when finite and > 0, otherwise `undefined`
 */
function finitePositive(value: number | undefined): number | undefined {
  if (value === undefined || !Number.isFinite(value) || value <= 0) {
    return undefined;
  }
  return value;
}
|
|
473
|
-
|
|
474
|
-
/**
 * Force a health score into the 0-100 range.
 *
 * @param score - raw score
 * @returns 50 for NaN or infinite input, otherwise the score limited to
 *          the closed interval 0 to 100
 */
function clampScore(score: number): number {
  if (!Number.isFinite(score)) {
    // Unusable input is treated as the neutral midpoint.
    return 50;
  }
  return score < 0 ? 0 : score > 100 ? 100 : score;
}
|