@tokenbuddy/tokenbuddy 1.0.36 → 1.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/dist/src/buyer-store.d.ts +7 -2
  2. package/dist/src/buyer-store.js +46 -7
  3. package/dist/src/cli.d.ts +1 -0
  4. package/dist/src/cli.js +15 -7
  5. package/dist/src/daemon.d.ts +12 -0
  6. package/dist/src/daemon.js +791 -61
  7. package/dist/src/doctor-diagnostics.js +1 -6
  8. package/dist/src/provider-install.d.ts +2 -2
  9. package/dist/src/provider-install.js +248 -2
  10. package/dist/src/seller-catalog.d.ts +21 -0
  11. package/dist/src/seller-catalog.js +17 -0
  12. package/dist/src/seller-route-planner.d.ts +4 -1
  13. package/dist/src/seller-route-planner.js +3 -0
  14. package/dist/src/seller-routing-strategy.d.ts +3 -0
  15. package/dist/src/terminal-detect.d.ts +1 -1
  16. package/dist/src/terminal-detect.js +3 -2
  17. package/dist/src/workdir.d.ts +10 -0
  18. package/dist/src/workdir.js +26 -0
  19. package/package.json +15 -2
  20. package/static/ui/assets/index-Djfl9tw5.js +271 -0
  21. package/static/ui/assets/index-DkfztCkn.css +1 -0
  22. package/static/ui/index.html +2 -2
  23. package/dist/src/buyer-store.d.ts.map +0 -1
  24. package/dist/src/buyer-store.js.map +0 -1
  25. package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
  26. package/dist/src/clawtip-bootstrap.js.map +0 -1
  27. package/dist/src/cli.d.ts.map +0 -1
  28. package/dist/src/cli.js.map +0 -1
  29. package/dist/src/credit-tracker.d.ts.map +0 -1
  30. package/dist/src/credit-tracker.js.map +0 -1
  31. package/dist/src/daemon.d.ts.map +0 -1
  32. package/dist/src/daemon.js.map +0 -1
  33. package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
  34. package/dist/src/doctor-clawtip-wallet.js.map +0 -1
  35. package/dist/src/doctor-diagnostics.d.ts.map +0 -1
  36. package/dist/src/doctor-diagnostics.js.map +0 -1
  37. package/dist/src/index.d.ts.map +0 -1
  38. package/dist/src/index.js.map +0 -1
  39. package/dist/src/init-clawtip-activation.d.ts.map +0 -1
  40. package/dist/src/init-clawtip-activation.js.map +0 -1
  41. package/dist/src/init-payment-options.d.ts.map +0 -1
  42. package/dist/src/init-payment-options.js.map +0 -1
  43. package/dist/src/init-setup.d.ts.map +0 -1
  44. package/dist/src/init-setup.js.map +0 -1
  45. package/dist/src/model-index.d.ts.map +0 -1
  46. package/dist/src/model-index.js.map +0 -1
  47. package/dist/src/package-update.d.ts.map +0 -1
  48. package/dist/src/package-update.js.map +0 -1
  49. package/dist/src/prewarm-cache.d.ts.map +0 -1
  50. package/dist/src/prewarm-cache.js.map +0 -1
  51. package/dist/src/prewarm-scheduler.d.ts.map +0 -1
  52. package/dist/src/prewarm-scheduler.js.map +0 -1
  53. package/dist/src/provider-install.d.ts.map +0 -1
  54. package/dist/src/provider-install.js.map +0 -1
  55. package/dist/src/provider-routing-config.d.ts.map +0 -1
  56. package/dist/src/provider-routing-config.js.map +0 -1
  57. package/dist/src/registry-trust.d.ts.map +0 -1
  58. package/dist/src/registry-trust.js.map +0 -1
  59. package/dist/src/route-failover.d.ts.map +0 -1
  60. package/dist/src/route-failover.js.map +0 -1
  61. package/dist/src/seller-catalog.d.ts.map +0 -1
  62. package/dist/src/seller-catalog.js.map +0 -1
  63. package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
  64. package/dist/src/seller-concurrency-limiter.js.map +0 -1
  65. package/dist/src/seller-metadata-cache.d.ts.map +0 -1
  66. package/dist/src/seller-metadata-cache.js.map +0 -1
  67. package/dist/src/seller-pool.d.ts.map +0 -1
  68. package/dist/src/seller-pool.js.map +0 -1
  69. package/dist/src/seller-route-planner.d.ts.map +0 -1
  70. package/dist/src/seller-route-planner.js.map +0 -1
  71. package/dist/src/seller-routing-config.d.ts.map +0 -1
  72. package/dist/src/seller-routing-config.js.map +0 -1
  73. package/dist/src/seller-routing-strategy.d.ts.map +0 -1
  74. package/dist/src/seller-routing-strategy.js.map +0 -1
  75. package/dist/src/stream-failover.d.ts.map +0 -1
  76. package/dist/src/stream-failover.js.map +0 -1
  77. package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
  78. package/dist/src/tb-clawtip-proof.js.map +0 -1
  79. package/dist/src/tb-proxyd.d.ts.map +0 -1
  80. package/dist/src/tb-proxyd.js.map +0 -1
  81. package/dist/src/terminal-detect.d.ts.map +0 -1
  82. package/dist/src/terminal-detect.js.map +0 -1
  83. package/dist/src/terminal-image.d.ts.map +0 -1
  84. package/dist/src/terminal-image.js.map +0 -1
  85. package/src/buyer-store.ts +0 -1090
  86. package/src/clawtip-bootstrap.ts +0 -65
  87. package/src/cli.ts +0 -2243
  88. package/src/credit-tracker.ts +0 -295
  89. package/src/daemon.ts +0 -5475
  90. package/src/doctor-clawtip-wallet.ts +0 -95
  91. package/src/doctor-diagnostics.ts +0 -1026
  92. package/src/index.ts +0 -16
  93. package/src/init-clawtip-activation.ts +0 -695
  94. package/src/init-payment-options.ts +0 -373
  95. package/src/init-setup.ts +0 -165
  96. package/src/model-index.ts +0 -278
  97. package/src/package-update.ts +0 -311
  98. package/src/prewarm-cache.ts +0 -485
  99. package/src/prewarm-scheduler.ts +0 -675
  100. package/src/provider-install.ts +0 -1006
  101. package/src/provider-routing-config.ts +0 -410
  102. package/src/registry-trust.ts +0 -51
  103. package/src/route-failover.ts +0 -304
  104. package/src/seller-catalog.ts +0 -505
  105. package/src/seller-concurrency-limiter.ts +0 -161
  106. package/src/seller-metadata-cache.ts +0 -91
  107. package/src/seller-pool.ts +0 -557
  108. package/src/seller-route-planner.ts +0 -513
  109. package/src/seller-routing-config.ts +0 -211
  110. package/src/seller-routing-strategy.ts +0 -362
  111. package/src/stream-failover.ts +0 -152
  112. package/src/tb-clawtip-proof.ts +0 -28
  113. package/src/tb-proxyd.ts +0 -101
  114. package/src/terminal-detect.ts +0 -333
  115. package/src/terminal-image.ts +0 -228
  116. package/static/ui/assets/index-0MVXD7bH.css +0 -1
  117. package/static/ui/assets/index-BVbeDEwq.js +0 -271
  118. package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
  119. package/tests/cli-routing.test.ts +0 -363
  120. package/tests/control-plane-ui-endpoints.test.ts +0 -1630
  121. package/tests/credit-tracker.test.ts +0 -165
  122. package/tests/daemon-413-fallback.test.ts +0 -92
  123. package/tests/daemon-classify.test.ts +0 -452
  124. package/tests/daemon-roles.test.ts +0 -92
  125. package/tests/daemon-trusted-registry-cache.test.ts +0 -132
  126. package/tests/e2e.test.ts +0 -366
  127. package/tests/image-generation-e2e.test.ts +0 -230
  128. package/tests/model-index.test.ts +0 -198
  129. package/tests/package-update.test.ts +0 -147
  130. package/tests/prewarm-cache.test.ts +0 -296
  131. package/tests/prewarm-scheduler.test.ts +0 -367
  132. package/tests/provider-routing-config.test.ts +0 -150
  133. package/tests/registry-trust.test.ts +0 -28
  134. package/tests/route-failover.test.ts +0 -222
  135. package/tests/seller-catalog-413.test.ts +0 -120
  136. package/tests/seller-catalog-utilities.test.ts +0 -124
  137. package/tests/seller-concurrency-limiter.test.ts +0 -83
  138. package/tests/seller-metadata-cache.test.ts +0 -89
  139. package/tests/seller-pool.test.ts +0 -365
  140. package/tests/seller-route-planner.test.ts +0 -312
  141. package/tests/seller-routing-config.test.ts +0 -124
  142. package/tests/seller-routing-strategy.test.ts +0 -167
  143. package/tests/stream-failover.test.ts +0 -52
  144. package/tests/thousand-seller.test.ts +0 -151
  145. package/tests/tokenbuddy.test.ts +0 -4043
  146. package/tsconfig.json +0 -8
@@ -1,485 +0,0 @@
1
- import { createModuleLogger } from "@tokenbuddy/logging";
2
-
3
- const logger = createModuleLogger("tb-proxyd:prewarm-cache");
4
-
/**
 * Default time-to-live, in milliseconds, of a successfully warmed entry.
 *
 * Ten minutes is the v1.2 starting point; the trade-off is discussed in
 * buyer-driven-fallback-design.md §18.13. `PrewarmCache` accepts an override
 * through its constructor options, so tests and the future PR-E config loader
 * can change the value without re-architecting.
 */
export const DEFAULT_PREWARM_TTL_MS = 1000 * 60 * 10; // 10 minutes
12
-
13
- /**
14
- * State machine of a cache entry.
15
- * - `warming`: a warm is in progress; the candidates are not settled yet
16
- * - `warm`: the last commit succeeded and the entry is within its TTL
17
- * - `stale`: the TTL expired or a warm attempt failed
18
- * - `empty`: the commit returned 0 candidates (no seller serves this (model, protocol, payment) in the current registry)
19
- */
20
- export type PrewarmState = "warming" | "warm" | "stale" | "empty";
21
-
22
- /**
23
- * Health profile of a single seller after one prewarm commit.
24
- * Written by `PrewarmCache.commitWarm()` after the fields are normalized (score clamped to 0-100, latencies made non-negative).
25
- */
26
- export interface PrewarmCandidate {
27
- /** Globally unique seller ID */
28
- sellerId: string;
29
- /** Seller URL; presumably already stripped of its trailing slash by the caller - `toCandidate()` copies it unchanged */
30
- url: string;
31
- /** Overall health score, 0-100; 0 means completely broken */
32
- healthScore: number; // 0-100
33
- /** Unix timestamp (ms) of the last success; 0 means no success yet */
34
- lastSuccessAt: number;
35
- /** Unix timestamp (ms) of the last failure; 0 means no failure yet */
36
- lastFailAt: number;
37
- /** Average latency (ms); fallback metric when sorting */
38
- avgLatencyMs: number;
39
- /** Health-probe latency (ms); optional */
40
- healthProbeLatencyMs?: number;
41
- /** Time to first token (ms); optional; the preferred metric for speed sorting */
42
- ttftMs?: number;
43
- /** Average inference latency (ms); optional */
44
- avgInferenceMs?: number;
45
- /** Average output throughput (tokens/s) over the last 10-minute window; optional */
46
- avgTokensPerSecond?: number;
47
- /** Upstream status (aligned with the semantics reported by the seller) */
48
- upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
49
- /** Upstream error class (HTTP status / error code); present only on failure */
50
- upstreamErrorClass?: string;
51
- /** End of a temporary capacity back-off; the seller is excluded from routing while this is later than the current time */
52
- capacityBlockedUntil?: number;
53
- }
54
-
55
- /**
56
- * Cache entry: keyed by `(modelId, protocol, paymentMethod)`, it stores the health profiles of a set of candidate sellers.
57
- * `warmedAt` is the start of the TTL window; `consecutiveWarmingFailures` is tracked so the scheduler can apply exponential backoff.
58
- */
59
- export interface PrewarmEntry {
60
- /** Model ID as supplied by the writer; the cache key uses its trimmed, lower-cased form */
61
- modelId: string;
62
- /** Protocol name as supplied by the writer; the cache key uses its trimmed, lower-cased form */
63
- protocol: string;
64
- /** Payment method as supplied by the writer; the cache key uses its trimmed, lower-cased form */
65
- paymentMethod: string;
66
- /** Current state of the entry */
67
- state: PrewarmState;
68
- /** Candidate sellers matched for this (model, protocol, payment) */
69
- candidates: PrewarmCandidate[];
70
- /** Timestamp of the most recent successful commit; start of the TTL window */
71
- warmedAt: number;
72
- /** TTL of this entry (ms); may be overridden explicitly at commit time */
73
- ttlMs: number;
74
- /** Number of consecutive warming failures; the doctor panel uses it to flag persistently broken entries */
75
- consecutiveWarmingFailures: number;
76
- /** Timestamp of the latest warming activity (begin, commit or recorded failure); for debugging */
77
- lastInFlightAt?: number;
78
- }
79
-
80
- /**
81
- * "Raw" candidate data passed in by the scheduler at commit time: apart from `sellerId` and `url`, every field is optional;
82
- * `toCandidate()` normalizes it (score clamping, negative latencies raised to 0, etc.).
83
- */
84
- export interface PrewarmCandidateInput {
85
- /** Seller ID */
86
- sellerId: string;
87
- /** Seller URL */
88
- url: string;
89
- /** Health score (optional; falls back to 50 during normalization) */
90
- healthScore?: number;
91
- /** Timestamp of the last success (ms); optional */
92
- lastSuccessAt?: number;
93
- /** Timestamp of the last failure (ms); optional */
94
- lastFailAt?: number;
95
- /** Average latency (ms); optional */
96
- avgLatencyMs?: number;
97
- /** Health-probe latency (ms); optional */
98
- healthProbeLatencyMs?: number;
99
- /** Time to first token, TTFT (ms); optional */
100
- ttftMs?: number;
101
- /** Average inference latency (ms); optional */
102
- avgInferenceMs?: number;
103
- /** Average output throughput (tokens/s) over the last 10-minute window; optional */
104
- avgTokensPerSecond?: number;
105
- /** Upstream status; optional */
106
- upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
107
- /** Upstream error class; optional */
108
- upstreamErrorClass?: string;
109
- /** End of a temporary capacity back-off; the seller is excluded from routing while this is later than the current time */
110
- capacityBlockedUntil?: number;
111
- }
112
-
/**
 * Builds the cache key for a (model, protocol, payment) triple.
 *
 * Each part is trimmed and lower-cased, then the three parts are joined with
 * the U+0001 control character. Lookups are therefore insensitive to case and
 * to surrounding whitespace, and the key stays collision-free as long as no
 * part contains U+0001 itself (a `:` inside a model id is harmless).
 *
 * @param modelId - model identifier
 * @param protocol - protocol name
 * @param paymentMethod - payment method name
 * @returns the normalized, U+0001-separated key
 */
export function prewarmKey(modelId: string, protocol: string, paymentMethod: string): string {
  const parts = [modelId, protocol, paymentMethod];
  return parts.map((part) => part.trim().toLowerCase()).join("\u0001");
}
122
-
/**
 * Decodes a key produced by `prewarmKey` back into its three parts.
 *
 * @param key - a U+0001-separated cache key
 * @returns the decoded triple, or `undefined` when the key does not consist of
 *   exactly three non-empty parts
 */
function parseKey(key: string): { modelId: string; protocol: string; paymentMethod: string } | undefined {
  const segments = key.split("\u0001");
  const [modelId, protocol, paymentMethod] = segments;
  if (segments.length === 3 && modelId && protocol && paymentMethod) {
    return { modelId, protocol, paymentMethod };
  }
  return undefined;
}
134
-
135
- interface PrewarmCacheOptions { // Constructor options of PrewarmCache
136
- defaultTtlMs?: number; // TTL (ms) used when neither the call nor the existing entry supplies one; defaults to DEFAULT_PREWARM_TTL_MS
137
- now?: () => number; // Clock used for all age/TTL arithmetic; defaults to Date.now
138
- }
139
-
/**
 * In-process cache that maps a (model, protocol, payment) triple to the health
 * profiles of its candidate sellers.
 *
 * The cache has no internal locking; it is written for single-threaded use on
 * the Node.js main thread. `commitWarm` is the write path, `get` / `freshness`
 * are the hot-path reads, and an entry counts as expired once
 * `now - warmedAt >= ttlMs`.
 *
 * TTLs must be finite numbers. A NaN TTL would make an entry impossible to
 * expire or evict, and an infinite TTL would additionally report
 * `expiringSoon` forever, so non-finite values are ignored and the next
 * fallback (the previous entry's TTL, then the cache default) is used instead.
 */
export class PrewarmCache {
  private readonly entries = new Map<string, PrewarmEntry>();
  private readonly defaultTtlMs: number;
  private readonly now: () => number;

  /**
   * @param options - optional overrides for the default TTL and the clock;
   *   a missing or non-finite `defaultTtlMs` falls back to
   *   `DEFAULT_PREWARM_TTL_MS`, a missing `now` falls back to `Date.now`.
   */
  constructor(options: PrewarmCacheOptions = {}) {
    const configuredTtl = options.defaultTtlMs;
    this.defaultTtlMs =
      typeof configuredTtl === "number" && Number.isFinite(configuredTtl)
        ? configuredTtl
        : DEFAULT_PREWARM_TTL_MS;
    this.now = options.now ?? Date.now;
  }

  /**
   * Picks the TTL for an entry that is about to be written: the explicitly
   * requested value, else the previous entry's value, else the cache default.
   * Missing and non-finite values are skipped at every step.
   */
  private resolveTtl(requested: number | undefined, previous: number | undefined): number {
    for (const candidate of [requested, previous]) {
      if (typeof candidate === "number" && Number.isFinite(candidate)) {
        return candidate;
      }
    }
    return this.defaultTtlMs;
  }

  /**
   * Reads an entry without mutating state. The returned object is the live
   * cache entry, not a copy. Returns `undefined` when the key is unknown; the
   * caller decides whether "absent" is a miss (i.e. trigger a fresh prewarm)
   * or a known empty model.
   */
  get(modelId: string, protocol: string, paymentMethod: string): PrewarmEntry | undefined {
    return this.entries.get(prewarmKey(modelId, protocol, paymentMethod));
  }

  /**
   * Looks up an entry and returns a `PrewarmFreshness` descriptor. This is the
   * cheap path used on every inference request to decide whether a prewarm is
   * still authoritative, expiring soon, or already stale.
   *
   * For an unknown key the descriptor is
   * `{ present: false, expired: true, expiringSoon: true, state: "empty" }`.
   * For a known key `remainingMs` is always set (0 once expired),
   * `expiringSoon` is true while at most 10% of the TTL remains, and `state`
   * is forced to `"stale"` once the entry is expired.
   */
  freshness(modelId: string, protocol: string, paymentMethod: string): PrewarmFreshness {
    const entry = this.get(modelId, protocol, paymentMethod);
    if (!entry) {
      return { present: false, expired: true, expiringSoon: true, state: "empty" };
    }
    const now = this.now();
    const ageMs = now - entry.warmedAt;
    const expired = ageMs >= entry.ttlMs;
    const remainingMs = Math.max(0, entry.ttlMs - ageMs);
    return {
      present: true,
      expired,
      expiringSoon: !expired && remainingMs <= entry.ttlMs * 0.1,
      remainingMs,
      state: expired ? "stale" : entry.state,
      entry
    };
  }

  /**
   * Marks a (model, protocol, payment) triple as currently being warmed.
   *
   * The stored entry is replaced by one in state `"warming"`. When a previous
   * entry exists, its candidates, `warmedAt`, failure counter and (unless
   * overridden by a finite `ttlMs`) its TTL are carried over, so the old
   * candidates stay readable until the new probe commits. Without a previous
   * entry the candidate list is empty and `warmedAt` is the current time.
   *
   * @param ttlMs - optional TTL override; ignored when not finite
   * @returns the key, the entry now stored, and whether an entry existed before
   */
  beginWarming(modelId: string, protocol: string, paymentMethod: string, ttlMs?: number): PrewarmBeginResult {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const previous = this.entries.get(key);
    const now = this.now();
    const entry: PrewarmEntry = {
      modelId,
      protocol,
      paymentMethod,
      state: "warming",
      candidates: previous?.candidates ?? [],
      warmedAt: previous?.warmedAt ?? now,
      ttlMs: this.resolveTtl(ttlMs, previous?.ttlMs),
      consecutiveWarmingFailures: previous?.consecutiveWarmingFailures ?? 0,
      lastInFlightAt: now
    };
    this.entries.set(key, entry);
    logger.debug("prewarm.cache.warming_started", "prewarm probe in flight", {
      modelId,
      protocol,
      paymentMethod,
      ttlMs: entry.ttlMs,
      previousState: previous?.state
    });
    return { key, entry, hadPrevious: Boolean(previous) };
  }

  /**
   * Commits a successful warm. `warmedAt` is reset to the current time so the
   * TTL window starts fresh, the failure counter is reset to 0, and the
   * candidate list is replaced by the normalized probe results. The state
   * becomes `"warm"`, or `"empty"` when no candidates were supplied.
   *
   * @param input - identity of the entry, the raw candidates, and an optional
   *   TTL override (ignored when not finite)
   * @returns the key, the new entry, and the seller IDs of every candidate in
   *   the previous entry (for caller-side telemetry such as churn detection)
   */
  commitWarm(input: {
    modelId: string;
    protocol: string;
    paymentMethod: string;
    candidates: PrewarmCandidateInput[];
    ttlMs?: number;
  }): PrewarmCommitResult {
    const key = prewarmKey(input.modelId, input.protocol, input.paymentMethod);
    const previous = this.entries.get(key);
    const now = this.now();
    const next: PrewarmEntry = {
      modelId: input.modelId,
      protocol: input.protocol,
      paymentMethod: input.paymentMethod,
      state: input.candidates.length > 0 ? "warm" : "empty",
      candidates: input.candidates.map(toCandidate),
      warmedAt: now,
      ttlMs: this.resolveTtl(input.ttlMs, previous?.ttlMs),
      consecutiveWarmingFailures: 0,
      lastInFlightAt: now
    };
    this.entries.set(key, next);

    if (input.candidates.length === 0) {
      logger.warn("prewarm.cache.commit_empty", "prewarm commit returned no candidates", {
        modelId: input.modelId,
        protocol: input.protocol,
        paymentMethod: input.paymentMethod
      });
    } else {
      logger.info("prewarm.cache.committed", "prewarm commit updated candidates", {
        modelId: input.modelId,
        protocol: input.protocol,
        paymentMethod: input.paymentMethod,
        candidateCount: next.candidates.length,
        ttlMs: next.ttlMs
      });
    }

    return {
      key,
      entry: next,
      replacedSellers: previous?.candidates.map((c) => c.sellerId) ?? []
    };
  }

  /**
   * Marks a warm as failed: the entry becomes `"stale"` and its consecutive
   * failure counter is incremented, so the scheduler can apply exponential
   * backoff and `tb doctor` can surface persistently broken models.
   * Candidates, `warmedAt` and TTL are left unchanged.
   *
   * @param errorMessage - optional message, only included in the log record
   * @returns the updated entry, or `undefined` when no entry exists for the key
   *   (nothing is recorded in that case)
   */
  recordFailure(modelId: string, protocol: string, paymentMethod: string, errorMessage?: string): PrewarmEntry | undefined {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const previous = this.entries.get(key);
    if (!previous) {
      return undefined;
    }
    const next: PrewarmEntry = {
      ...previous,
      state: "stale",
      consecutiveWarmingFailures: previous.consecutiveWarmingFailures + 1,
      lastInFlightAt: this.now()
    };
    this.entries.set(key, next);
    logger.warn("prewarm.cache.failure_recorded", "prewarm commit failed; entry marked stale", {
      modelId,
      protocol,
      paymentMethod,
      consecutiveFailures: next.consecutiveWarmingFailures,
      errorMessage
    });
    return next;
  }

  /**
   * Removes the given seller from every entry that references it. Used when
   * the registry signals a seller is gone (grace period expires) or when a
   * hard failure (e.g. 5xx storm) should drop the seller from the cache
   * immediately. An entry left without candidates switches to `"empty"`;
   * otherwise its state is kept.
   *
   * @returns the number of entries that contained the seller
   */
  invalidateSeller(sellerId: string): number {
    let removed = 0;
    // Re-setting an existing key while iterating a Map does not disturb the iteration.
    for (const [key, entry] of this.entries.entries()) {
      const filtered = entry.candidates.filter((candidate) => candidate.sellerId !== sellerId);
      if (filtered.length !== entry.candidates.length) {
        removed += 1;
        this.entries.set(key, {
          ...entry,
          candidates: filtered,
          state: filtered.length > 0 ? entry.state : "empty"
        });
      }
    }
    if (removed > 0) {
      logger.info("prewarm.cache.seller_invalidated", "seller dropped from all prewarm entries", {
        sellerId,
        entriesAffected: removed
      });
    }
    return removed;
  }

  /**
   * Deletes a specific cache key. Used by `tb doctor --refresh <model>` and by
   * the registry loop when a model is removed from the focus set.
   *
   * @returns `true` when an entry existed and was removed
   */
  invalidateKey(modelId: string, protocol: string, paymentMethod: string): boolean {
    return this.entries.delete(prewarmKey(modelId, protocol, paymentMethod));
  }

  /**
   * Drops every entry whose TTL has expired (`now - warmedAt >= ttlMs`).
   *
   * @param now - reference time; defaults to the cache clock
   * @returns the number of removed entries, so the caller can log it
   */
  evictExpired(now: number = this.now()): number {
    let removed = 0;
    // Deleting the current key while iterating a Map is safe.
    for (const [key, entry] of this.entries.entries()) {
      if (now - entry.warmedAt >= entry.ttlMs) {
        this.entries.delete(key);
        removed += 1;
      }
    }
    if (removed > 0) {
      logger.info("prewarm.cache.evicted", "expired prewarm entries evicted", { removed });
    }
    return removed;
  }

  /**
   * Returns `true` when the entry is not yet expired but will expire within
   * `withinMs`. The scheduler uses this to schedule idle-cycle prewarms
   * just-in-time rather than at fixed wall-clock intervals. Unknown keys and
   * already-expired entries yield `false`.
   *
   * @param withinMs - look-ahead window in milliseconds
   * @param now - reference time; defaults to the cache clock
   */
  isExpiringSoon(modelId: string, protocol: string, paymentMethod: string, withinMs: number, now: number = this.now()): boolean {
    const entry = this.get(modelId, protocol, paymentMethod);
    if (!entry) {
      return false;
    }
    const age = now - entry.warmedAt;
    return age >= entry.ttlMs - withinMs && age < entry.ttlMs;
  }

  /**
   * Snapshots all entries for diagnostics. Each entry and each of its
   * candidates is copied, so callers can serialize or modify the result
   * without touching cache state.
   */
  snapshot(): PrewarmEntry[] {
    return Array.from(this.entries.values()).map((entry) => ({
      ...entry,
      candidates: entry.candidates.map((candidate) => ({ ...candidate }))
    }));
  }

  /**
   * Lists every cached key, decoded back into its (model, protocol, payment)
   * triple. The parts are the normalized (trimmed, lower-cased) forms used in
   * the key; keys that cannot be decoded are skipped. Used by `tb doctor` to
   * render the prewarm table.
   */
  keys(): Array<{ modelId: string; protocol: string; paymentMethod: string }> {
    const out: Array<{ modelId: string; protocol: string; paymentMethod: string }> = [];
    for (const key of this.entries.keys()) {
      const parsed = parseKey(key);
      if (parsed) {
        out.push(parsed);
      }
    }
    return out;
  }

  /** Number of entries currently held, expired ones included. */
  size(): number {
    return this.entries.size;
  }

  /** Removes every entry. */
  clear(): void {
    this.entries.clear();
  }
}
402
-
403
- /**
404
- * Return value of `PrewarmCache.freshness()`: a quick verdict on whether the existing prewarm can still be trusted.
405
- * Used on the hot path of every inference request, so callers do not have to walk the cache themselves.
406
- */
407
- export interface PrewarmFreshness {
408
- /** Whether a matching entry exists (false is equivalent to the cold path) */
409
- present: boolean;
410
- /** Whether the TTL has already elapsed (also true when no entry exists) */
411
- expired: boolean;
412
- /** True while at most 10% of the TTL remains, and when no entry exists; the scheduler uses it to trigger idle prewarms */
413
- expiringSoon: boolean;
414
- /** Remaining TTL (ms); 0 once expired, omitted only when no entry exists */
415
- remainingMs?: number;
416
- /** Current state of the entry (forced to `stale` once expired; `empty` when no entry exists) */
417
- state: PrewarmState;
418
- /** The associated cache entry, if any */
419
- entry?: PrewarmEntry;
420
- }
421
-
422
- /**
423
- * Return value of `PrewarmCache.beginWarming()`: marks one prewarm as in flight.
424
- * `hadPrevious` lets the caller decide whether to keep exposing the old candidates while the entry is `warming`.
425
- */
426
- export interface PrewarmBeginResult {
427
- /** Cache key (equivalent to the output of `prewarmKey()`) */
428
- key: string;
429
- /** The entry just written back to the cache (state=`warming`) */
430
- entry: PrewarmEntry;
431
- /** Whether the cache already held an entry before the call; used by the caller for the "keep old candidates" decision */
432
- hadPrevious: boolean;
433
- }
434
-
435
- /**
436
- * Return value of `PrewarmCache.commitWarm()`: one candidate set was committed successfully.
437
- * `replacedSellers` lets the caller detect candidate churn.
438
- */
439
- export interface PrewarmCommitResult {
440
- /** Cache key */
441
- key: string;
442
- /** The new entry after the commit (state=`warm` or `empty`) */
443
- entry: PrewarmEntry;
444
- /** Seller IDs of every candidate in the previous entry (all of them, including sellers that are present again in the new set) */
445
- replacedSellers: string[];
446
- }
447
-
/**
 * Normalizes one raw candidate into the shape stored in the cache.
 *
 * - `healthScore`: missing or non-finite becomes 50, otherwise clamped to 0-100.
 * - `lastSuccessAt` / `lastFailAt`: missing or NaN becomes 0.
 * - `avgLatencyMs`: missing or NaN becomes 0, negative values are raised to 0.
 * - optional latency metrics and `capacityBlockedUntil`: non-finite becomes
 *   `undefined`, negative values are raised to 0.
 * - `avgTokensPerSecond`: kept only when finite and strictly positive.
 * - `sellerId`, `url`, `upstreamStatus`, `upstreamErrorClass`: copied unchanged.
 */
function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
  return {
    sellerId: input.sellerId,
    url: input.url,
    healthScore: clampScore(input.healthScore ?? 50),
    lastSuccessAt: numberOr(input.lastSuccessAt, 0),
    lastFailAt: numberOr(input.lastFailAt, 0),
    // Math.max(0, NaN) is NaN, so NaN has to be replaced before clamping.
    avgLatencyMs: Math.max(0, numberOr(input.avgLatencyMs, 0)),
    healthProbeLatencyMs: finiteNonNegative(input.healthProbeLatencyMs),
    ttftMs: finiteNonNegative(input.ttftMs),
    avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
    avgTokensPerSecond: finitePositive(input.avgTokensPerSecond),
    upstreamStatus: input.upstreamStatus,
    upstreamErrorClass: input.upstreamErrorClass,
    capacityBlockedUntil: finiteNonNegative(input.capacityBlockedUntil)
  };
}

/** Returns `value` unless it is missing or NaN, in which case `fallback` is returned. */
function numberOr(value: number | undefined, fallback: number): number {
  return value == null || Number.isNaN(value) ? fallback : value;
}

/** Returns `value` raised to at least 0, or `undefined` when it is missing or not finite. */
function finiteNonNegative(value: number | undefined): number | undefined {
  return Number.isFinite(value) ? Math.max(0, value as number) : undefined;
}

/** Returns `value` when it is finite and strictly positive, otherwise `undefined`. */
function finitePositive(value: number | undefined): number | undefined {
  return Number.isFinite(value) && (value as number) > 0 ? value : undefined;
}

/** Clamps a health score into 0-100; a non-finite score becomes the neutral value 50. */
function clampScore(score: number): number {
  if (!Number.isFinite(score)) {
    return 50;
  }
  if (score < 0) {
    return 0;
  }
  if (score > 100) {
    return 100;
  }
  return score;
}