@tokenbuddy/tokenbuddy 1.0.35 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/src/buyer-store.d.ts +6 -1
  2. package/dist/src/buyer-store.js +43 -4
  3. package/dist/src/cli.js +2 -2
  4. package/dist/src/daemon.d.ts +12 -0
  5. package/dist/src/daemon.js +791 -61
  6. package/dist/src/doctor-diagnostics.js +1 -6
  7. package/dist/src/provider-install.d.ts +2 -2
  8. package/dist/src/provider-install.js +248 -2
  9. package/dist/src/seller-catalog.d.ts +21 -0
  10. package/dist/src/seller-catalog.js +17 -0
  11. package/dist/src/seller-route-planner.d.ts +4 -1
  12. package/dist/src/seller-route-planner.js +3 -0
  13. package/dist/src/seller-routing-strategy.d.ts +3 -0
  14. package/dist/src/terminal-detect.d.ts +1 -1
  15. package/dist/src/terminal-detect.js +3 -2
  16. package/package.json +15 -2
  17. package/static/ui/assets/index-Djfl9tw5.js +271 -0
  18. package/static/ui/assets/index-DkfztCkn.css +1 -0
  19. package/static/ui/index.html +2 -2
  20. package/dist/src/buyer-store.d.ts.map +0 -1
  21. package/dist/src/buyer-store.js.map +0 -1
  22. package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
  23. package/dist/src/clawtip-bootstrap.js.map +0 -1
  24. package/dist/src/cli.d.ts.map +0 -1
  25. package/dist/src/cli.js.map +0 -1
  26. package/dist/src/credit-tracker.d.ts.map +0 -1
  27. package/dist/src/credit-tracker.js.map +0 -1
  28. package/dist/src/daemon.d.ts.map +0 -1
  29. package/dist/src/daemon.js.map +0 -1
  30. package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
  31. package/dist/src/doctor-clawtip-wallet.js.map +0 -1
  32. package/dist/src/doctor-diagnostics.d.ts.map +0 -1
  33. package/dist/src/doctor-diagnostics.js.map +0 -1
  34. package/dist/src/index.d.ts.map +0 -1
  35. package/dist/src/index.js.map +0 -1
  36. package/dist/src/init-clawtip-activation.d.ts.map +0 -1
  37. package/dist/src/init-clawtip-activation.js.map +0 -1
  38. package/dist/src/init-payment-options.d.ts.map +0 -1
  39. package/dist/src/init-payment-options.js.map +0 -1
  40. package/dist/src/init-setup.d.ts.map +0 -1
  41. package/dist/src/init-setup.js.map +0 -1
  42. package/dist/src/model-index.d.ts.map +0 -1
  43. package/dist/src/model-index.js.map +0 -1
  44. package/dist/src/package-update.d.ts.map +0 -1
  45. package/dist/src/package-update.js.map +0 -1
  46. package/dist/src/prewarm-cache.d.ts.map +0 -1
  47. package/dist/src/prewarm-cache.js.map +0 -1
  48. package/dist/src/prewarm-scheduler.d.ts.map +0 -1
  49. package/dist/src/prewarm-scheduler.js.map +0 -1
  50. package/dist/src/provider-install.d.ts.map +0 -1
  51. package/dist/src/provider-install.js.map +0 -1
  52. package/dist/src/provider-routing-config.d.ts.map +0 -1
  53. package/dist/src/provider-routing-config.js.map +0 -1
  54. package/dist/src/registry-trust.d.ts.map +0 -1
  55. package/dist/src/registry-trust.js.map +0 -1
  56. package/dist/src/route-failover.d.ts.map +0 -1
  57. package/dist/src/route-failover.js.map +0 -1
  58. package/dist/src/seller-catalog.d.ts.map +0 -1
  59. package/dist/src/seller-catalog.js.map +0 -1
  60. package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
  61. package/dist/src/seller-concurrency-limiter.js.map +0 -1
  62. package/dist/src/seller-metadata-cache.d.ts.map +0 -1
  63. package/dist/src/seller-metadata-cache.js.map +0 -1
  64. package/dist/src/seller-pool.d.ts.map +0 -1
  65. package/dist/src/seller-pool.js.map +0 -1
  66. package/dist/src/seller-route-planner.d.ts.map +0 -1
  67. package/dist/src/seller-route-planner.js.map +0 -1
  68. package/dist/src/seller-routing-config.d.ts.map +0 -1
  69. package/dist/src/seller-routing-config.js.map +0 -1
  70. package/dist/src/seller-routing-strategy.d.ts.map +0 -1
  71. package/dist/src/seller-routing-strategy.js.map +0 -1
  72. package/dist/src/stream-failover.d.ts.map +0 -1
  73. package/dist/src/stream-failover.js.map +0 -1
  74. package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
  75. package/dist/src/tb-clawtip-proof.js.map +0 -1
  76. package/dist/src/tb-proxyd.d.ts.map +0 -1
  77. package/dist/src/tb-proxyd.js.map +0 -1
  78. package/dist/src/terminal-detect.d.ts.map +0 -1
  79. package/dist/src/terminal-detect.js.map +0 -1
  80. package/dist/src/terminal-image.d.ts.map +0 -1
  81. package/dist/src/terminal-image.js.map +0 -1
  82. package/src/buyer-store.ts +0 -1090
  83. package/src/clawtip-bootstrap.ts +0 -65
  84. package/src/cli.ts +0 -2243
  85. package/src/credit-tracker.ts +0 -295
  86. package/src/daemon.ts +0 -5475
  87. package/src/doctor-clawtip-wallet.ts +0 -95
  88. package/src/doctor-diagnostics.ts +0 -1026
  89. package/src/index.ts +0 -16
  90. package/src/init-clawtip-activation.ts +0 -695
  91. package/src/init-payment-options.ts +0 -373
  92. package/src/init-setup.ts +0 -165
  93. package/src/model-index.ts +0 -278
  94. package/src/package-update.ts +0 -311
  95. package/src/prewarm-cache.ts +0 -485
  96. package/src/prewarm-scheduler.ts +0 -675
  97. package/src/provider-install.ts +0 -1006
  98. package/src/provider-routing-config.ts +0 -410
  99. package/src/registry-trust.ts +0 -51
  100. package/src/route-failover.ts +0 -304
  101. package/src/seller-catalog.ts +0 -505
  102. package/src/seller-concurrency-limiter.ts +0 -161
  103. package/src/seller-metadata-cache.ts +0 -91
  104. package/src/seller-pool.ts +0 -557
  105. package/src/seller-route-planner.ts +0 -513
  106. package/src/seller-routing-config.ts +0 -211
  107. package/src/seller-routing-strategy.ts +0 -362
  108. package/src/stream-failover.ts +0 -152
  109. package/src/tb-clawtip-proof.ts +0 -28
  110. package/src/tb-proxyd.ts +0 -101
  111. package/src/terminal-detect.ts +0 -333
  112. package/src/terminal-image.ts +0 -228
  113. package/static/ui/assets/index-0MVXD7bH.css +0 -1
  114. package/static/ui/assets/index-BVbeDEwq.js +0 -271
  115. package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
  116. package/tests/cli-routing.test.ts +0 -363
  117. package/tests/control-plane-ui-endpoints.test.ts +0 -1630
  118. package/tests/credit-tracker.test.ts +0 -165
  119. package/tests/daemon-413-fallback.test.ts +0 -92
  120. package/tests/daemon-classify.test.ts +0 -452
  121. package/tests/daemon-roles.test.ts +0 -92
  122. package/tests/daemon-trusted-registry-cache.test.ts +0 -132
  123. package/tests/e2e.test.ts +0 -366
  124. package/tests/image-generation-e2e.test.ts +0 -230
  125. package/tests/model-index.test.ts +0 -198
  126. package/tests/package-update.test.ts +0 -147
  127. package/tests/prewarm-cache.test.ts +0 -296
  128. package/tests/prewarm-scheduler.test.ts +0 -367
  129. package/tests/provider-routing-config.test.ts +0 -150
  130. package/tests/registry-trust.test.ts +0 -28
  131. package/tests/route-failover.test.ts +0 -222
  132. package/tests/seller-catalog-413.test.ts +0 -120
  133. package/tests/seller-catalog-utilities.test.ts +0 -124
  134. package/tests/seller-concurrency-limiter.test.ts +0 -83
  135. package/tests/seller-metadata-cache.test.ts +0 -89
  136. package/tests/seller-pool.test.ts +0 -365
  137. package/tests/seller-route-planner.test.ts +0 -312
  138. package/tests/seller-routing-config.test.ts +0 -124
  139. package/tests/seller-routing-strategy.test.ts +0 -167
  140. package/tests/stream-failover.test.ts +0 -52
  141. package/tests/thousand-seller.test.ts +0 -151
  142. package/tests/tokenbuddy.test.ts +0 -4043
  143. package/tsconfig.json +0 -8
@@ -1,485 +0,0 @@
1
- import { createModuleLogger } from "@tokenbuddy/logging";
2
-
3
- const logger = createModuleLogger("tb-proxyd:prewarm-cache");
4
-
/**
 * TTL applied to a successfully warmed entry when nothing overrides it:
 * ten minutes, the v1.2 starting point (the trade-off is discussed in
 * buyer-driven-fallback-design.md §18.13). `PrewarmCache` takes a
 * `defaultTtlMs` option, so tests and a later config loader can substitute
 * another value without re-architecting.
 */
export const DEFAULT_PREWARM_TTL_MS = 1000 * 60 * 10;
12
-
/**
 * State machine of a cache entry.
 * - `warming`: a probe is scheduled / in flight; candidates are not settled yet
 * - `warm`: the last commit succeeded and the entry is inside its TTL
 * - `stale`: the TTL expired or a commit failed
 * - `empty`: the commit returned zero candidates (no seller serves this
 *   (model, protocol, payment) triple under the current registry)
 */
export type PrewarmState =
  | "warming"
  | "warm"
  | "stale"
  | "empty";
21
-
/**
 * Health profile of a single seller as stored after a prewarm commit.
 * `PrewarmCache.commitWarm()` writes these after normalizing the raw input
 * (score clamped to 0-100, latencies made non-negative).
 */
export interface PrewarmCandidate {
  /** Global seller ID. */
  sellerId: string;
  /**
   * Seller URL. Originally documented as having its trailing slash removed;
   * NOTE(review): this module stores the value as given, so the stripping
   * presumably happens in the caller — confirm.
   */
  url: string;
  /** Composite health score in the range 0-100; 0 means completely broken. */
  healthScore: number;
  /** Unix-millisecond timestamp of the last success; 0 means none yet. */
  lastSuccessAt: number;
  /** Unix-millisecond timestamp of the last failure; 0 means none yet. */
  lastFailAt: number;
  /** Average latency in milliseconds; described as the fallback sort metric. */
  avgLatencyMs: number;
  /** Latency of the health probe in milliseconds (optional). */
  healthProbeLatencyMs?: number;
  /** Time to first token in milliseconds (optional); described as the preferred metric for speed ordering. */
  ttftMs?: number;
  /** Average inference latency in milliseconds (optional). */
  avgInferenceMs?: number;
  /** Average output throughput (tokens/s) over the most recent 10-minute window (optional). */
  avgTokensPerSecond?: number;
  /** Upstream status, aligned with the semantics reported by the seller. */
  upstreamStatus?:
    | "healthy"
    | "degraded"
    | "unhealthy"
    | "unknown";
  /** Upstream error class (HTTP status / error code); only present on failure. */
  upstreamErrorClass?: string;
  /** Deadline of a temporary capacity back-off; while it is later than "now" the seller is not routed to. */
  capacityBlockedUntil?: number;
}
54
-
/**
 * One cache entry: the health profiles of the candidate sellers for a
 * `(modelId, protocol, paymentMethod)` triple. `warmedAt` is the start of the
 * TTL window; `consecutiveWarmingFailures` is what the scheduler uses for
 * exponential backoff.
 *
 * NOTE(review): the three identifying fields were documented as "normalized",
 * but the cache stores them exactly as the caller passed them; only the map
 * key is built from the trimmed, lower-cased form.
 */
export interface PrewarmEntry {
  /** Model ID, as supplied by the caller. */
  modelId: string;
  /** Protocol name, as supplied by the caller. */
  protocol: string;
  /** Payment method, as supplied by the caller. */
  paymentMethod: string;
  /** Current state of the entry. */
  state: PrewarmState;
  /** Candidate sellers matched for this (model, protocol, payment) triple. */
  candidates: Array<PrewarmCandidate>;
  /**
   * Start of the TTL window: the time of the last commit, or the time warming
   * first began when no earlier entry existed.
   */
  warmedAt: number;
  /** TTL of this entry in milliseconds; may be overridden explicitly per call. */
  ttlMs: number;
  /** Number of consecutive warming failures; diagnostics use it to spot persistently broken entries. */
  consecutiveWarmingFailures: number;
  /** Timestamp of the most recent begin / commit / failure update; for debugging. */
  lastInFlightAt?: number;
}
79
-
/**
 * Raw candidate data handed in by the scheduler at commit time. Everything
 * except the identity fields is optional; `toCandidate()` normalizes the values
 * (score clamping, negative latencies cut to zero, and so on).
 */
export interface PrewarmCandidateInput {
  /** Seller ID. */
  sellerId: string;
  /** Seller URL. */
  url: string;
  /** Health score (optional); a missing value becomes 50 during normalization. */
  healthScore?: number;
  /** Timestamp of the last success in milliseconds (optional). */
  lastSuccessAt?: number;
  /** Timestamp of the last failure in milliseconds (optional). */
  lastFailAt?: number;
  /** Average latency in milliseconds (optional). */
  avgLatencyMs?: number;
  /** Health-probe latency in milliseconds (optional). */
  healthProbeLatencyMs?: number;
  /** Time to first token in milliseconds (optional). */
  ttftMs?: number;
  /** Average inference latency in milliseconds (optional). */
  avgInferenceMs?: number;
  /** Average output throughput (tokens/s) over the most recent 10-minute window (optional). */
  avgTokensPerSecond?: number;
  /** Upstream status (optional). */
  upstreamStatus?:
    | "healthy"
    | "degraded"
    | "unhealthy"
    | "unknown";
  /** Upstream error class (optional). */
  upstreamErrorClass?: string;
  /** Deadline of a temporary capacity back-off; while it is later than "now" the seller is not routed to. */
  capacityBlockedUntil?: number;
}
112
-
/**
 * Build the cache key for a (model, protocol, payment) triple.
 *
 * Each part is trimmed and lower-cased, then the three parts are joined with
 * the U+0001 control character (not a colon, despite what earlier notes on
 * this function said). Using a control character means identifiers that
 * contain `:` cannot collide with the separator.
 *
 * NOTE(review): nothing here rejects a part that itself contains U+0001; the
 * key is only collision-free as long as callers never pass one.
 *
 * @param modelId - Model identifier.
 * @param protocol - Protocol name.
 * @param paymentMethod - Payment method name.
 * @returns The normalized, U+0001-separated key.
 */
export function prewarmKey(modelId: string, protocol: string, paymentMethod: string): string {
  return [modelId, protocol, paymentMethod]
    .map((part) => part.trim().toLowerCase())
    .join("\u0001");
}
122
-
/**
 * Decode a cache key back into its (model, protocol, payment) triple.
 * Returns `undefined` unless the key splits on U+0001 into exactly three
 * non-empty parts.
 */
function parseKey(key: string): { modelId: string; protocol: string; paymentMethod: string } | undefined {
  const [modelId, protocol, paymentMethod, ...surplus] = key.split("\u0001");
  // Fewer than three parts leaves a name undefined; more than three lands in `surplus`.
  if (surplus.length > 0 || !modelId || !protocol || !paymentMethod) {
    return undefined;
  }
  return { modelId, protocol, paymentMethod };
}
134
-
/** Construction options for `PrewarmCache`; both fields are optional. */
interface PrewarmCacheOptions {
  /** TTL in milliseconds used when neither a call nor an earlier entry supplies one; defaults to `DEFAULT_PREWARM_TTL_MS`. */
  defaultTtlMs?: number;
  /** Clock returning the current time in milliseconds; defaults to `Date.now`. */
  now?: () => number;
}
139
-
/**
 * In-process cache mapping a (model, protocol, payment) triple to the health
 * profiles of its candidate sellers. It is meant for single-threaded access
 * (the Node.js main thread) and has no internal locking. `commitWarm` is the
 * write path, `get` / `freshness` are the hot read path, and expiry is decided
 * by `warmedAt + ttlMs`.
 */
export class PrewarmCache {
  private readonly entries = new Map<string, PrewarmEntry>();
  private readonly defaultTtlMs: number;
  private readonly now: () => number;

  /**
   * @param options - Optional default TTL and clock; see `PrewarmCacheOptions`.
   */
  constructor(options: PrewarmCacheOptions = {}) {
    this.defaultTtlMs = options.defaultTtlMs ?? DEFAULT_PREWARM_TTL_MS;
    this.now = options.now ?? Date.now;
  }

  /**
   * Read an entry without touching cache state. An unknown key yields
   * `undefined`; whether that counts as a miss (trigger a prewarm) or as a
   * known-empty model is left to the caller.
   */
  get(modelId: string, protocol: string, paymentMethod: string): PrewarmEntry | undefined {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    return this.entries.get(key);
  }

  /**
   * Look up an entry and describe how fresh it is. This is the cheap check
   * run per inference request to decide whether the prewarm is still
   * authoritative, about to expire, or already stale.
   *
   * An absent entry is reported as not present, expired, expiring soon and
   * `empty`. For a present entry, `expiringSoon` is true when it has not
   * expired and at most 10% of its TTL remains.
   */
  freshness(modelId: string, protocol: string, paymentMethod: string): PrewarmFreshness {
    const entry = this.get(modelId, protocol, paymentMethod);
    if (entry === undefined) {
      return { present: false, expired: true, expiringSoon: true, state: "empty" };
    }

    const elapsedMs = this.now() - entry.warmedAt;
    const expired = elapsedMs >= entry.ttlMs;
    const remainingMs = Math.max(0, entry.ttlMs - elapsedMs);
    const expiringSoon = !expired && remainingMs <= entry.ttlMs * 0.1;
    // An expired entry is always reported as stale, whatever its stored state.
    const state: PrewarmState = expired ? "stale" : entry.state;

    return { present: true, expired, expiringSoon, remainingMs, state, entry };
  }

  /**
   * Mark a (model, protocol, payment) triple as currently being warmed.
   *
   * The stored entry is replaced by one in state `warming` that carries over
   * the previous candidates, `warmedAt` and failure count (when a previous
   * entry exists), so the old candidates stay readable until the next commit.
   * An explicit `ttlMs` takes precedence over the previous entry's TTL, which
   * in turn takes precedence over the cache default.
   *
   * @returns The key, the entry now stored, and whether an entry existed before.
   */
  beginWarming(modelId: string, protocol: string, paymentMethod: string, ttlMs?: number): PrewarmBeginResult {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const previous = this.entries.get(key);
    const startedAt = this.now();

    const entry: PrewarmEntry = {
      modelId,
      protocol,
      paymentMethod,
      state: "warming",
      candidates: previous?.candidates ?? [],
      warmedAt: previous?.warmedAt ?? startedAt,
      ttlMs: ttlMs ?? previous?.ttlMs ?? this.defaultTtlMs,
      consecutiveWarmingFailures: previous?.consecutiveWarmingFailures ?? 0,
      lastInFlightAt: startedAt
    };
    this.entries.set(key, entry);

    logger.debug("prewarm.cache.warming_started", "prewarm probe in flight", {
      modelId,
      protocol,
      paymentMethod,
      ttlMs: entry.ttlMs,
      previousState: previous?.state
    });

    return { key, entry, hadPrevious: previous !== undefined };
  }

  /**
   * Commit the result of a warm. `warmedAt` is reset to the current time so
   * the TTL window starts over, the failure counter is cleared, and the
   * candidates are replaced by the normalized probe results. With zero
   * candidates the entry becomes `empty`, otherwise `warm`.
   *
   * @returns The key, the new entry, and the seller IDs of the previous
   *          entry's candidates (for caller-side telemetry such as churn).
   */
  commitWarm(input: {
    modelId: string;
    protocol: string;
    paymentMethod: string;
    candidates: PrewarmCandidateInput[];
    ttlMs?: number;
  }): PrewarmCommitResult {
    const { modelId, protocol, paymentMethod } = input;
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const previous = this.entries.get(key);
    const committedAt = this.now();
    const hasCandidates = input.candidates.length > 0;

    const next: PrewarmEntry = {
      modelId,
      protocol,
      paymentMethod,
      state: hasCandidates ? "warm" : "empty",
      candidates: input.candidates.map((raw) => toCandidate(raw)),
      warmedAt: committedAt,
      ttlMs: input.ttlMs ?? previous?.ttlMs ?? this.defaultTtlMs,
      consecutiveWarmingFailures: 0,
      lastInFlightAt: committedAt
    };
    this.entries.set(key, next);

    if (hasCandidates) {
      logger.info("prewarm.cache.committed", "prewarm commit updated candidates", {
        modelId,
        protocol,
        paymentMethod,
        candidateCount: next.candidates.length,
        ttlMs: next.ttlMs
      });
    } else {
      logger.warn("prewarm.cache.commit_empty", "prewarm commit returned no candidates", {
        modelId,
        protocol,
        paymentMethod
      });
    }

    const replacedSellers = previous?.candidates.map((candidate) => candidate.sellerId) ?? [];
    return { key, entry: next, replacedSellers };
  }

  /**
   * Record a failed warm: the entry becomes `stale` and its consecutive
   * failure counter is incremented, so the scheduler can back off
   * exponentially and `tb doctor` can surface persistently broken models.
   *
   * @returns The updated entry, or `undefined` when no entry exists for the key.
   */
  recordFailure(modelId: string, protocol: string, paymentMethod: string, errorMessage?: string): PrewarmEntry | undefined {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const previous = this.entries.get(key);
    if (!previous) {
      return undefined;
    }

    const consecutiveFailures = previous.consecutiveWarmingFailures + 1;
    const next: PrewarmEntry = {
      ...previous,
      state: "stale",
      consecutiveWarmingFailures: consecutiveFailures,
      lastInFlightAt: this.now()
    };
    this.entries.set(key, next);

    logger.warn("prewarm.cache.failure_recorded", "prewarm commit failed; entry marked stale", {
      modelId,
      protocol,
      paymentMethod,
      consecutiveFailures,
      errorMessage
    });
    return next;
  }

  /**
   * Remove a seller from every entry that lists it. Used when the registry
   * reports the seller gone (grace period over) or when a hard failure
   * (e.g. a 5xx storm) should drop it from the cache at once. An entry left
   * without candidates switches to `empty`.
   *
   * @returns The number of entries that were changed.
   */
  invalidateSeller(sellerId: string): number {
    let affected = 0;
    for (const [key, entry] of this.entries) {
      const survivors = entry.candidates.filter((candidate) => candidate.sellerId !== sellerId);
      if (survivors.length === entry.candidates.length) {
        continue; // this entry never listed the seller
      }
      affected += 1;
      this.entries.set(key, {
        ...entry,
        candidates: survivors,
        state: survivors.length > 0 ? entry.state : "empty"
      });
    }

    if (affected > 0) {
      logger.info("prewarm.cache.seller_invalidated", "seller dropped from all prewarm entries", {
        sellerId,
        entriesAffected: affected
      });
    }
    return affected;
  }

  /**
   * Delete one specific cache key. Used by `tb doctor --refresh <model>` and
   * by the registry loop when a model leaves the focus set.
   *
   * @returns `true` when an entry existed and was removed.
   */
  invalidateKey(modelId: string, protocol: string, paymentMethod: string): boolean {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    return this.entries.delete(key);
  }

  /**
   * Drop every entry whose TTL has run out.
   *
   * @param now - Reference time in milliseconds; defaults to the cache clock.
   * @returns The number of removed entries, so the caller can log it.
   */
  evictExpired(now: number = this.now()): number {
    const expiredKeys: string[] = [];
    this.entries.forEach((entry, key) => {
      if (now - entry.warmedAt >= entry.ttlMs) {
        expiredKeys.push(key);
      }
    });
    for (const key of expiredKeys) {
      this.entries.delete(key);
    }

    const removed = expiredKeys.length;
    if (removed > 0) {
      logger.info("prewarm.cache.evicted", "expired prewarm entries evicted", { removed });
    }
    return removed;
  }

  /**
   * Tell whether an entry is within `withinMs` of expiring without having
   * expired yet. The scheduler uses this to run idle-cycle prewarms
   * just-in-time instead of at fixed wall-clock intervals. An unknown key
   * yields `false`.
   */
  isExpiringSoon(modelId: string, protocol: string, paymentMethod: string, withinMs: number, now: number = this.now()): boolean {
    const entry = this.get(modelId, protocol, paymentMethod);
    if (!entry) {
      return false;
    }
    const ageMs = now - entry.warmedAt;
    const stillValid = ageMs < entry.ttlMs;
    const insideWindow = ageMs >= entry.ttlMs - withinMs;
    return insideWindow && stillValid;
  }

  /**
   * Snapshot all entries for diagnostics. Every entry object and every
   * candidate object is copied, so callers can serialize or modify the result
   * without affecting cache state.
   */
  snapshot(): PrewarmEntry[] {
    const copies: PrewarmEntry[] = [];
    for (const entry of this.entries.values()) {
      copies.push({
        ...entry,
        candidates: entry.candidates.map((candidate) => ({ ...candidate }))
      });
    }
    return copies;
  }

  /**
   * List every cached key decoded back into its (model, protocol, payment)
   * triple; keys that do not decode are skipped. Used by `tb doctor` to
   * render the prewarm table.
   */
  keys(): Array<{ modelId: string; protocol: string; paymentMethod: string }> {
    const triples: Array<{ modelId: string; protocol: string; paymentMethod: string }> = [];
    this.entries.forEach((_entry, key) => {
      const triple = parseKey(key);
      if (triple) {
        triples.push(triple);
      }
    });
    return triples;
  }

  /** Number of entries currently cached. */
  size(): number {
    return this.entries.size;
  }

  /** Remove every entry. */
  clear(): void {
    this.entries.clear();
  }
}
402
-
/**
 * Result of `PrewarmCache.freshness()`: a quick verdict on whether the
 * existing prewarm can still be trusted. It is consulted on the hot path of
 * every inference request so the cache need not be walked directly.
 */
export interface PrewarmFreshness {
  /** Whether a matching entry exists (`false` is equivalent to the cold path). */
  present: boolean;
  /** Whether the TTL has been exceeded; reported as `true` when no entry exists. */
  expired: boolean;
  /**
   * For a present entry: not yet expired and at most 10% of the TTL remains;
   * the scheduler uses this to trigger idle prewarming. Reported as `true`
   * when no entry exists.
   */
  expiringSoon: boolean;
  /** Remaining TTL in milliseconds, floored at 0; omitted only when no entry exists. */
  remainingMs?: number;
  /** Current state of the entry (forced to `stale` once expired, `empty` when absent). */
  state: PrewarmState;
  /** The associated cache entry, if there is one. */
  entry?: PrewarmEntry;
}
421
-
/**
 * Return value of `PrewarmCache.beginWarming()`: marks a prewarm as in flight.
 * `hadPrevious` lets the caller decide whether the old candidates should stay
 * visible while the entry is `warming`.
 */
export interface PrewarmBeginResult {
  /** Cache key (identical to what `prewarmKey()` produces). */
  key: string;
  /** The entry just written back to the cache (state = `warming`). */
  entry: PrewarmEntry;
  /** Whether the cache already held an entry for this key before the call. */
  hadPrevious: boolean;
}
434
-
/**
 * Return value of `PrewarmCache.commitWarm()`: a candidate set was committed.
 * `replacedSellers` lets the caller detect candidate churn.
 */
export interface PrewarmCommitResult {
  /** Cache key. */
  key: string;
  /** The entry stored by the commit (state = `warm` or `empty`). */
  entry: PrewarmEntry;
  /**
   * Seller IDs of the previous entry's candidates; empty when there was no
   * previous entry. NOTE(review): this is the full previous list, including
   * sellers that reappear in the new set, not only those no longer cached.
   */
  replacedSellers: string[];
}
447
-
/**
 * Normalize a raw candidate from the scheduler into the stored shape.
 *
 * - `healthScore` defaults to 50 and is clamped to 0-100.
 * - `lastSuccessAt`, `lastFailAt` and `avgLatencyMs` become 0 when missing or
 *   non-finite, and are never negative. Routing them through
 *   `finiteNonNegative` keeps `NaN` / `Infinity` out of the cache, where they
 *   would poison later comparisons.
 * - Optional latency fields become `undefined` when missing or non-finite and
 *   are otherwise floored at 0; `avgTokensPerSecond` must be strictly positive.
 * - `sellerId`, `url`, `upstreamStatus` and `upstreamErrorClass` pass through
 *   unchanged.
 */
function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
  return {
    sellerId: input.sellerId,
    url: input.url,
    healthScore: clampScore(input.healthScore ?? 50),
    lastSuccessAt: finiteNonNegative(input.lastSuccessAt) ?? 0,
    lastFailAt: finiteNonNegative(input.lastFailAt) ?? 0,
    avgLatencyMs: finiteNonNegative(input.avgLatencyMs) ?? 0,
    healthProbeLatencyMs: finiteNonNegative(input.healthProbeLatencyMs),
    ttftMs: finiteNonNegative(input.ttftMs),
    avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
    avgTokensPerSecond: finitePositive(input.avgTokensPerSecond),
    upstreamStatus: input.upstreamStatus,
    upstreamErrorClass: input.upstreamErrorClass,
    capacityBlockedUntil: finiteNonNegative(input.capacityBlockedUntil)
  };
}

/**
 * Return `value` floored at 0 when it is a finite number, otherwise
 * `undefined` (covers `undefined`, `NaN` and ±`Infinity`).
 */
function finiteNonNegative(value: number | undefined): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return Math.max(0, value);
}

/**
 * Return `value` when it is a finite number strictly greater than 0,
 * otherwise `undefined`.
 */
function finitePositive(value: number | undefined): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return value > 0 ? value : undefined;
}

/**
 * Clamp a health score into the range 0-100; a non-finite score falls back to
 * the neutral value 50.
 */
function clampScore(score: number): number {
  if (!Number.isFinite(score)) {
    return 50;
  }
  if (score < 0) {
    return 0;
  }
  if (score > 100) {
    return 100;
  }
  return score;
}