@tokenbuddy/tokenbuddy 1.0.35 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/src/buyer-store.d.ts +6 -1
  2. package/dist/src/buyer-store.js +43 -4
  3. package/dist/src/cli.js +2 -2
  4. package/dist/src/daemon.d.ts +12 -0
  5. package/dist/src/daemon.js +791 -61
  6. package/dist/src/doctor-diagnostics.js +1 -6
  7. package/dist/src/provider-install.d.ts +2 -2
  8. package/dist/src/provider-install.js +248 -2
  9. package/dist/src/seller-catalog.d.ts +21 -0
  10. package/dist/src/seller-catalog.js +17 -0
  11. package/dist/src/seller-route-planner.d.ts +4 -1
  12. package/dist/src/seller-route-planner.js +3 -0
  13. package/dist/src/seller-routing-strategy.d.ts +3 -0
  14. package/dist/src/terminal-detect.d.ts +1 -1
  15. package/dist/src/terminal-detect.js +3 -2
  16. package/package.json +15 -2
  17. package/static/ui/assets/index-Djfl9tw5.js +271 -0
  18. package/static/ui/assets/index-DkfztCkn.css +1 -0
  19. package/static/ui/index.html +2 -2
  20. package/dist/src/buyer-store.d.ts.map +0 -1
  21. package/dist/src/buyer-store.js.map +0 -1
  22. package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
  23. package/dist/src/clawtip-bootstrap.js.map +0 -1
  24. package/dist/src/cli.d.ts.map +0 -1
  25. package/dist/src/cli.js.map +0 -1
  26. package/dist/src/credit-tracker.d.ts.map +0 -1
  27. package/dist/src/credit-tracker.js.map +0 -1
  28. package/dist/src/daemon.d.ts.map +0 -1
  29. package/dist/src/daemon.js.map +0 -1
  30. package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
  31. package/dist/src/doctor-clawtip-wallet.js.map +0 -1
  32. package/dist/src/doctor-diagnostics.d.ts.map +0 -1
  33. package/dist/src/doctor-diagnostics.js.map +0 -1
  34. package/dist/src/index.d.ts.map +0 -1
  35. package/dist/src/index.js.map +0 -1
  36. package/dist/src/init-clawtip-activation.d.ts.map +0 -1
  37. package/dist/src/init-clawtip-activation.js.map +0 -1
  38. package/dist/src/init-payment-options.d.ts.map +0 -1
  39. package/dist/src/init-payment-options.js.map +0 -1
  40. package/dist/src/init-setup.d.ts.map +0 -1
  41. package/dist/src/init-setup.js.map +0 -1
  42. package/dist/src/model-index.d.ts.map +0 -1
  43. package/dist/src/model-index.js.map +0 -1
  44. package/dist/src/package-update.d.ts.map +0 -1
  45. package/dist/src/package-update.js.map +0 -1
  46. package/dist/src/prewarm-cache.d.ts.map +0 -1
  47. package/dist/src/prewarm-cache.js.map +0 -1
  48. package/dist/src/prewarm-scheduler.d.ts.map +0 -1
  49. package/dist/src/prewarm-scheduler.js.map +0 -1
  50. package/dist/src/provider-install.d.ts.map +0 -1
  51. package/dist/src/provider-install.js.map +0 -1
  52. package/dist/src/provider-routing-config.d.ts.map +0 -1
  53. package/dist/src/provider-routing-config.js.map +0 -1
  54. package/dist/src/registry-trust.d.ts.map +0 -1
  55. package/dist/src/registry-trust.js.map +0 -1
  56. package/dist/src/route-failover.d.ts.map +0 -1
  57. package/dist/src/route-failover.js.map +0 -1
  58. package/dist/src/seller-catalog.d.ts.map +0 -1
  59. package/dist/src/seller-catalog.js.map +0 -1
  60. package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
  61. package/dist/src/seller-concurrency-limiter.js.map +0 -1
  62. package/dist/src/seller-metadata-cache.d.ts.map +0 -1
  63. package/dist/src/seller-metadata-cache.js.map +0 -1
  64. package/dist/src/seller-pool.d.ts.map +0 -1
  65. package/dist/src/seller-pool.js.map +0 -1
  66. package/dist/src/seller-route-planner.d.ts.map +0 -1
  67. package/dist/src/seller-route-planner.js.map +0 -1
  68. package/dist/src/seller-routing-config.d.ts.map +0 -1
  69. package/dist/src/seller-routing-config.js.map +0 -1
  70. package/dist/src/seller-routing-strategy.d.ts.map +0 -1
  71. package/dist/src/seller-routing-strategy.js.map +0 -1
  72. package/dist/src/stream-failover.d.ts.map +0 -1
  73. package/dist/src/stream-failover.js.map +0 -1
  74. package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
  75. package/dist/src/tb-clawtip-proof.js.map +0 -1
  76. package/dist/src/tb-proxyd.d.ts.map +0 -1
  77. package/dist/src/tb-proxyd.js.map +0 -1
  78. package/dist/src/terminal-detect.d.ts.map +0 -1
  79. package/dist/src/terminal-detect.js.map +0 -1
  80. package/dist/src/terminal-image.d.ts.map +0 -1
  81. package/dist/src/terminal-image.js.map +0 -1
  82. package/src/buyer-store.ts +0 -1090
  83. package/src/clawtip-bootstrap.ts +0 -65
  84. package/src/cli.ts +0 -2243
  85. package/src/credit-tracker.ts +0 -295
  86. package/src/daemon.ts +0 -5475
  87. package/src/doctor-clawtip-wallet.ts +0 -95
  88. package/src/doctor-diagnostics.ts +0 -1026
  89. package/src/index.ts +0 -16
  90. package/src/init-clawtip-activation.ts +0 -695
  91. package/src/init-payment-options.ts +0 -373
  92. package/src/init-setup.ts +0 -165
  93. package/src/model-index.ts +0 -278
  94. package/src/package-update.ts +0 -311
  95. package/src/prewarm-cache.ts +0 -485
  96. package/src/prewarm-scheduler.ts +0 -675
  97. package/src/provider-install.ts +0 -1006
  98. package/src/provider-routing-config.ts +0 -410
  99. package/src/registry-trust.ts +0 -51
  100. package/src/route-failover.ts +0 -304
  101. package/src/seller-catalog.ts +0 -505
  102. package/src/seller-concurrency-limiter.ts +0 -161
  103. package/src/seller-metadata-cache.ts +0 -91
  104. package/src/seller-pool.ts +0 -557
  105. package/src/seller-route-planner.ts +0 -513
  106. package/src/seller-routing-config.ts +0 -211
  107. package/src/seller-routing-strategy.ts +0 -362
  108. package/src/stream-failover.ts +0 -152
  109. package/src/tb-clawtip-proof.ts +0 -28
  110. package/src/tb-proxyd.ts +0 -101
  111. package/src/terminal-detect.ts +0 -333
  112. package/src/terminal-image.ts +0 -228
  113. package/static/ui/assets/index-0MVXD7bH.css +0 -1
  114. package/static/ui/assets/index-BVbeDEwq.js +0 -271
  115. package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
  116. package/tests/cli-routing.test.ts +0 -363
  117. package/tests/control-plane-ui-endpoints.test.ts +0 -1630
  118. package/tests/credit-tracker.test.ts +0 -165
  119. package/tests/daemon-413-fallback.test.ts +0 -92
  120. package/tests/daemon-classify.test.ts +0 -452
  121. package/tests/daemon-roles.test.ts +0 -92
  122. package/tests/daemon-trusted-registry-cache.test.ts +0 -132
  123. package/tests/e2e.test.ts +0 -366
  124. package/tests/image-generation-e2e.test.ts +0 -230
  125. package/tests/model-index.test.ts +0 -198
  126. package/tests/package-update.test.ts +0 -147
  127. package/tests/prewarm-cache.test.ts +0 -296
  128. package/tests/prewarm-scheduler.test.ts +0 -367
  129. package/tests/provider-routing-config.test.ts +0 -150
  130. package/tests/registry-trust.test.ts +0 -28
  131. package/tests/route-failover.test.ts +0 -222
  132. package/tests/seller-catalog-413.test.ts +0 -120
  133. package/tests/seller-catalog-utilities.test.ts +0 -124
  134. package/tests/seller-concurrency-limiter.test.ts +0 -83
  135. package/tests/seller-metadata-cache.test.ts +0 -89
  136. package/tests/seller-pool.test.ts +0 -365
  137. package/tests/seller-route-planner.test.ts +0 -312
  138. package/tests/seller-routing-config.test.ts +0 -124
  139. package/tests/seller-routing-strategy.test.ts +0 -167
  140. package/tests/stream-failover.test.ts +0 -52
  141. package/tests/thousand-seller.test.ts +0 -151
  142. package/tests/tokenbuddy.test.ts +0 -4043
  143. package/tsconfig.json +0 -8
@@ -1,91 +0,0 @@
1
- import type { RegistrySeller } from "./seller-catalog.js";
2
- import { fetchSellerManifest } from "./seller-catalog.js";
3
- import type { SellerRouteMetadata } from "./seller-route-planner.js";
4
-
5
- /**
6
- * `SellerMetadataCache` 构造选项。
7
- */
8
- export interface SellerMetadataCacheOptions {
9
- /** 缓存条目有效期(毫秒),默认 10 分钟 */
10
- ttlMs?: number;
11
- /** 时间源,默认 `Date.now`;测试可注入受控时间 */
12
- now?: () => number;
13
- }
14
-
15
- export const DEFAULT_SELLER_METADATA_TTL_MS = 10 * 60 * 1000;
16
-
17
- /**
18
- * seller 路由元数据缓存(`/manifest` 拉取结果)。
19
- * 内部按 seller id 索引、按 TTL 过期;并发刷新合并到同一 `inFlight` Promise,避免重复打 seller。
20
- */
21
- export class SellerMetadataCache {
22
- private readonly ttlMs: number;
23
- private readonly now: () => number;
24
- private readonly entries = new Map<string, SellerRouteMetadata>();
25
- private readonly inFlight = new Map<string, Promise<void>>();
26
-
27
- constructor(options: SellerMetadataCacheOptions = {}) {
28
- this.ttlMs = options.ttlMs ?? DEFAULT_SELLER_METADATA_TTL_MS;
29
- this.now = options.now ?? Date.now;
30
- }
31
-
32
- snapshot(): SellerRouteMetadata[] {
33
- return Array.from(this.entries.values());
34
- }
35
-
36
- refreshIfStale(sellers: RegistrySeller[]): Promise<void> {
37
- const refreshes = sellers
38
- .filter((seller) => this.shouldRefresh(seller.id))
39
- .map((seller) => this.refreshSeller(seller));
40
- return Promise.all(refreshes).then(() => undefined);
41
- }
42
-
43
- private shouldRefresh(sellerId: string): boolean {
44
- const existing = this.entries.get(sellerId);
45
- if (!existing) {
46
- return true;
47
- }
48
- return this.now() - (existing.lastRefreshAt ?? 0) >= this.ttlMs;
49
- }
50
-
51
- private refreshSeller(seller: RegistrySeller): Promise<void> {
52
- const existing = this.inFlight.get(seller.id);
53
- if (existing) {
54
- return existing;
55
- }
56
- const refresh = this.fetchAndStore(seller).finally(() => {
57
- this.inFlight.delete(seller.id);
58
- });
59
- this.inFlight.set(seller.id, refresh);
60
- return refresh;
61
- }
62
-
63
- private async fetchAndStore(seller: RegistrySeller): Promise<void> {
64
- const refreshedAt = this.now();
65
- try {
66
- const manifest = await fetchSellerManifest(seller);
67
- this.entries.set(seller.id, {
68
- sellerId: seller.id,
69
- discountRatio: finiteNumber(manifest.selection?.discountRatio ?? manifest.selection?.discount_ratio),
70
- manifestVersion: stringField(manifest.manifestVersion ?? manifest.manifest_version),
71
- lastRefreshAt: refreshedAt,
72
- source: "manifest_selection"
73
- });
74
- } catch (err) {
75
- this.entries.set(seller.id, {
76
- sellerId: seller.id,
77
- lastRefreshAt: refreshedAt,
78
- source: "manifest_selection",
79
- errorMessage: err instanceof Error ? err.message : String(err)
80
- });
81
- }
82
- }
83
- }
84
-
85
- function finiteNumber(value: unknown): number | undefined {
86
- return typeof value === "number" && Number.isFinite(value) ? value : undefined;
87
- }
88
-
89
- function stringField(value: unknown): string | undefined {
90
- return typeof value === "string" && value.trim().length > 0 ? value : undefined;
91
- }
@@ -1,557 +0,0 @@
1
- import { createModuleLogger } from "@tokenbuddy/logging";
2
- import type { RegistrySeller } from "./seller-catalog.js";
3
- import type { ModelIndex } from "./model-index.js";
4
- import type { PrewarmCache, PrewarmCandidate, PrewarmEntry } from "./prewarm-cache.js";
5
- import type { CreditTracker } from "./credit-tracker.js";
6
-
7
- const logger = createModuleLogger("tb-proxyd:seller-pool");
8
-
9
- /**
10
- * seller 级熔断器状态。
11
- * - `closed`:正常挑选
12
- * - `open`:被踢出候选,等待 `openStateMs` 后降级到 `half_open`
13
- * - `half_open`:放行一次试探,成功则回 `closed`
14
- */
15
- export type CircuitState = "closed" | "half_open" | "open";
16
-
17
- /**
18
- * 路由失败归一化后的错误分类。控制器据此决定切流 / 重试 / 计入 wasted。
19
- * 与 `SellerPool.recordFailure` 的入参对齐。
20
- */
21
- export type FailureKind =
22
- | "hard_4xx" // 400/404/422 — the seller is wrong for this request
23
- | "auth_invalid" // 401/403 token invalid
24
- | "insufficient_funds" // 402
25
- | "busy_capacity" // 429 busy_capacity — seller is temporarily full
26
- | "purchase_failed" // purchase/create or purchase/complete failed
27
- | "soft_5xx" // 429/5xx/timeout/network
28
- | "deadline" // buyer deadline exceeded
29
- | "stream_aborted" // upstream stream broken after first chunk
30
- | "no_compatible"; // pool had no candidates for the request
31
-
32
- /**
33
- * 池里每个 seller 的运行时视图:registry 描述 + 熔断状态 + 健康画像。
34
- * 由 `SellerPool.sync()` 从 prewarm cache 重建;写路径(recordSuccess/recordFailure)会原地更新。
35
- */
36
- export interface PoolEntry {
37
- /** seller 全局 ID */
38
- sellerId: string;
39
- /** seller URL(去尾部斜杠) */
40
- url: string;
41
- /** registry 原始描述(用于 planSellerRouteSet 等下游) */
42
- registrySeller: RegistrySeller;
43
- /** 当前熔断状态 */
44
- circuit: CircuitState;
45
- /** 连续失败次数(达到 `failureThreshold` 立即 open) */
46
- consecutiveFailures: number;
47
- /** 滑动窗口内失败时间戳(毫秒),长度受 `windowMs` 约束 */
48
- recentFailures: number[]; // timestamps (ms) for sliding window
49
- /** 最近一次成功的 unix 毫秒时间戳;0 表示尚无成功 */
50
- lastSuccessAt: number;
51
- /** 最近一次失败的 unix 毫秒时间戳;0 表示尚无失败 */
52
- lastFailAt: number;
53
- /** 最近一次被 probe 的 unix 毫秒时间戳(即 cache.warmedAt) */
54
- lastProbeAt: number;
55
- // Source-of-truth prewarm state; the pool keeps a copy so the hot path
56
- // can answer health questions without touching the cache map on every
57
- // request.
58
- /** 综合健康分 0-100,热路径排序的主键 */
59
- healthScore: number;
60
- /** 平均延迟(毫秒) */
61
- avgLatencyMs: number;
62
- /** health probe 延迟(毫秒),可选 */
63
- healthProbeLatencyMs?: number;
64
- /** TTFT(毫秒),可选 */
65
- ttftMs?: number;
66
- /** 平均推理延迟(毫秒),可选 */
67
- avgInferenceMs?: number;
68
- /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
69
- avgTokensPerSecond?: number;
70
- /** 最近一次 runtime speed 指标观测时间;用于避免旧 prewarm 覆盖 live inference 指标 */
71
- runtimeMetricsObservedAt?: number;
72
- /** 上游状态,可选 */
73
- upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
74
- /** 上游错误类名,可选 */
75
- upstreamErrorClass?: string;
76
- /** 临时容量避让截止时间;大于当前时间时不参与路由 */
77
- capacityBlockedUntil?: number;
78
- }
79
-
80
- export interface SellerRuntimeMetricsUpdate {
81
- /** TTFT(毫秒),可选 */
82
- ttftMs?: number;
83
- /** 平均推理延迟(毫秒),可选 */
84
- avgInferenceMs?: number;
85
- /** 输出吞吐(tokens/s),可选 */
86
- avgTokensPerSecond?: number;
87
- }
88
-
89
- /**
90
- * `SellerPool.pick()` 的入参:标识一次路由请求 + 可选的时间/数量约束。
91
- */
92
- export interface PickOptions {
93
- /** 目标模型 ID(已归一化或未归一化都可,pool 内部会归一化) */
94
- modelId: string;
95
- /** 目标协议 */
96
- protocol: string;
97
- /** 目标支付方式 */
98
- paymentMethod: string;
99
- /** 最多返回几个候选,默认 4 */
100
- limit?: number;
101
- /** 覆盖时钟(测试用) */
102
- now?: number;
103
- }
104
-
105
- /**
106
- * `SellerPool.pick()` 的返回:候选列表 + 决策原因 + 底层 model index 解析结果。
107
- * `reason` 用于日志和 doctor 区分"无缓存""有缓存但全 open""正常"等情况。
108
- */
109
- export interface PickResult {
110
- /** 已按 healthScore 排序的候选(不含 open 电路的 seller) */
111
- candidates: Array<{ entry: PoolEntry; registrySeller: RegistrySeller }>;
112
- /** 决策原因,例:`prewarm_cache`、`prewarm_cache_empty`、`no_prewarm_candidates` */
113
- reason: string;
114
- /** 底层 `ModelIndex.resolve()` 的结果(保留给上游做诊断) */
115
- resolved: ModelIndexResolution;
116
- }
117
-
118
- /**
119
- * `SellerPool` 视角的 model-index 解析快照,与 `model-index.ts` 里的同名类型语义一致;
120
- * 在 pick 路径上做轻量拷贝,避免循环依赖和暴露 `sellers` vs `candidates` 命名差异。
121
- */
122
- export interface ModelIndexResolution {
123
- /** 解析时使用的模型 ID(可能未归一化) */
124
- modelId: string;
125
- /** 索引里是否至少有一个 seller 命中 */
126
- matched: boolean;
127
- /** 命中的 seller 列表(按 default + 声明顺序) */
128
- candidates: RegistrySeller[];
129
- /** 当前索引里 `models` 字段缺失的 seller 数(诊断用) */
130
- missingModelsFlag: number;
131
- }
132
-
133
- /**
134
- * 构造 `SellerPool` 所需的依赖与可调参数。默认值见 `DEFAULTS`:
135
- * 失败 3 次 open、滑动窗口 60s、失败率阈值 0.5、open 态 30s。
136
- */
137
- export interface SellerPoolOptions {
138
- /** 共享的 model index */
139
- modelIndex: ModelIndex;
140
- /** 共享的 prewarm cache(pool 的真相源) */
141
- cache: PrewarmCache;
142
- /** 共享的 credit tracker,wasted / auto-purchase 决策都依赖 */
143
- creditTracker: CreditTracker;
144
- // Circuit breaker thresholds (v1.2 §13).
145
- /** 连续失败次数阈值,到达后立即 open,默认 3 */
146
- failureThreshold?: number; // default 3
147
- /** 滑动窗口长度(毫秒),默认 60000 */
148
- windowMs?: number; // default 60_000 (1m sliding window)
149
- /** 滑动窗口内失败率阈值(次/秒),默认 0.5 */
150
- windowFailureRate?: number; // default 0.5
151
- /** open 态保持时间(毫秒),过期后降级 half_open,默认 30000 */
152
- openStateMs?: number; // default 30_000
153
- /** `busy_capacity` 的短期避让时间,默认 2000ms */
154
- capacityBlockMs?: number;
155
- /** 注入时钟(测试用),默认 `Date.now` */
156
- now?: () => number;
157
- // PoolEntry -> CircuitState transition hooks for tests.
158
- /** 测试钩子:在 sync 后对 entry 列表做额外处理 */
159
- applyRegistry?: (entries: PoolEntry[], registry: RegistrySeller[]) => PoolEntry[];
160
- }
161
-
162
- const DEFAULTS = {
163
- failureThreshold: 3,
164
- windowMs: 60_000,
165
- windowFailureRate: 0.5,
166
- openStateMs: 30_000,
167
- capacityBlockMs: 2_000
168
- };
169
-
170
- /**
171
- * v2 SellerPool: combines `ModelIndex` (registry index), `PrewarmCache`
172
- * (probe results), and `CreditTracker` (balance protection) into a single
173
- * source of truth used by the route-failover controller. The pool is
174
- * process-local and rebuilds its entry list from the cache whenever the
175
- * cache mutates; entries not yet present in the cache are not in the pool.
176
- */
177
- export class SellerPool {
178
- private readonly modelIndex: ModelIndex;
179
- private readonly cache: PrewarmCache;
180
- private readonly creditTracker: CreditTracker;
181
- private readonly failureThreshold: number;
182
- private readonly windowMs: number;
183
- private readonly windowFailureRate: number;
184
- private readonly openStateMs: number;
185
- private readonly capacityBlockMs: number;
186
- private readonly now: () => number;
187
-
188
- private entries = new Map<string, PoolEntry>();
189
-
190
- constructor(options: SellerPoolOptions) {
191
- this.modelIndex = options.modelIndex;
192
- this.cache = options.cache;
193
- this.creditTracker = options.creditTracker;
194
- this.failureThreshold = options.failureThreshold ?? DEFAULTS.failureThreshold;
195
- this.windowMs = options.windowMs ?? DEFAULTS.windowMs;
196
- this.windowFailureRate = options.windowFailureRate ?? DEFAULTS.windowFailureRate;
197
- this.openStateMs = options.openStateMs ?? DEFAULTS.openStateMs;
198
- this.capacityBlockMs = options.capacityBlockMs ?? DEFAULTS.capacityBlockMs;
199
- this.now = options.now ?? Date.now;
200
- }
201
-
202
- /**
203
- * Rebuild entries from the current prewarm cache. Called by
204
- * `route-failover` whenever the cache is mutated (commit, invalidate,
205
- * etc.) so the pool always reflects the latest probe results.
206
- */
207
- sync(): number {
208
- const fresh = new Map<string, PoolEntry>(this.entries);
209
- for (const entry of this.cache.snapshot()) {
210
- for (const candidate of entry.candidates) {
211
- const registry = this.modelIndex.getSeller(candidate.sellerId);
212
- if (!registry) {
213
- // Seller disappeared from the registry since the probe; skip.
214
- continue;
215
- }
216
- const previous = this.entries.get(candidate.sellerId);
217
- fresh.set(candidate.sellerId, {
218
- sellerId: candidate.sellerId,
219
- url: candidate.url,
220
- registrySeller: registry,
221
- circuit: previous?.circuit ?? "closed",
222
- consecutiveFailures: previous?.consecutiveFailures ?? 0,
223
- recentFailures: previous?.recentFailures ?? [],
224
- lastSuccessAt: candidate.lastSuccessAt || previous?.lastSuccessAt || 0,
225
- lastFailAt: candidate.lastFailAt || previous?.lastFailAt || 0,
226
- lastProbeAt: entry.warmedAt,
227
- healthScore: candidate.healthScore,
228
- avgLatencyMs: candidate.avgLatencyMs,
229
- healthProbeLatencyMs: candidate.healthProbeLatencyMs,
230
- ttftMs: preferRuntimeMetric(candidate.ttftMs, candidate.lastSuccessAt, previous?.ttftMs, previous?.runtimeMetricsObservedAt),
231
- avgInferenceMs: preferRuntimeMetric(candidate.avgInferenceMs, candidate.lastSuccessAt, previous?.avgInferenceMs, previous?.runtimeMetricsObservedAt),
232
- avgTokensPerSecond: preferRuntimeMetric(candidate.avgTokensPerSecond, candidate.lastSuccessAt, previous?.avgTokensPerSecond, previous?.runtimeMetricsObservedAt),
233
- runtimeMetricsObservedAt: Math.max(previous?.runtimeMetricsObservedAt ?? 0, candidate.lastSuccessAt || 0) || undefined,
234
- upstreamStatus: candidate.upstreamStatus,
235
- upstreamErrorClass: candidate.upstreamErrorClass,
236
- capacityBlockedUntil: candidate.capacityBlockedUntil ?? previous?.capacityBlockedUntil
237
- });
238
- }
239
- }
240
- for (const sellerId of fresh.keys()) {
241
- if (!this.modelIndex.getSeller(sellerId)) {
242
- fresh.delete(sellerId);
243
- }
244
- }
245
- this.entries = fresh;
246
- return this.entries.size;
247
- }
248
-
249
- /**
250
- * Ensure registry-fallback candidates also have runtime state. A seller
251
- * may be selected before prewarm has produced a cache entry; failures
252
- * from that first live request still need to affect the next route plan.
253
- */
254
- ensureRegistrySellers(sellers: RegistrySeller[], now: number = this.now()): void {
255
- for (const seller of sellers) {
256
- const previous = this.entries.get(seller.id);
257
- if (previous) {
258
- this.entries.set(seller.id, {
259
- ...previous,
260
- registrySeller: seller,
261
- url: seller.url.replace(/\/+$/, "")
262
- });
263
- continue;
264
- }
265
- this.entries.set(seller.id, {
266
- sellerId: seller.id,
267
- url: seller.url.replace(/\/+$/, ""),
268
- registrySeller: seller,
269
- circuit: "closed",
270
- consecutiveFailures: 0,
271
- recentFailures: [],
272
- lastSuccessAt: 0,
273
- lastFailAt: 0,
274
- lastProbeAt: now,
275
- healthScore: 60,
276
- avgLatencyMs: 0
277
- });
278
- }
279
- }
280
-
281
- /**
282
- * Pick up to `limit` candidates for a (model, protocol, payment) triple.
283
- * Sellers in the `open` circuit are skipped unless their open state has
284
- * expired (they are flipped to `half_open` and included). Candidates are
285
- * sorted by health score (descending) so the strongest seller goes first.
286
- */
287
- pick(options: PickOptions): PickResult {
288
- const now = options.now ?? this.now();
289
- const limit = options.limit ?? 4;
290
- const freshness = this.cache.freshness(options.modelId, options.protocol, options.paymentMethod);
291
- const resolved = this.modelIndex.resolve(options.modelId, {
292
- protocol: options.protocol,
293
- paymentMethod: options.paymentMethod
294
- });
295
-
296
- if (freshness.entry && freshness.entry.candidates.length === 0) {
297
- return {
298
- candidates: [],
299
- reason: "prewarm_cache_empty",
300
- resolved: asResolution(resolved)
301
- };
302
- }
303
-
304
- const candidates = (freshness.entry?.candidates ?? [])
305
- .map((candidate) => {
306
- const entry = this.entries.get(candidate.sellerId);
307
- if (!entry) {
308
- return null;
309
- }
310
- return { entry, registrySeller: entry.registrySeller, candidate };
311
- })
312
- .filter((row): row is { entry: PoolEntry; registrySeller: RegistrySeller; candidate: PrewarmCandidate } => row !== null)
313
- .map((row) => {
314
- const entry = this.maybeRecycleFromOpen(row.entry, now);
315
- return { entry, registrySeller: row.registrySeller };
316
- })
317
- .filter((row) => row.entry.circuit !== "open")
318
- .filter((row) => !isCapacityBlocked(row.entry, now))
319
- .sort((a, b) => b.entry.healthScore - a.entry.healthScore)
320
- .slice(0, limit);
321
-
322
- return {
323
- candidates,
324
- reason: candidates.length > 0 ? "prewarm_cache" : "no_prewarm_candidates",
325
- resolved: asResolution(resolved)
326
- };
327
- }
328
-
329
- /**
330
- * Record a successful inference against `sellerId`. The circuit closes
331
- * (if it was half-open) and the credit tracker observes the latest
332
- * balance via `recordSpend`.
333
- */
334
- recordSuccess(sellerId: string, balanceMicros: number, now: number = this.now()): PoolEntry | undefined {
335
- const entry = this.entries.get(sellerId);
336
- if (!entry) {
337
- return undefined;
338
- }
339
- const next: PoolEntry = {
340
- ...entry,
341
- circuit: "closed",
342
- consecutiveFailures: 0,
343
- recentFailures: [],
344
- lastSuccessAt: now,
345
- healthScore: Math.min(100, Math.max(entry.healthScore, 60)),
346
- capacityBlockedUntil: undefined
347
- };
348
- this.entries.set(sellerId, next);
349
- this.creditTracker.recordSpend(sellerId, balanceMicros);
350
- logger.info("pool.success.recorded", "seller pool entry marked successful", {
351
- sellerId,
352
- balanceMicros,
353
- healthScore: next.healthScore
354
- });
355
- return next;
356
- }
357
-
358
- recordRuntimeMetrics(
359
- sellerId: string,
360
- metrics: SellerRuntimeMetricsUpdate,
361
- now: number = this.now()
362
- ): PoolEntry | undefined {
363
- const entry = this.entries.get(sellerId);
364
- if (!entry) {
365
- return undefined;
366
- }
367
- const ttftMs = finiteNonNegative(metrics.ttftMs);
368
- const avgInferenceMs = finiteNonNegative(metrics.avgInferenceMs);
369
- const avgTokensPerSecond = finiteNonNegative(metrics.avgTokensPerSecond);
370
- const next: PoolEntry = {
371
- ...entry,
372
- lastSuccessAt: now,
373
- healthScore: Math.min(100, Math.max(entry.healthScore, 60)),
374
- avgLatencyMs: avgInferenceMs ?? entry.avgLatencyMs,
375
- ttftMs: ttftMs ?? entry.ttftMs,
376
- avgInferenceMs: avgInferenceMs ?? entry.avgInferenceMs,
377
- avgTokensPerSecond: avgTokensPerSecond ?? entry.avgTokensPerSecond,
378
- runtimeMetricsObservedAt: Math.max(entry.runtimeMetricsObservedAt ?? 0, now)
379
- };
380
- this.entries.set(sellerId, next);
381
- logger.info("pool.runtime_metrics.recorded", "seller pool runtime metrics updated", {
382
- sellerId,
383
- ttftMs: next.ttftMs,
384
- avgInferenceMs: next.avgInferenceMs,
385
- avgTokensPerSecond: next.avgTokensPerSecond
386
- });
387
- return next;
388
- }
389
-
390
- /**
391
- * Record a failure against `sellerId`. Returns the new PoolEntry. The
392
- * caller (route-failover) uses the returned `entry.circuit` and the
393
- * entry's `lastFailAt` to decide whether to fail over, retry, or stop.
394
- * On a non-recoverable failure (`hard_4xx`, `auth_invalid`,
395
- * `insufficient_funds`) the credit is also transferred to the wasted
396
- * bucket so the wasted-micros counter stays accurate.
397
- */
398
- recordFailure(
399
- sellerId: string,
400
- kind: FailureKind,
401
- options: { transferLeftover?: boolean; reason?: string; now?: number } = {}
402
- ): PoolEntry | undefined {
403
- const entry = this.entries.get(sellerId);
404
- if (!entry) {
405
- return undefined;
406
- }
407
- const now = options.now ?? this.now();
408
- const isBusyCapacity = kind === "busy_capacity";
409
- const recentFailures = (
410
- isBusyCapacity ? entry.recentFailures : [...entry.recentFailures, now]
411
- ).filter((ts) => ts >= now - this.windowMs);
412
- const consecutiveFailures = isBusyCapacity ? entry.consecutiveFailures : entry.consecutiveFailures + 1;
413
- const failureRate = recentFailures.length / Math.max(1, this.windowMs / 1000);
414
- const overThreshold = consecutiveFailures >= this.failureThreshold;
415
- const overRate = failureRate >= this.windowFailureRate;
416
- const isHard = kind === "hard_4xx" || kind === "auth_invalid" || kind === "no_compatible";
417
- const circuit: CircuitState = isHard || overThreshold || overRate ? "open" : entry.circuit;
418
- const next: PoolEntry = {
419
- ...entry,
420
- circuit,
421
- consecutiveFailures,
422
- recentFailures,
423
- lastFailAt: now,
424
- capacityBlockedUntil: isBusyCapacity ? now + this.capacityBlockMs : entry.capacityBlockedUntil
425
- };
426
- this.entries.set(sellerId, next);
427
- if (options.transferLeftover || isHard) {
428
- this.creditTracker.transferLeftoverToWasted(sellerId, options.reason ?? kind);
429
- }
430
- if (circuit === "open") {
431
- logger.warn("pool.circuit_opened", "seller pool entry transitioned to circuit_open", {
432
- sellerId,
433
- kind,
434
- consecutiveFailures,
435
- recentFailureRate: failureRate,
436
- threshold: this.failureThreshold
437
- });
438
- } else if (isBusyCapacity) {
439
- logger.warn("pool.capacity_blocked", "seller pool entry temporarily blocked by busy capacity", {
440
- sellerId,
441
- capacityBlockMs: this.capacityBlockMs,
442
- blockedUntil: next.capacityBlockedUntil
443
- });
444
- }
445
- return next;
446
- }
447
-
448
- /**
449
- * Expose a per-seller credit / circuit snapshot to the route-failover.
450
- * Used to decide whether a soft failure should retry on the same seller
451
- * (刚买窗口保护) or fail over immediately.
452
- */
453
- inspect(sellerId: string): { entry?: PoolEntry; freshPurchase: boolean; autoPurchaseAvailable: boolean } {
454
- const entry = this.entries.get(sellerId);
455
- const freshPurchase = this.creditTracker.isInFreshPurchaseWindow(sellerId, this.now());
456
- const autoPurchaseAvailable = this.creditTracker.canAutoPurchase(this.now());
457
- return { entry, freshPurchase, autoPurchaseAvailable };
458
- }
459
-
460
- /**
461
- * Recycle expired open circuits before route planning paths that consume
462
- * `snapshot()` directly. This keeps the `open -> half_open` recovery path
463
- * active even when the newer route planner is used instead of `pick()`.
464
- */
465
- recycleOpenCircuits(now: number = this.now()): number {
466
- let recycled = 0;
467
- for (const entry of this.entries.values()) {
468
- if (entry.circuit !== "open") {
469
- continue;
470
- }
471
- const next = this.maybeRecycleFromOpen(entry, now);
472
- if (next.circuit === "half_open") {
473
- recycled += 1;
474
- }
475
- }
476
- return recycled;
477
- }
478
-
479
- /**
480
- * Manually mark an entry as `open`. Used by the registry loop when a
481
- * seller is removed from the registry: the entry lingers for a grace
482
- * period but is unreachable, so opening the circuit prevents any
483
- * further selection.
484
- */
485
- markOpen(sellerId: string, reason: string, now: number = this.now()): void {
486
- const entry = this.entries.get(sellerId);
487
- if (!entry) {
488
- return;
489
- }
490
- this.entries.set(sellerId, { ...entry, circuit: "open", lastFailAt: now });
491
- logger.warn("pool.circuit_force_opened", "seller pool entry forced to circuit_open", {
492
- sellerId,
493
- reason
494
- });
495
- }
496
-
497
- /**
498
- * List all known pool entries. Used by `tb doctor` and tests.
499
- */
500
- snapshot(): PoolEntry[] {
501
- return Array.from(this.entries.values()).map((entry) => ({ ...entry, recentFailures: [...entry.recentFailures] }));
502
- }
503
-
504
- size(): number {
505
- return this.entries.size;
506
- }
507
-
508
- private maybeRecycleFromOpen(entry: PoolEntry, now: number): PoolEntry {
509
- if (entry.circuit !== "open") {
510
- return entry;
511
- }
512
- if (now - entry.lastFailAt < this.openStateMs) {
513
- return entry;
514
- }
515
- const recycled: PoolEntry = { ...entry, circuit: "half_open" };
516
- this.entries.set(entry.sellerId, recycled);
517
- logger.info("pool.circuit_half_opened", "seller pool entry recycled to half_open", {
518
- sellerId: entry.sellerId,
519
- openStateMs: this.openStateMs
520
- });
521
- return recycled;
522
- }
523
- }
524
-
525
- function finiteNonNegative(value: number | undefined): number | undefined {
526
- return Number.isFinite(value) ? Math.max(0, value as number) : undefined;
527
- }
528
-
529
- function preferRuntimeMetric(
530
- prewarmValue: number | undefined,
531
- prewarmObservedAt: number | undefined,
532
- previousValue: number | undefined,
533
- previousObservedAt: number | undefined
534
- ): number | undefined {
535
- if (prewarmValue === undefined) {
536
- return previousValue;
537
- }
538
- if (previousValue !== undefined && (previousObservedAt ?? 0) > (prewarmObservedAt ?? 0)) {
539
- return previousValue;
540
- }
541
- return prewarmValue;
542
- }
543
-
544
- function isCapacityBlocked(entry: PoolEntry, now: number): boolean {
545
- return Number.isFinite(entry.capacityBlockedUntil) && (entry.capacityBlockedUntil as number) > now;
546
- }
547
-
548
- function asResolution(resolved: { modelId: string; matched: boolean; sellers: RegistrySeller[]; missingModelsFlag: number }): ModelIndexResolution {
549
- return {
550
- modelId: resolved.modelId,
551
- matched: resolved.matched,
552
- candidates: resolved.sellers,
553
- missingModelsFlag: resolved.missingModelsFlag
554
- };
555
- }
556
-
557
- export type { PrewarmEntry };