@tokenbuddy/tokenbuddy 1.0.35 → 1.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +6 -1
- package/dist/src/buyer-store.js +43 -4
- package/dist/src/cli.js +2 -2
- package/dist/src/daemon.d.ts +12 -0
- package/dist/src/daemon.js +791 -61
- package/dist/src/doctor-diagnostics.js +1 -6
- package/dist/src/provider-install.d.ts +2 -2
- package/dist/src/provider-install.js +248 -2
- package/dist/src/seller-catalog.d.ts +21 -0
- package/dist/src/seller-catalog.js +17 -0
- package/dist/src/seller-route-planner.d.ts +4 -1
- package/dist/src/seller-route-planner.js +3 -0
- package/dist/src/seller-routing-strategy.d.ts +3 -0
- package/dist/src/terminal-detect.d.ts +1 -1
- package/dist/src/terminal-detect.js +3 -2
- package/package.json +15 -2
- package/static/ui/assets/index-Djfl9tw5.js +271 -0
- package/static/ui/assets/index-DkfztCkn.css +1 -0
- package/static/ui/index.html +2 -2
- package/dist/src/buyer-store.d.ts.map +0 -1
- package/dist/src/buyer-store.js.map +0 -1
- package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
- package/dist/src/clawtip-bootstrap.js.map +0 -1
- package/dist/src/cli.d.ts.map +0 -1
- package/dist/src/cli.js.map +0 -1
- package/dist/src/credit-tracker.d.ts.map +0 -1
- package/dist/src/credit-tracker.js.map +0 -1
- package/dist/src/daemon.d.ts.map +0 -1
- package/dist/src/daemon.js.map +0 -1
- package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
- package/dist/src/doctor-clawtip-wallet.js.map +0 -1
- package/dist/src/doctor-diagnostics.d.ts.map +0 -1
- package/dist/src/doctor-diagnostics.js.map +0 -1
- package/dist/src/index.d.ts.map +0 -1
- package/dist/src/index.js.map +0 -1
- package/dist/src/init-clawtip-activation.d.ts.map +0 -1
- package/dist/src/init-clawtip-activation.js.map +0 -1
- package/dist/src/init-payment-options.d.ts.map +0 -1
- package/dist/src/init-payment-options.js.map +0 -1
- package/dist/src/init-setup.d.ts.map +0 -1
- package/dist/src/init-setup.js.map +0 -1
- package/dist/src/model-index.d.ts.map +0 -1
- package/dist/src/model-index.js.map +0 -1
- package/dist/src/package-update.d.ts.map +0 -1
- package/dist/src/package-update.js.map +0 -1
- package/dist/src/prewarm-cache.d.ts.map +0 -1
- package/dist/src/prewarm-cache.js.map +0 -1
- package/dist/src/prewarm-scheduler.d.ts.map +0 -1
- package/dist/src/prewarm-scheduler.js.map +0 -1
- package/dist/src/provider-install.d.ts.map +0 -1
- package/dist/src/provider-install.js.map +0 -1
- package/dist/src/provider-routing-config.d.ts.map +0 -1
- package/dist/src/provider-routing-config.js.map +0 -1
- package/dist/src/registry-trust.d.ts.map +0 -1
- package/dist/src/registry-trust.js.map +0 -1
- package/dist/src/route-failover.d.ts.map +0 -1
- package/dist/src/route-failover.js.map +0 -1
- package/dist/src/seller-catalog.d.ts.map +0 -1
- package/dist/src/seller-catalog.js.map +0 -1
- package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
- package/dist/src/seller-concurrency-limiter.js.map +0 -1
- package/dist/src/seller-metadata-cache.d.ts.map +0 -1
- package/dist/src/seller-metadata-cache.js.map +0 -1
- package/dist/src/seller-pool.d.ts.map +0 -1
- package/dist/src/seller-pool.js.map +0 -1
- package/dist/src/seller-route-planner.d.ts.map +0 -1
- package/dist/src/seller-route-planner.js.map +0 -1
- package/dist/src/seller-routing-config.d.ts.map +0 -1
- package/dist/src/seller-routing-config.js.map +0 -1
- package/dist/src/seller-routing-strategy.d.ts.map +0 -1
- package/dist/src/seller-routing-strategy.js.map +0 -1
- package/dist/src/stream-failover.d.ts.map +0 -1
- package/dist/src/stream-failover.js.map +0 -1
- package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
- package/dist/src/tb-clawtip-proof.js.map +0 -1
- package/dist/src/tb-proxyd.d.ts.map +0 -1
- package/dist/src/tb-proxyd.js.map +0 -1
- package/dist/src/terminal-detect.d.ts.map +0 -1
- package/dist/src/terminal-detect.js.map +0 -1
- package/dist/src/terminal-image.d.ts.map +0 -1
- package/dist/src/terminal-image.js.map +0 -1
- package/src/buyer-store.ts +0 -1090
- package/src/clawtip-bootstrap.ts +0 -65
- package/src/cli.ts +0 -2243
- package/src/credit-tracker.ts +0 -295
- package/src/daemon.ts +0 -5475
- package/src/doctor-clawtip-wallet.ts +0 -95
- package/src/doctor-diagnostics.ts +0 -1026
- package/src/index.ts +0 -16
- package/src/init-clawtip-activation.ts +0 -695
- package/src/init-payment-options.ts +0 -373
- package/src/init-setup.ts +0 -165
- package/src/model-index.ts +0 -278
- package/src/package-update.ts +0 -311
- package/src/prewarm-cache.ts +0 -485
- package/src/prewarm-scheduler.ts +0 -675
- package/src/provider-install.ts +0 -1006
- package/src/provider-routing-config.ts +0 -410
- package/src/registry-trust.ts +0 -51
- package/src/route-failover.ts +0 -304
- package/src/seller-catalog.ts +0 -505
- package/src/seller-concurrency-limiter.ts +0 -161
- package/src/seller-metadata-cache.ts +0 -91
- package/src/seller-pool.ts +0 -557
- package/src/seller-route-planner.ts +0 -513
- package/src/seller-routing-config.ts +0 -211
- package/src/seller-routing-strategy.ts +0 -362
- package/src/stream-failover.ts +0 -152
- package/src/tb-clawtip-proof.ts +0 -28
- package/src/tb-proxyd.ts +0 -101
- package/src/terminal-detect.ts +0 -333
- package/src/terminal-image.ts +0 -228
- package/static/ui/assets/index-0MVXD7bH.css +0 -1
- package/static/ui/assets/index-BVbeDEwq.js +0 -271
- package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
- package/tests/cli-routing.test.ts +0 -363
- package/tests/control-plane-ui-endpoints.test.ts +0 -1630
- package/tests/credit-tracker.test.ts +0 -165
- package/tests/daemon-413-fallback.test.ts +0 -92
- package/tests/daemon-classify.test.ts +0 -452
- package/tests/daemon-roles.test.ts +0 -92
- package/tests/daemon-trusted-registry-cache.test.ts +0 -132
- package/tests/e2e.test.ts +0 -366
- package/tests/image-generation-e2e.test.ts +0 -230
- package/tests/model-index.test.ts +0 -198
- package/tests/package-update.test.ts +0 -147
- package/tests/prewarm-cache.test.ts +0 -296
- package/tests/prewarm-scheduler.test.ts +0 -367
- package/tests/provider-routing-config.test.ts +0 -150
- package/tests/registry-trust.test.ts +0 -28
- package/tests/route-failover.test.ts +0 -222
- package/tests/seller-catalog-413.test.ts +0 -120
- package/tests/seller-catalog-utilities.test.ts +0 -124
- package/tests/seller-concurrency-limiter.test.ts +0 -83
- package/tests/seller-metadata-cache.test.ts +0 -89
- package/tests/seller-pool.test.ts +0 -365
- package/tests/seller-route-planner.test.ts +0 -312
- package/tests/seller-routing-config.test.ts +0 -124
- package/tests/seller-routing-strategy.test.ts +0 -167
- package/tests/stream-failover.test.ts +0 -52
- package/tests/thousand-seller.test.ts +0 -151
- package/tests/tokenbuddy.test.ts +0 -4043
- package/tsconfig.json +0 -8
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
import type { RegistrySeller } from "./seller-catalog.js";
|
|
2
|
-
import { fetchSellerManifest } from "./seller-catalog.js";
|
|
3
|
-
import type { SellerRouteMetadata } from "./seller-route-planner.js";
|
|
4
|
-
|
|
5
|
-
/**
 * Construction options for `SellerMetadataCache`.
 */
export interface SellerMetadataCacheOptions {
  /** How long a cached entry stays valid, in milliseconds. Defaults to 10 minutes. */
  ttlMs?: number;
  /** Time source; defaults to `Date.now`. Tests can inject a controlled clock. */
  now?: () => number;
}
|
|
14
|
-
|
|
15
|
-
/** Default lifetime of a cached seller metadata entry: 10 minutes, expressed in milliseconds. */
export const DEFAULT_SELLER_METADATA_TTL_MS = 10 * 60 * 1_000;
|
|
16
|
-
|
|
17
|
-
/**
 * Cache of seller routing metadata (the result of fetching a seller's manifest).
 * Entries are indexed by seller id and expire after a TTL. Concurrent refreshes
 * of the same seller are merged onto one `inFlight` promise so the seller is
 * not hit repeatedly.
 */
export class SellerMetadataCache {
  private readonly ttlMs: number;
  private readonly now: () => number;
  private readonly entries = new Map<string, SellerRouteMetadata>();
  private readonly inFlight = new Map<string, Promise<void>>();

  /**
   * @param options Optional TTL and clock overrides; see `SellerMetadataCacheOptions`.
   */
  constructor(options: SellerMetadataCacheOptions = {}) {
    this.now = options.now ?? Date.now;
    this.ttlMs = options.ttlMs ?? DEFAULT_SELLER_METADATA_TTL_MS;
  }

  /** Returns every cached metadata record as a new array. */
  snapshot(): SellerRouteMetadata[] {
    return [...this.entries.values()];
  }

  /**
   * Refreshes the metadata of every seller in `sellers` that has no entry yet
   * or whose entry is at least `ttlMs` old.
   *
   * @returns A promise that settles once all triggered refreshes have finished.
   */
  refreshIfStale(sellers: RegistrySeller[]): Promise<void> {
    // Two passes on purpose: decide staleness for all sellers first, then start the refreshes.
    const stale = sellers.filter((seller) => this.shouldRefresh(seller.id));
    const pending = stale.map((seller) => this.refreshSeller(seller));
    return Promise.all(pending).then(() => undefined);
  }

  /** True when the seller has no cached entry or the entry has reached the TTL. */
  private shouldRefresh(sellerId: string): boolean {
    const cached = this.entries.get(sellerId);
    if (!cached) {
      return true;
    }
    const age = this.now() - (cached.lastRefreshAt ?? 0);
    return age >= this.ttlMs;
  }

  /** Starts a refresh for `seller`, or joins the one that is already running. */
  private refreshSeller(seller: RegistrySeller): Promise<void> {
    const running = this.inFlight.get(seller.id);
    if (running) {
      return running;
    }
    const task = this.fetchAndStore(seller).finally(() => {
      this.inFlight.delete(seller.id);
    });
    this.inFlight.set(seller.id, task);
    return task;
  }

  /**
   * Fetches the seller manifest and stores the derived metadata record.
   * A failed fetch is stored too, as a record carrying `errorMessage`, so this
   * method does not reject on fetch errors.
   */
  private async fetchAndStore(seller: RegistrySeller): Promise<void> {
    // The timestamp is taken before the fetch starts, not when it completes.
    const refreshedAt = this.now();
    let record: SellerRouteMetadata;
    try {
      const manifest = await fetchSellerManifest(seller);
      record = {
        sellerId: seller.id,
        discountRatio: finiteNumber(manifest.selection?.discountRatio ?? manifest.selection?.discount_ratio),
        manifestVersion: stringField(manifest.manifestVersion ?? manifest.manifest_version),
        lastRefreshAt: refreshedAt,
        source: "manifest_selection"
      };
    } catch (err) {
      const errorMessage = err instanceof Error ? err.message : String(err);
      record = {
        sellerId: seller.id,
        lastRefreshAt: refreshedAt,
        source: "manifest_selection",
        errorMessage
      };
    }
    this.entries.set(seller.id, record);
  }
}
|
|
84
|
-
|
|
85
|
-
/**
 * Narrows an unknown value to a finite number.
 *
 * @returns `value` when it is a number other than `NaN`/`±Infinity`; otherwise `undefined`.
 */
function finiteNumber(value: unknown): number | undefined {
  if (typeof value !== "number") {
    return undefined;
  }
  if (!Number.isFinite(value)) {
    return undefined;
  }
  return value;
}
|
|
88
|
-
|
|
89
|
-
/**
 * Narrows an unknown value to a non-blank string.
 *
 * @returns `value` unchanged (not trimmed) when it is a string containing at
 * least one non-whitespace character; otherwise `undefined`.
 */
function stringField(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const hasContent = value.trim().length > 0;
  return hasContent ? value : undefined;
}
|
package/src/seller-pool.ts
DELETED
|
@@ -1,557 +0,0 @@
|
|
|
1
|
-
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
2
|
-
import type { RegistrySeller } from "./seller-catalog.js";
|
|
3
|
-
import type { ModelIndex } from "./model-index.js";
|
|
4
|
-
import type { PrewarmCache, PrewarmCandidate, PrewarmEntry } from "./prewarm-cache.js";
|
|
5
|
-
import type { CreditTracker } from "./credit-tracker.js";
|
|
6
|
-
|
|
7
|
-
const logger = createModuleLogger("tb-proxyd:seller-pool");
|
|
8
|
-
|
|
9
|
-
/**
 * Per-seller circuit breaker state.
 * - `closed`: the seller is picked normally
 * - `open`: the seller is removed from the candidates; after `openStateMs` it is downgraded to `half_open`
 * - `half_open`: one trial request is let through; on success the seller returns to `closed`
 */
export type CircuitState = "closed" | "half_open" | "open";
|
|
16
|
-
|
|
17
|
-
/**
 * Normalized error classification of a routing failure. The controller uses it
 * to decide between switching sellers, retrying, and counting credit as wasted.
 * Matches the `kind` argument of `SellerPool.recordFailure`.
 */
export type FailureKind =
  | "hard_4xx" // 400/404/422 — the seller is wrong for this request
  | "auth_invalid" // 401/403 token invalid
  | "insufficient_funds" // 402
  | "busy_capacity" // 429 busy_capacity — seller is temporarily full
  | "purchase_failed" // purchase/create or purchase/complete failed
  | "soft_5xx" // 429/5xx/timeout/network
  | "deadline" // buyer deadline exceeded
  | "stream_aborted" // upstream stream broken after first chunk
  | "no_compatible"; // pool had no candidates for the request
|
|
31
|
-
|
|
32
|
-
/**
 * Runtime view of one seller in the pool: registry description + circuit
 * state + health profile. Rebuilt by `SellerPool.sync()` from the prewarm
 * cache; the write paths (recordSuccess/recordFailure) replace the stored entry.
 */
export interface PoolEntry {
  /** Global seller ID */
  sellerId: string;
  /** Seller URL (trailing slashes removed) */
  url: string;
  /** Raw registry description (for downstream consumers such as planSellerRouteSet) */
  registrySeller: RegistrySeller;
  /** Current circuit breaker state */
  circuit: CircuitState;
  /** Number of consecutive failures (the circuit opens as soon as it reaches `failureThreshold`) */
  consecutiveFailures: number;
  /** Failure timestamps (milliseconds) inside the sliding window; bounded by `windowMs` */
  recentFailures: number[]; // timestamps (ms) for sliding window
  /** Unix millisecond timestamp of the most recent success; 0 means no success yet */
  lastSuccessAt: number;
  /** Unix millisecond timestamp of the most recent failure; 0 means no failure yet */
  lastFailAt: number;
  /** Unix millisecond timestamp of the most recent probe (i.e. cache.warmedAt) */
  lastProbeAt: number;
  // Source-of-truth prewarm state; the pool keeps a copy so the hot path
  // can answer health questions without touching the cache map on every
  // request.
  /** Combined health score 0-100; primary sort key on the hot path */
  healthScore: number;
  /** Average latency (milliseconds) */
  avgLatencyMs: number;
  /** Health probe latency (milliseconds), optional */
  healthProbeLatencyMs?: number;
  /** TTFT (milliseconds), optional */
  ttftMs?: number;
  /** Average inference latency (milliseconds), optional */
  avgInferenceMs?: number;
  /** Average output throughput (tokens/s) over the most recent 10-minute window, optional */
  avgTokensPerSecond?: number;
  /** Time of the most recent runtime speed observation; used to keep stale prewarm data from overwriting live inference metrics */
  runtimeMetricsObservedAt?: number;
  /** Upstream status, optional */
  upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
  /** Upstream error class name, optional */
  upstreamErrorClass?: string;
  /** Deadline of a temporary capacity back-off; while it is later than the current time the seller takes no part in routing */
  capacityBlockedUntil?: number;
}
|
|
79
|
-
|
|
80
|
-
/** Live speed metrics accepted by `SellerPool.recordRuntimeMetrics`; every field is optional. */
export interface SellerRuntimeMetricsUpdate {
  /** TTFT (milliseconds), optional */
  ttftMs?: number;
  /** Average inference latency (milliseconds), optional */
  avgInferenceMs?: number;
  /** Output throughput (tokens/s), optional */
  avgTokensPerSecond?: number;
}
|
|
88
|
-
|
|
89
|
-
/**
 * Input of `SellerPool.pick()`: identifies one routing request plus optional
 * time/count constraints.
 */
export interface PickOptions {
  /** Target model ID (normalized or not; the pool normalizes it internally) */
  modelId: string;
  /** Target protocol */
  protocol: string;
  /** Target payment method */
  paymentMethod: string;
  /** Maximum number of candidates to return; defaults to 4 */
  limit?: number;
  /** Clock override (for tests) */
  now?: number;
}
|
|
104
|
-
|
|
105
|
-
/**
 * Result of `SellerPool.pick()`: candidate list + decision reason + the
 * underlying model index resolution. `reason` lets logs and doctor tell apart
 * cases such as "no cache", "cache present but every circuit open" and "normal".
 */
export interface PickResult {
  /** Candidates already sorted by healthScore (sellers with an open circuit are excluded) */
  candidates: Array<{ entry: PoolEntry; registrySeller: RegistrySeller }>;
  /** Decision reason, e.g. `prewarm_cache`, `prewarm_cache_empty`, `no_prewarm_candidates` */
  reason: string;
  /** Result of the underlying `ModelIndex.resolve()` (kept so callers can diagnose) */
  resolved: ModelIndexResolution;
}
|
|
117
|
-
|
|
118
|
-
/**
 * Model-index resolution snapshot as seen by `SellerPool`; semantically the
 * same as the type of the same name in `model-index.ts`. It is a light copy
 * made on the pick path, to avoid a circular dependency and to avoid exposing
 * the `sellers` vs `candidates` naming difference.
 */
export interface ModelIndexResolution {
  /** Model ID used for the resolution (possibly not normalized) */
  modelId: string;
  /** Whether at least one seller in the index matched */
  matched: boolean;
  /** Matched sellers (default first, then declaration order) */
  candidates: RegistrySeller[];
  /** Number of sellers in the current index whose `models` field is missing (diagnostics) */
  missingModelsFlag: number;
}
|
|
132
|
-
|
|
133
|
-
/**
 * Dependencies and tunables needed to construct a `SellerPool`. Defaults live
 * in `DEFAULTS`: open after 3 failures, 60s sliding window, failure-rate
 * threshold 0.5, 30s open state.
 */
export interface SellerPoolOptions {
  /** Shared model index */
  modelIndex: ModelIndex;
  /** Shared prewarm cache (the pool's source of truth) */
  cache: PrewarmCache;
  /** Shared credit tracker; wasted-credit and auto-purchase decisions depend on it */
  creditTracker: CreditTracker;
  // Circuit breaker thresholds (v1.2 §13).
  /** Consecutive-failure threshold; the circuit opens as soon as it is reached. Defaults to 3 */
  failureThreshold?: number; // default 3
  /** Sliding window length (milliseconds). Defaults to 60000 */
  windowMs?: number; // default 60_000 (1m sliding window)
  /** Failure-rate threshold inside the sliding window (failures per second). Defaults to 0.5 */
  windowFailureRate?: number; // default 0.5
  /** How long the open state lasts (milliseconds) before downgrading to half_open. Defaults to 30000 */
  openStateMs?: number; // default 30_000
  /** Short back-off applied after `busy_capacity`. Defaults to 2000ms */
  capacityBlockMs?: number;
  /** Injected clock (for tests). Defaults to `Date.now` */
  now?: () => number;
  // PoolEntry -> CircuitState transition hooks for tests.
  /** Test hook: extra processing of the entry list after sync */
  applyRegistry?: (entries: PoolEntry[], registry: RegistrySeller[]) => PoolEntry[];
}
|
|
161
|
-
|
|
162
|
-
/** Fallback values the `SellerPool` constructor uses for tunables left unset in `SellerPoolOptions`. */
const DEFAULTS = {
  // Consecutive failures at which the circuit opens.
  failureThreshold: 3,
  // Sliding window length in milliseconds.
  windowMs: 60_000,
  // Failure-rate threshold compared against failures per second of window.
  windowFailureRate: 0.5,
  // Milliseconds a circuit stays open before it may become half_open.
  openStateMs: 30_000,
  // Milliseconds a seller is skipped after a busy_capacity failure.
  capacityBlockMs: 2_000
};
|
|
169
|
-
|
|
170
|
-
/**
 * v2 SellerPool: combines `ModelIndex` (registry index), `PrewarmCache`
 * (probe results), and `CreditTracker` (balance protection) into a single
 * source of truth used by the route-failover controller. The pool is
 * process-local and rebuilds its entry list from the cache whenever the
 * cache mutates; sellers that are not in the cache only enter the pool
 * through `ensureRegistrySellers()`.
 */
export class SellerPool {
  private readonly modelIndex: ModelIndex;
  private readonly cache: PrewarmCache;
  private readonly creditTracker: CreditTracker;
  private readonly failureThreshold: number;
  private readonly windowMs: number;
  private readonly windowFailureRate: number;
  private readonly openStateMs: number;
  private readonly capacityBlockMs: number;
  private readonly now: () => number;

  private entries = new Map<string, PoolEntry>();

  /**
   * @param options Shared collaborators plus optional circuit-breaker tunables;
   * unset tunables fall back to `DEFAULTS`, an unset clock to `Date.now`.
   */
  constructor(options: SellerPoolOptions) {
    this.modelIndex = options.modelIndex;
    this.cache = options.cache;
    this.creditTracker = options.creditTracker;
    this.failureThreshold = options.failureThreshold ?? DEFAULTS.failureThreshold;
    this.windowMs = options.windowMs ?? DEFAULTS.windowMs;
    this.windowFailureRate = options.windowFailureRate ?? DEFAULTS.windowFailureRate;
    this.openStateMs = options.openStateMs ?? DEFAULTS.openStateMs;
    this.capacityBlockMs = options.capacityBlockMs ?? DEFAULTS.capacityBlockMs;
    this.now = options.now ?? Date.now;
  }

  /**
   * Rebuild entries from the current prewarm cache. Called by
   * `route-failover` whenever the cache is mutated (commit, invalidate,
   * etc.) so the pool always reflects the latest probe results.
   *
   * Circuit state and failure counters of already-known sellers are carried
   * over; sellers no longer present in the model index are dropped.
   *
   * @returns The number of entries in the pool after the rebuild.
   */
  sync(): number {
    // Start from the existing entries so sellers absent from the cache keep their state.
    const fresh = new Map<string, PoolEntry>(this.entries);
    for (const entry of this.cache.snapshot()) {
      for (const candidate of entry.candidates) {
        const registry = this.modelIndex.getSeller(candidate.sellerId);
        if (!registry) {
          // Seller disappeared from the registry since the probe; skip.
          continue;
        }
        const previous = this.entries.get(candidate.sellerId);
        fresh.set(candidate.sellerId, {
          sellerId: candidate.sellerId,
          url: candidate.url,
          registrySeller: registry,
          circuit: previous?.circuit ?? "closed",
          consecutiveFailures: previous?.consecutiveFailures ?? 0,
          recentFailures: previous?.recentFailures ?? [],
          // NOTE(review): a non-zero cache timestamp wins here even when the pool's own
          // value is newer — confirm that an older probe timestamp cannot shorten the
          // open-state timer that is measured from `lastFailAt`.
          lastSuccessAt: candidate.lastSuccessAt || previous?.lastSuccessAt || 0,
          lastFailAt: candidate.lastFailAt || previous?.lastFailAt || 0,
          lastProbeAt: entry.warmedAt,
          healthScore: candidate.healthScore,
          avgLatencyMs: candidate.avgLatencyMs,
          healthProbeLatencyMs: candidate.healthProbeLatencyMs,
          // Speed metrics: a newer live observation beats the prewarm value.
          ttftMs: preferRuntimeMetric(candidate.ttftMs, candidate.lastSuccessAt, previous?.ttftMs, previous?.runtimeMetricsObservedAt),
          avgInferenceMs: preferRuntimeMetric(candidate.avgInferenceMs, candidate.lastSuccessAt, previous?.avgInferenceMs, previous?.runtimeMetricsObservedAt),
          avgTokensPerSecond: preferRuntimeMetric(candidate.avgTokensPerSecond, candidate.lastSuccessAt, previous?.avgTokensPerSecond, previous?.runtimeMetricsObservedAt),
          runtimeMetricsObservedAt: Math.max(previous?.runtimeMetricsObservedAt ?? 0, candidate.lastSuccessAt || 0) || undefined,
          upstreamStatus: candidate.upstreamStatus,
          upstreamErrorClass: candidate.upstreamErrorClass,
          capacityBlockedUntil: candidate.capacityBlockedUntil ?? previous?.capacityBlockedUntil
        });
      }
    }
    // Purge carried-over entries whose seller has left the registry.
    for (const sellerId of fresh.keys()) {
      if (!this.modelIndex.getSeller(sellerId)) {
        fresh.delete(sellerId);
      }
    }
    this.entries = fresh;
    return this.entries.size;
  }

  /**
   * Ensure registry-fallback candidates also have runtime state. A seller
   * may be selected before prewarm has produced a cache entry; failures
   * from that first live request still need to affect the next route plan.
   *
   * Known sellers only get their registry description and URL refreshed;
   * unknown sellers get a closed-circuit entry with health score 60.
   *
   * @param sellers Registry sellers that must be present in the pool.
   * @param now Timestamp stored as `lastProbeAt` on newly created entries.
   */
  ensureRegistrySellers(sellers: RegistrySeller[], now: number = this.now()): void {
    for (const seller of sellers) {
      const previous = this.entries.get(seller.id);
      if (previous) {
        this.entries.set(seller.id, {
          ...previous,
          registrySeller: seller,
          url: seller.url.replace(/\/+$/, "")
        });
        continue;
      }
      this.entries.set(seller.id, {
        sellerId: seller.id,
        url: seller.url.replace(/\/+$/, ""),
        registrySeller: seller,
        circuit: "closed",
        consecutiveFailures: 0,
        recentFailures: [],
        lastSuccessAt: 0,
        lastFailAt: 0,
        lastProbeAt: now,
        healthScore: 60,
        avgLatencyMs: 0
      });
    }
  }

  /**
   * Pick up to `limit` candidates for a (model, protocol, payment) triple.
   * Sellers in the `open` circuit are skipped unless their open state has
   * expired (they are flipped to `half_open` and included). Sellers inside a
   * capacity back-off are skipped as well. Candidates are sorted by health
   * score (descending) so the strongest seller goes first.
   *
   * A negative `limit` yields no candidates; a fractional one is truncated.
   */
  pick(options: PickOptions): PickResult {
    const now = options.now ?? this.now();
    // Clamp so a negative limit cannot reach `slice(0, limit)`, where it would
    // mean "drop that many from the tail" rather than "return none".
    const limit = Math.max(0, Math.trunc(options.limit ?? 4));
    const freshness = this.cache.freshness(options.modelId, options.protocol, options.paymentMethod);
    const resolved = this.modelIndex.resolve(options.modelId, {
      protocol: options.protocol,
      paymentMethod: options.paymentMethod
    });

    if (freshness.entry && freshness.entry.candidates.length === 0) {
      return {
        candidates: [],
        reason: "prewarm_cache_empty",
        resolved: asResolution(resolved)
      };
    }

    const candidates = (freshness.entry?.candidates ?? [])
      .map((candidate) => {
        const entry = this.entries.get(candidate.sellerId);
        if (!entry) {
          return null;
        }
        return { entry, registrySeller: entry.registrySeller, candidate };
      })
      .filter((row): row is { entry: PoolEntry; registrySeller: RegistrySeller; candidate: PrewarmCandidate } => row !== null)
      .map((row) => {
        // May flip an expired open circuit to half_open (and persist that) before filtering.
        const entry = this.maybeRecycleFromOpen(row.entry, now);
        return { entry, registrySeller: row.registrySeller };
      })
      .filter((row) => row.entry.circuit !== "open")
      .filter((row) => !isCapacityBlocked(row.entry, now))
      .sort((a, b) => b.entry.healthScore - a.entry.healthScore)
      .slice(0, limit);

    return {
      candidates,
      reason: candidates.length > 0 ? "prewarm_cache" : "no_prewarm_candidates",
      resolved: asResolution(resolved)
    };
  }

  /**
   * Record a successful inference against `sellerId`. The circuit closes,
   * the failure counters and capacity back-off are cleared, the health score
   * is raised to at least 60, and the credit tracker observes the latest
   * balance via `recordSpend`.
   *
   * @returns The updated entry, or `undefined` when the seller is not in the pool.
   */
  recordSuccess(sellerId: string, balanceMicros: number, now: number = this.now()): PoolEntry | undefined {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return undefined;
    }
    const next: PoolEntry = {
      ...entry,
      circuit: "closed",
      consecutiveFailures: 0,
      recentFailures: [],
      lastSuccessAt: now,
      healthScore: Math.min(100, Math.max(entry.healthScore, 60)),
      capacityBlockedUntil: undefined
    };
    this.entries.set(sellerId, next);
    this.creditTracker.recordSpend(sellerId, balanceMicros);
    logger.info("pool.success.recorded", "seller pool entry marked successful", {
      sellerId,
      balanceMicros,
      healthScore: next.healthScore
    });
    return next;
  }

  /**
   * Merge live speed metrics into the entry of `sellerId`. Non-finite values
   * are ignored and negative ones are clamped to 0. `avgInferenceMs` is also
   * mirrored into `avgLatencyMs`, `lastSuccessAt` is stamped with `now`, and
   * the health score is raised to at least 60.
   *
   * @returns The updated entry, or `undefined` when the seller is not in the pool.
   */
  recordRuntimeMetrics(
    sellerId: string,
    metrics: SellerRuntimeMetricsUpdate,
    now: number = this.now()
  ): PoolEntry | undefined {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return undefined;
    }
    const ttftMs = finiteNonNegative(metrics.ttftMs);
    const avgInferenceMs = finiteNonNegative(metrics.avgInferenceMs);
    const avgTokensPerSecond = finiteNonNegative(metrics.avgTokensPerSecond);
    const next: PoolEntry = {
      ...entry,
      lastSuccessAt: now,
      healthScore: Math.min(100, Math.max(entry.healthScore, 60)),
      avgLatencyMs: avgInferenceMs ?? entry.avgLatencyMs,
      ttftMs: ttftMs ?? entry.ttftMs,
      avgInferenceMs: avgInferenceMs ?? entry.avgInferenceMs,
      avgTokensPerSecond: avgTokensPerSecond ?? entry.avgTokensPerSecond,
      runtimeMetricsObservedAt: Math.max(entry.runtimeMetricsObservedAt ?? 0, now)
    };
    this.entries.set(sellerId, next);
    logger.info("pool.runtime_metrics.recorded", "seller pool runtime metrics updated", {
      sellerId,
      ttftMs: next.ttftMs,
      avgInferenceMs: next.avgInferenceMs,
      avgTokensPerSecond: next.avgTokensPerSecond
    });
    return next;
  }

  /**
   * Record a failure against `sellerId`. Returns the new PoolEntry (or
   * `undefined` when the seller is not in the pool). The caller
   * (route-failover) uses the returned `entry.circuit` and the entry's
   * `lastFailAt` to decide whether to fail over, retry, or stop.
   *
   * The circuit opens when the failure is hard (`hard_4xx`, `auth_invalid`,
   * `no_compatible`), when the consecutive-failure count reaches
   * `failureThreshold`, or when failures per second of window reach
   * `windowFailureRate`. `busy_capacity` does not count as a failure; it only
   * starts a capacity back-off of `capacityBlockMs`.
   *
   * On a hard failure, or when `options.transferLeftover` is set, the credit
   * is also transferred to the wasted bucket so the wasted-micros counter
   * stays accurate.
   */
  recordFailure(
    sellerId: string,
    kind: FailureKind,
    options: { transferLeftover?: boolean; reason?: string; now?: number } = {}
  ): PoolEntry | undefined {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return undefined;
    }
    const now = options.now ?? this.now();
    const isBusyCapacity = kind === "busy_capacity";
    // Append this failure (unless it is a capacity signal) and drop timestamps that left the window.
    const recentFailures = (
      isBusyCapacity ? entry.recentFailures : [...entry.recentFailures, now]
    ).filter((ts) => ts >= now - this.windowMs);
    const consecutiveFailures = isBusyCapacity ? entry.consecutiveFailures : entry.consecutiveFailures + 1;
    // Failures per second of window length (the window is treated as at least one second).
    const failureRate = recentFailures.length / Math.max(1, this.windowMs / 1000);
    const overThreshold = consecutiveFailures >= this.failureThreshold;
    const overRate = failureRate >= this.windowFailureRate;
    const isHard = kind === "hard_4xx" || kind === "auth_invalid" || kind === "no_compatible";
    const circuit: CircuitState = isHard || overThreshold || overRate ? "open" : entry.circuit;
    const next: PoolEntry = {
      ...entry,
      circuit,
      consecutiveFailures,
      recentFailures,
      lastFailAt: now,
      capacityBlockedUntil: isBusyCapacity ? now + this.capacityBlockMs : entry.capacityBlockedUntil
    };
    this.entries.set(sellerId, next);
    if (options.transferLeftover || isHard) {
      this.creditTracker.transferLeftoverToWasted(sellerId, options.reason ?? kind);
    }
    if (circuit === "open") {
      logger.warn("pool.circuit_opened", "seller pool entry transitioned to circuit_open", {
        sellerId,
        kind,
        consecutiveFailures,
        recentFailureRate: failureRate,
        threshold: this.failureThreshold
      });
    } else if (isBusyCapacity) {
      logger.warn("pool.capacity_blocked", "seller pool entry temporarily blocked by busy capacity", {
        sellerId,
        capacityBlockMs: this.capacityBlockMs,
        blockedUntil: next.capacityBlockedUntil
      });
    }
    return next;
  }

  /**
   * Expose a per-seller credit / circuit snapshot to the route-failover.
   * Used to decide whether a soft failure should retry on the same seller
   * (fresh-purchase window protection) or fail over immediately.
   *
   * Both credit questions are evaluated against the same clock reading.
   */
  inspect(sellerId: string): { entry?: PoolEntry; freshPurchase: boolean; autoPurchaseAvailable: boolean } {
    const entry = this.entries.get(sellerId);
    // Read the clock once so the two answers describe the same instant.
    const now = this.now();
    const freshPurchase = this.creditTracker.isInFreshPurchaseWindow(sellerId, now);
    const autoPurchaseAvailable = this.creditTracker.canAutoPurchase(now);
    return { entry, freshPurchase, autoPurchaseAvailable };
  }

  /**
   * Recycle expired open circuits before route planning paths that consume
   * `snapshot()` directly. This keeps the `open -> half_open` recovery path
   * active even when the newer route planner is used instead of `pick()`.
   *
   * @returns The number of entries that moved to `half_open`.
   */
  recycleOpenCircuits(now: number = this.now()): number {
    let recycled = 0;
    for (const entry of this.entries.values()) {
      if (entry.circuit !== "open") {
        continue;
      }
      const next = this.maybeRecycleFromOpen(entry, now);
      if (next.circuit === "half_open") {
        recycled += 1;
      }
    }
    return recycled;
  }

  /**
   * Manually mark an entry as `open`. Used by the registry loop when a
   * seller is removed from the registry: the entry lingers for a grace
   * period but is unreachable, so opening the circuit prevents any
   * further selection. `lastFailAt` is set to `now`, which restarts the
   * open-state timer. Unknown sellers are ignored.
   */
  markOpen(sellerId: string, reason: string, now: number = this.now()): void {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return;
    }
    this.entries.set(sellerId, { ...entry, circuit: "open", lastFailAt: now });
    logger.warn("pool.circuit_force_opened", "seller pool entry forced to circuit_open", {
      sellerId,
      reason
    });
  }

  /**
   * List all known pool entries. Used by `tb doctor` and tests. Each entry
   * is a shallow copy with its own `recentFailures` array.
   */
  snapshot(): PoolEntry[] {
    return Array.from(this.entries.values()).map((entry) => ({ ...entry, recentFailures: [...entry.recentFailures] }));
  }

  /** Number of sellers currently in the pool. */
  size(): number {
    return this.entries.size;
  }

  /**
   * Move an `open` entry to `half_open` once `openStateMs` has elapsed since
   * its `lastFailAt`; the recycled entry is stored in the pool. Any other
   * entry is returned unchanged.
   */
  private maybeRecycleFromOpen(entry: PoolEntry, now: number): PoolEntry {
    if (entry.circuit !== "open") {
      return entry;
    }
    if (now - entry.lastFailAt < this.openStateMs) {
      return entry;
    }
    const recycled: PoolEntry = { ...entry, circuit: "half_open" };
    this.entries.set(entry.sellerId, recycled);
    logger.info("pool.circuit_half_opened", "seller pool entry recycled to half_open", {
      sellerId: entry.sellerId,
      openStateMs: this.openStateMs
    });
    return recycled;
  }
}
|
|
524
|
-
|
|
525
|
-
/**
 * Sanitizes an optional metric value.
 *
 * @returns `undefined` for a missing or non-finite value; otherwise the value
 * with negatives raised to 0.
 */
function finiteNonNegative(value: number | undefined): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  return Math.max(0, value);
}
|
|
528
|
-
|
|
529
|
-
/**
 * Chooses between a metric from the prewarm cache and the value the pool
 * already holds.
 *
 * The held value wins when the prewarm value is missing, or when the held
 * value exists and was observed strictly later than the prewarm one (a missing
 * observation time counts as 0). In every other case the prewarm value wins.
 */
function preferRuntimeMetric(
  prewarmValue: number | undefined,
  prewarmObservedAt: number | undefined,
  previousValue: number | undefined,
  previousObservedAt: number | undefined
): number | undefined {
  if (prewarmValue === undefined) {
    return previousValue;
  }
  if (previousValue === undefined) {
    return prewarmValue;
  }
  const heldSeenAt = previousObservedAt ?? 0;
  const prewarmSeenAt = prewarmObservedAt ?? 0;
  return heldSeenAt > prewarmSeenAt ? previousValue : prewarmValue;
}
|
|
543
|
-
|
|
544
|
-
/**
 * Tells whether the entry is still inside a capacity back-off at `now`, i.e.
 * `capacityBlockedUntil` is a finite number strictly greater than `now`.
 */
function isCapacityBlocked(entry: PoolEntry, now: number): boolean {
  const blockedUntil = entry.capacityBlockedUntil;
  if (typeof blockedUntil !== "number" || !Number.isFinite(blockedUntil)) {
    return false;
  }
  return blockedUntil > now;
}
|
|
547
|
-
|
|
548
|
-
/**
 * Converts a model-index resolution into the pool's `ModelIndexResolution`
 * shape; the only difference is that `sellers` is exposed as `candidates`.
 */
function asResolution(resolved: { modelId: string; matched: boolean; sellers: RegistrySeller[]; missingModelsFlag: number }): ModelIndexResolution {
  const { modelId, matched, sellers, missingModelsFlag } = resolved;
  return { modelId, matched, candidates: sellers, missingModelsFlag };
}
|
|
556
|
-
|
|
557
|
-
export type { PrewarmEntry };
|