@tokenbuddy/tokenbuddy 1.0.13 → 1.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +23 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +31 -6
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/clawtip-bootstrap.d.ts +23 -0
- package/dist/src/clawtip-bootstrap.d.ts.map +1 -0
- package/dist/src/clawtip-bootstrap.js +47 -0
- package/dist/src/clawtip-bootstrap.js.map +1 -0
- package/dist/src/cli.d.ts +24 -33
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +157 -58
- package/dist/src/cli.js.map +1 -1
- package/dist/src/daemon.d.ts +79 -1
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +984 -23
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/model-index.d.ts +1 -1
- package/dist/src/model-index.d.ts.map +1 -1
- package/dist/src/model-index.js +4 -0
- package/dist/src/model-index.js.map +1 -1
- package/dist/src/prewarm-cache.d.ts +4 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -1
- package/dist/src/prewarm-cache.js +2 -1
- package/dist/src/prewarm-cache.js.map +1 -1
- package/dist/src/prewarm-scheduler.d.ts +2 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -1
- package/dist/src/prewarm-scheduler.js +4 -2
- package/dist/src/prewarm-scheduler.js.map +1 -1
- package/dist/src/route-failover.d.ts.map +1 -1
- package/dist/src/route-failover.js +10 -0
- package/dist/src/route-failover.js.map +1 -1
- package/dist/src/seller-catalog.d.ts +17 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js +15 -1
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +12 -1
- package/dist/src/seller-pool.d.ts.map +1 -1
- package/dist/src/seller-pool.js +61 -7
- package/dist/src/seller-pool.js.map +1 -1
- package/dist/src/seller-route-planner.d.ts +11 -1
- package/dist/src/seller-route-planner.d.ts.map +1 -1
- package/dist/src/seller-route-planner.js +21 -9
- package/dist/src/seller-route-planner.js.map +1 -1
- package/dist/src/seller-routing-config.d.ts +2 -0
- package/dist/src/seller-routing-config.d.ts.map +1 -1
- package/dist/src/seller-routing-config.js +11 -1
- package/dist/src/seller-routing-config.js.map +1 -1
- package/package.json +1 -1
- package/src/buyer-store.ts +70 -7
- package/src/clawtip-bootstrap.ts +64 -0
- package/src/cli.ts +201 -76
- package/src/daemon.ts +1132 -25
- package/src/model-index.ts +4 -1
- package/src/prewarm-cache.ts +6 -1
- package/src/prewarm-scheduler.ts +6 -2
- package/src/route-failover.ts +11 -0
- package/src/seller-catalog.ts +24 -1
- package/src/seller-pool.ts +69 -7
- package/src/seller-route-planner.ts +33 -11
- package/src/seller-routing-config.ts +14 -1
- package/static/clawtip/recharge.png +0 -0
- package/tests/control-plane-ui-endpoints.test.ts +559 -0
- package/tests/daemon-classify.test.ts +9 -0
- package/tests/model-index.test.ts +14 -0
- package/tests/route-failover.test.ts +16 -0
- package/tests/seller-catalog-utilities.test.ts +54 -0
- package/tests/seller-pool.test.ts +56 -0
- package/tests/seller-route-planner.test.ts +40 -0
- package/tests/seller-routing-config.test.ts +13 -0
- package/tests/tokenbuddy.test.ts +200 -7
package/src/model-index.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
2
|
-
import type
|
|
2
|
+
import { isBuyerVisibleRegistrySeller, type RegistrySeller } from "./seller-catalog.js";
|
|
3
3
|
|
|
4
4
|
const logger = createModuleLogger("tb-proxyd:model-index");
|
|
5
5
|
|
|
@@ -73,6 +73,9 @@ export class ModelIndex {
|
|
|
73
73
|
if (!seller || !seller.id) {
|
|
74
74
|
continue;
|
|
75
75
|
}
|
|
76
|
+
if (!isBuyerVisibleRegistrySeller(seller)) {
|
|
77
|
+
continue;
|
|
78
|
+
}
|
|
76
79
|
// v1.2 registry schema 用 "anthropic_messages" 作为协议名(OpenAI / ClawTip
|
|
77
80
|
// / 外部 client 都用这个),但 buyer 内部 `endpointProtocol` 对 /v1/messages
|
|
78
81
|
// 返 "messages"(更短、更易读)。在 modelIndex 重建时做 alias 映射,让两边
|
package/src/prewarm-cache.ts
CHANGED
|
@@ -46,6 +46,8 @@ export interface PrewarmCandidate {
|
|
|
46
46
|
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
47
47
|
/** 上游错误类名(HTTP status / 错误码),仅在失败时存在 */
|
|
48
48
|
upstreamErrorClass?: string;
|
|
49
|
+
/** 临时容量避让截止时间;大于当前时间时不参与路由 */
|
|
50
|
+
capacityBlockedUntil?: number;
|
|
49
51
|
}
|
|
50
52
|
|
|
51
53
|
/**
|
|
@@ -100,6 +102,8 @@ export interface PrewarmCandidateInput {
|
|
|
100
102
|
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
101
103
|
/** 上游错误类名,可选 */
|
|
102
104
|
upstreamErrorClass?: string;
|
|
105
|
+
/** 临时容量避让截止时间;大于当前时间时不参与路由 */
|
|
106
|
+
capacityBlockedUntil?: number;
|
|
103
107
|
}
|
|
104
108
|
|
|
105
109
|
/**
|
|
@@ -449,7 +453,8 @@ function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
|
|
|
449
453
|
ttftMs: finiteNonNegative(input.ttftMs),
|
|
450
454
|
avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
|
|
451
455
|
upstreamStatus: input.upstreamStatus,
|
|
452
|
-
upstreamErrorClass: input.upstreamErrorClass
|
|
456
|
+
upstreamErrorClass: input.upstreamErrorClass,
|
|
457
|
+
capacityBlockedUntil: finiteNonNegative(input.capacityBlockedUntil)
|
|
453
458
|
};
|
|
454
459
|
}
|
|
455
460
|
|
package/src/prewarm-scheduler.ts
CHANGED
|
@@ -35,6 +35,8 @@ export interface ProbeResult {
|
|
|
35
35
|
ttftMs?: number;
|
|
36
36
|
/** 平均推理延迟(毫秒),可选 */
|
|
37
37
|
avgInferenceMs?: number;
|
|
38
|
+
/** 临时容量避让截止时间;大于当前时间时不参与路由 */
|
|
39
|
+
capacityBlockedUntil?: number;
|
|
38
40
|
}
|
|
39
41
|
|
|
40
42
|
/**
|
|
@@ -498,7 +500,8 @@ export class PrewarmScheduler {
|
|
|
498
500
|
ttftMs: result.ttftMs,
|
|
499
501
|
avgInferenceMs: result.avgInferenceMs,
|
|
500
502
|
upstreamStatus: result.upstreamStatus,
|
|
501
|
-
upstreamErrorClass: result.upstreamErrorClass
|
|
503
|
+
upstreamErrorClass: result.upstreamErrorClass,
|
|
504
|
+
capacityBlockedUntil: result.capacityBlockedUntil
|
|
502
505
|
});
|
|
503
506
|
logger.info("prewarm.succeeded", "seller probe succeeded", {
|
|
504
507
|
taskId: task.id,
|
|
@@ -524,7 +527,8 @@ export class PrewarmScheduler {
|
|
|
524
527
|
ttftMs: result.ttftMs,
|
|
525
528
|
avgInferenceMs: result.avgInferenceMs,
|
|
526
529
|
upstreamStatus: result.upstreamStatus,
|
|
527
|
-
upstreamErrorClass: result.upstreamErrorClass
|
|
530
|
+
upstreamErrorClass: result.upstreamErrorClass,
|
|
531
|
+
capacityBlockedUntil: result.capacityBlockedUntil
|
|
528
532
|
});
|
|
529
533
|
logger.warn("prewarm.failed", "seller probe failed", {
|
|
530
534
|
taskId: task.id,
|
package/src/route-failover.ts
CHANGED
|
@@ -169,6 +169,7 @@ export class RouteFailover {
|
|
|
169
169
|
decide(context: DecideContext, totalCandidates: number): FailoverDecision {
|
|
170
170
|
const isHard = context.errorKind === "hard_4xx" || context.errorKind === "auth_invalid" || context.errorKind === "no_compatible";
|
|
171
171
|
const isSoft = context.errorKind === "soft_5xx" || context.errorKind === "deadline";
|
|
172
|
+
const isBusyCapacity = context.errorKind === "busy_capacity";
|
|
172
173
|
const info = this.pool.inspect(context.sellerId);
|
|
173
174
|
const freshPurchase = info.freshPurchase;
|
|
174
175
|
const budgetExceeded = !this.creditTracker.canAutoPurchase(this.now());
|
|
@@ -179,6 +180,16 @@ export class RouteFailover {
|
|
|
179
180
|
now: this.now()
|
|
180
181
|
});
|
|
181
182
|
|
|
183
|
+
if (isBusyCapacity) {
|
|
184
|
+
return {
|
|
185
|
+
action: "failover_next",
|
|
186
|
+
reason: "busy_capacity",
|
|
187
|
+
freshPurchase,
|
|
188
|
+
retryAttemptsBeforeFailover: context.attempt,
|
|
189
|
+
budgetExceeded
|
|
190
|
+
};
|
|
191
|
+
}
|
|
192
|
+
|
|
182
193
|
if (isHard) {
|
|
183
194
|
// Hard failures are not eligible for retry; the seller is wrong
|
|
184
195
|
// for this request. The pool has already transferred leftover
|
package/src/seller-catalog.ts
CHANGED
|
@@ -17,6 +17,8 @@ export interface RegistrySeller {
|
|
|
17
17
|
id: string;
|
|
18
18
|
/** 人类可读名称 */
|
|
19
19
|
name?: string;
|
|
20
|
+
/** registry 发布状态;只有 `active` 参与 buyer 自动路由,缺省兼容旧 registry 为可用 */
|
|
21
|
+
status?: string;
|
|
20
22
|
/** seller 服务的公网 URL(去掉尾部 `/`) */
|
|
21
23
|
url: string;
|
|
22
24
|
/** seller 支持的协议列表(包含 `anthropic_messages` 时内部 alias 到 `messages`) */
|
|
@@ -44,6 +46,19 @@ export interface SellerRegistryDocument {
|
|
|
44
46
|
sellers: RegistrySeller[];
|
|
45
47
|
}
|
|
46
48
|
|
|
49
|
+
/**
 * Visibility gate for buyer auto-routing and the model catalog.
 *
 * Newer registries write `status` explicitly, and only `active` sellers take
 * part in buyer-visible paths. Older registries omit `status`; those sellers
 * stay visible so that upgrading does not hide existing nodes.
 *
 * The comparison ignores surrounding whitespace and letter case. A missing,
 * `null`, or blank status counts as "not specified" and is visible. Any other
 * non-string value is a malformed explicit status: it cannot equal `active`,
 * so the seller is hidden instead of throwing on `.trim()`.
 *
 * @param seller registry seller
 * @returns whether the seller should enter buyer routing and the model catalog
 */
export function isBuyerVisibleRegistrySeller(seller: RegistrySeller): boolean {
  // Registry data originates outside this process, so the declared `string`
  // type is not guaranteed at runtime; widen before inspecting the value.
  const rawStatus: unknown = seller.status;
  if (rawStatus === undefined || rawStatus === null) {
    return true;
  }
  if (typeof rawStatus !== "string") {
    return false;
  }
  const status = rawStatus.trim().toLowerCase();
  return status.length === 0 || status === "active";
}
|
|
61
|
+
|
|
47
62
|
/**
|
|
48
63
|
* 单个 seller 的 `/manifest` 响应,兼容 snake_case 与 camelCase 字段。
|
|
49
64
|
*/
|
|
@@ -72,6 +87,10 @@ export interface SellerManifest {
|
|
|
72
87
|
discountRatio?: number;
|
|
73
88
|
/** 折扣系数(snake_case 兼容) */
|
|
74
89
|
discount_ratio?: number;
|
|
90
|
+
/** 服务手续费系数(camelCase) */
|
|
91
|
+
serviceFeeRatio?: number;
|
|
92
|
+
/** 服务手续费系数(snake_case 兼容) */
|
|
93
|
+
service_fee_ratio?: number;
|
|
75
94
|
};
|
|
76
95
|
}
|
|
77
96
|
|
|
@@ -132,6 +151,8 @@ export interface SellerCatalogEntry {
|
|
|
132
151
|
manifestSellerId?: string;
|
|
133
152
|
/** 折扣系数(来自 manifest.selection) */
|
|
134
153
|
discountRatio?: number;
|
|
154
|
+
/** 服务手续费系数(来自 manifest.selection) */
|
|
155
|
+
serviceFeeRatio?: number;
|
|
135
156
|
/** 模型数(来自 manifest) */
|
|
136
157
|
modelCount?: number;
|
|
137
158
|
/** seller 支持的协议(manifest > registry fallback) */
|
|
@@ -305,7 +326,8 @@ export async function fetchSellerManifest(seller: RegistrySeller): Promise<Selle
|
|
|
305
326
|
*/
|
|
306
327
|
export async function discoverSellerBackedModels(registryUrl: string): Promise<SellerCatalogResult> {
|
|
307
328
|
const registry = await fetchSellerRegistry(registryUrl);
|
|
308
|
-
const
|
|
329
|
+
const visibleSellers = registry.sellers.filter(isBuyerVisibleRegistrySeller);
|
|
330
|
+
const sellerResults = await Promise.all(visibleSellers.map(async (seller) => {
|
|
309
331
|
try {
|
|
310
332
|
const manifest = await fetchSellerManifest(seller);
|
|
311
333
|
const protocols = manifestProtocols(manifest, seller);
|
|
@@ -328,6 +350,7 @@ export async function discoverSellerBackedModels(registryUrl: string): Promise<S
|
|
|
328
350
|
status: "ok",
|
|
329
351
|
manifestSellerId: manifest.sellerId || manifest.seller_id || seller.id,
|
|
330
352
|
discountRatio: manifest.selection?.discountRatio ?? manifest.selection?.discount_ratio,
|
|
353
|
+
serviceFeeRatio: manifest.selection?.serviceFeeRatio ?? manifest.selection?.service_fee_ratio,
|
|
331
354
|
modelCount: models.length,
|
|
332
355
|
supportedProtocols: protocols,
|
|
333
356
|
paymentMethods,
|
package/src/seller-pool.ts
CHANGED
|
@@ -22,6 +22,7 @@ export type FailureKind =
|
|
|
22
22
|
| "hard_4xx" // 400/404/422 — the seller is wrong for this request
|
|
23
23
|
| "auth_invalid" // 401/403 token invalid
|
|
24
24
|
| "insufficient_funds" // 402
|
|
25
|
+
| "busy_capacity" // 429 busy_capacity — seller is temporarily full
|
|
25
26
|
| "soft_5xx" // 429/5xx/timeout/network
|
|
26
27
|
| "deadline" // buyer deadline exceeded
|
|
27
28
|
| "stream_aborted" // upstream stream broken after first chunk
|
|
@@ -67,6 +68,8 @@ export interface PoolEntry {
|
|
|
67
68
|
upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
|
|
68
69
|
/** 上游错误类名,可选 */
|
|
69
70
|
upstreamErrorClass?: string;
|
|
71
|
+
/** 临时容量避让截止时间;大于当前时间时不参与路由 */
|
|
72
|
+
capacityBlockedUntil?: number;
|
|
70
73
|
}
|
|
71
74
|
|
|
72
75
|
/**
|
|
@@ -133,6 +136,8 @@ export interface SellerPoolOptions {
|
|
|
133
136
|
windowFailureRate?: number; // default 0.5
|
|
134
137
|
/** open 态保持时间(毫秒),过期后降级 half_open,默认 30000 */
|
|
135
138
|
openStateMs?: number; // default 30_000
|
|
139
|
+
/** `busy_capacity` 的短期避让时间,默认 2000ms */
|
|
140
|
+
capacityBlockMs?: number;
|
|
136
141
|
/** 注入时钟(测试用),默认 `Date.now` */
|
|
137
142
|
now?: () => number;
|
|
138
143
|
// PoolEntry -> CircuitState transition hooks for tests.
|
|
@@ -144,7 +149,8 @@ const DEFAULTS = {
|
|
|
144
149
|
failureThreshold: 3,
|
|
145
150
|
windowMs: 60_000,
|
|
146
151
|
windowFailureRate: 0.5,
|
|
147
|
-
openStateMs: 30_000
|
|
152
|
+
openStateMs: 30_000,
|
|
153
|
+
capacityBlockMs: 2_000
|
|
148
154
|
};
|
|
149
155
|
|
|
150
156
|
/**
|
|
@@ -162,6 +168,7 @@ export class SellerPool {
|
|
|
162
168
|
private readonly windowMs: number;
|
|
163
169
|
private readonly windowFailureRate: number;
|
|
164
170
|
private readonly openStateMs: number;
|
|
171
|
+
private readonly capacityBlockMs: number;
|
|
165
172
|
private readonly now: () => number;
|
|
166
173
|
|
|
167
174
|
private entries = new Map<string, PoolEntry>();
|
|
@@ -174,6 +181,7 @@ export class SellerPool {
|
|
|
174
181
|
this.windowMs = options.windowMs ?? DEFAULTS.windowMs;
|
|
175
182
|
this.windowFailureRate = options.windowFailureRate ?? DEFAULTS.windowFailureRate;
|
|
176
183
|
this.openStateMs = options.openStateMs ?? DEFAULTS.openStateMs;
|
|
184
|
+
this.capacityBlockMs = options.capacityBlockMs ?? DEFAULTS.capacityBlockMs;
|
|
177
185
|
this.now = options.now ?? Date.now;
|
|
178
186
|
}
|
|
179
187
|
|
|
@@ -183,7 +191,7 @@ export class SellerPool {
|
|
|
183
191
|
* etc.) so the pool always reflects the latest probe results.
|
|
184
192
|
*/
|
|
185
193
|
sync(): number {
|
|
186
|
-
const fresh = new Map<string, PoolEntry>();
|
|
194
|
+
const fresh = new Map<string, PoolEntry>(this.entries);
|
|
187
195
|
for (const entry of this.cache.snapshot()) {
|
|
188
196
|
for (const candidate of entry.candidates) {
|
|
189
197
|
const registry = this.modelIndex.getSeller(candidate.sellerId);
|
|
@@ -208,14 +216,52 @@ export class SellerPool {
|
|
|
208
216
|
ttftMs: candidate.ttftMs,
|
|
209
217
|
avgInferenceMs: candidate.avgInferenceMs,
|
|
210
218
|
upstreamStatus: candidate.upstreamStatus,
|
|
211
|
-
upstreamErrorClass: candidate.upstreamErrorClass
|
|
219
|
+
upstreamErrorClass: candidate.upstreamErrorClass,
|
|
220
|
+
capacityBlockedUntil: candidate.capacityBlockedUntil ?? previous?.capacityBlockedUntil
|
|
212
221
|
});
|
|
213
222
|
}
|
|
214
223
|
}
|
|
224
|
+
for (const sellerId of fresh.keys()) {
|
|
225
|
+
if (!this.modelIndex.getSeller(sellerId)) {
|
|
226
|
+
fresh.delete(sellerId);
|
|
227
|
+
}
|
|
228
|
+
}
|
|
215
229
|
this.entries = fresh;
|
|
216
230
|
return this.entries.size;
|
|
217
231
|
}
|
|
218
232
|
|
|
233
|
+
/**
 * Give registry-fallback candidates runtime state in the pool.
 *
 * A seller can be chosen for a live request before prewarm has produced a
 * cache entry for it; without a pool entry, a failure on that first request
 * could not influence the next route plan.
 *
 * For a seller that already has an entry, only `registrySeller` and `url`
 * are refreshed and every other field is kept. For a new seller, a fresh
 * entry is created with a closed circuit, no recorded failures, and a
 * starting health score of 60.
 *
 * @param sellers registry sellers that must be present in the pool
 * @param now timestamp stored as `lastProbeAt` on newly created entries;
 *   defaults to the pool clock
 */
ensureRegistrySellers(sellers: RegistrySeller[], now: number = this.now()): void {
  for (const seller of sellers) {
    // Stored URLs never carry trailing slashes.
    const normalizedUrl = seller.url.replace(/\/+$/, "");
    const existing = this.entries.get(seller.id);
    const entry: PoolEntry = existing
      ? { ...existing, registrySeller: seller, url: normalizedUrl }
      : {
          sellerId: seller.id,
          url: normalizedUrl,
          registrySeller: seller,
          circuit: "closed",
          consecutiveFailures: 0,
          recentFailures: [],
          lastSuccessAt: 0,
          lastFailAt: 0,
          lastProbeAt: now,
          healthScore: 60,
          avgLatencyMs: 0
        };
    this.entries.set(seller.id, entry);
  }
}
|
|
264
|
+
|
|
219
265
|
/**
|
|
220
266
|
* Pick up to `limit` candidates for a (model, protocol, payment) triple.
|
|
221
267
|
* Sellers in the `open` circuit are skipped unless their open state has
|
|
@@ -253,6 +299,7 @@ export class SellerPool {
|
|
|
253
299
|
return { entry, registrySeller: row.registrySeller };
|
|
254
300
|
})
|
|
255
301
|
.filter((row) => row.entry.circuit !== "open")
|
|
302
|
+
.filter((row) => !isCapacityBlocked(row.entry, now))
|
|
256
303
|
.sort((a, b) => b.entry.healthScore - a.entry.healthScore)
|
|
257
304
|
.slice(0, limit);
|
|
258
305
|
|
|
@@ -279,7 +326,8 @@ export class SellerPool {
|
|
|
279
326
|
consecutiveFailures: 0,
|
|
280
327
|
recentFailures: [],
|
|
281
328
|
lastSuccessAt: now,
|
|
282
|
-
healthScore: Math.min(100, Math.max(entry.healthScore, 60))
|
|
329
|
+
healthScore: Math.min(100, Math.max(entry.healthScore, 60)),
|
|
330
|
+
capacityBlockedUntil: undefined
|
|
283
331
|
};
|
|
284
332
|
this.entries.set(sellerId, next);
|
|
285
333
|
this.creditTracker.recordSpend(sellerId, balanceMicros);
|
|
@@ -309,8 +357,11 @@ export class SellerPool {
|
|
|
309
357
|
return undefined;
|
|
310
358
|
}
|
|
311
359
|
const now = options.now ?? this.now();
|
|
312
|
-
const
|
|
313
|
-
const
|
|
360
|
+
const isBusyCapacity = kind === "busy_capacity";
|
|
361
|
+
const recentFailures = (
|
|
362
|
+
isBusyCapacity ? entry.recentFailures : [...entry.recentFailures, now]
|
|
363
|
+
).filter((ts) => ts >= now - this.windowMs);
|
|
364
|
+
const consecutiveFailures = isBusyCapacity ? entry.consecutiveFailures : entry.consecutiveFailures + 1;
|
|
314
365
|
const failureRate = recentFailures.length / Math.max(1, this.windowMs / 1000);
|
|
315
366
|
const overThreshold = consecutiveFailures >= this.failureThreshold;
|
|
316
367
|
const overRate = failureRate >= this.windowFailureRate;
|
|
@@ -321,7 +372,8 @@ export class SellerPool {
|
|
|
321
372
|
circuit,
|
|
322
373
|
consecutiveFailures,
|
|
323
374
|
recentFailures,
|
|
324
|
-
lastFailAt: now
|
|
375
|
+
lastFailAt: now,
|
|
376
|
+
capacityBlockedUntil: isBusyCapacity ? now + this.capacityBlockMs : entry.capacityBlockedUntil
|
|
325
377
|
};
|
|
326
378
|
this.entries.set(sellerId, next);
|
|
327
379
|
if (options.transferLeftover || isHard) {
|
|
@@ -335,6 +387,12 @@ export class SellerPool {
|
|
|
335
387
|
recentFailureRate: failureRate,
|
|
336
388
|
threshold: this.failureThreshold
|
|
337
389
|
});
|
|
390
|
+
} else if (isBusyCapacity) {
|
|
391
|
+
logger.warn("pool.capacity_blocked", "seller pool entry temporarily blocked by busy capacity", {
|
|
392
|
+
sellerId,
|
|
393
|
+
capacityBlockMs: this.capacityBlockMs,
|
|
394
|
+
blockedUntil: next.capacityBlockedUntil
|
|
395
|
+
});
|
|
338
396
|
}
|
|
339
397
|
return next;
|
|
340
398
|
}
|
|
@@ -397,6 +455,10 @@ export class SellerPool {
|
|
|
397
455
|
}
|
|
398
456
|
}
|
|
399
457
|
|
|
458
|
+
/**
 * Whether a pool entry is currently inside its temporary capacity back-off window.
 *
 * @param entry pool entry to inspect
 * @param now current timestamp, on the same clock as `capacityBlockedUntil`
 * @returns `true` only when `capacityBlockedUntil` is a finite number strictly
 *   greater than `now`; a missing, non-finite, or already-reached deadline
 *   does not block
 */
function isCapacityBlocked(entry: PoolEntry, now: number): boolean {
  const blockedUntil = entry.capacityBlockedUntil;
  // `typeof` narrows the optional field, so no type assertion is needed.
  return typeof blockedUntil === "number" && Number.isFinite(blockedUntil) && blockedUntil > now;
}
|
|
461
|
+
|
|
400
462
|
function asResolution(resolved: { modelId: string; matched: boolean; sellers: RegistrySeller[]; missingModelsFlag: number }): ModelIndexResolution {
|
|
401
463
|
return {
|
|
402
464
|
modelId: resolved.modelId,
|
|
package/src/seller-route-planner.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type
|
|
1
|
+
import { isBuyerVisibleRegistrySeller, type RegistrySeller } from "./seller-catalog.js";
|
|
2
2
|
import {
|
|
3
3
|
planSellerRoutes,
|
|
4
4
|
type RoutingCandidate,
|
|
@@ -24,10 +24,16 @@ export interface SellerRouteMetric {
|
|
|
24
24
|
healthScore?: number;
|
|
25
25
|
/** 平均延迟(毫秒),可选 */
|
|
26
26
|
avgLatencyMs?: number;
|
|
27
|
+
/** TTFT(毫秒),可选 */
|
|
28
|
+
ttftMs?: number;
|
|
29
|
+
/** 平均推理延迟(毫秒),可选 */
|
|
30
|
+
avgInferenceMs?: number;
|
|
27
31
|
/** 折扣系数(0-1),可选;缺省时 scoring 视为"无折扣信息" */
|
|
28
32
|
discountRatio?: number;
|
|
29
33
|
/** 当前熔断状态,可选;`open` 的 seller 直接被剔除候选 */
|
|
30
34
|
circuit?: SellerCircuitState;
|
|
35
|
+
/** 临时容量避让截止时间;大于当前时间时直接剔除候选 */
|
|
36
|
+
capacityBlockedUntil?: number;
|
|
31
37
|
}
|
|
32
38
|
|
|
33
39
|
/**
|
|
@@ -75,6 +81,8 @@ export interface SellerRoutePlannerInput {
|
|
|
75
81
|
prewarmCandidates?: SellerRoutePrewarmCandidate[];
|
|
76
82
|
/** seller 实时指标(可选;`circuit=open` 的 seller 被剔除) */
|
|
77
83
|
sellerMetrics?: SellerRouteMetric[];
|
|
84
|
+
/** 当前时间戳,用于判断容量避让窗口;默认 `Date.now()` */
|
|
85
|
+
now?: number;
|
|
78
86
|
}
|
|
79
87
|
|
|
80
88
|
/**
|
|
@@ -89,6 +97,8 @@ export interface PlannedSellerRoute {
|
|
|
89
97
|
metrics: {
|
|
90
98
|
healthScore?: number;
|
|
91
99
|
avgLatencyMs?: number;
|
|
100
|
+
ttftMs?: number;
|
|
101
|
+
avgInferenceMs?: number;
|
|
92
102
|
discountRatio?: number;
|
|
93
103
|
/** 在 registry 里的声明顺序(0-based,tie-breaker) */
|
|
94
104
|
registryOrder: number;
|
|
@@ -128,7 +138,7 @@ interface IndexedSeller {
|
|
|
128
138
|
|
|
129
139
|
interface MetricIndex {
|
|
130
140
|
bySellerId: Map<string, SellerRouteMetric>;
|
|
131
|
-
|
|
141
|
+
blockedSellerIds: Set<string>;
|
|
132
142
|
}
|
|
133
143
|
|
|
134
144
|
/**
|
|
@@ -145,7 +155,7 @@ interface MetricIndex {
|
|
|
145
155
|
*/
|
|
146
156
|
export function planSellerRouteSet(input: SellerRoutePlannerInput): SellerRoutePlan {
|
|
147
157
|
const indexed = indexRegistrySellers(input.registrySellers);
|
|
148
|
-
const metrics = indexMetrics(input.sellerMetrics);
|
|
158
|
+
const metrics = indexMetrics(input.sellerMetrics, input.now ?? Date.now());
|
|
149
159
|
const source = chooseCandidateSource(input, indexed, metrics);
|
|
150
160
|
const strategyPlan = planSellerRoutes(source.candidates, input.routing);
|
|
151
161
|
const routes = strategyPlan.routes.map((candidate) => {
|
|
@@ -159,6 +169,8 @@ export function planSellerRouteSet(input: SellerRoutePlannerInput): SellerRouteP
|
|
|
159
169
|
metrics: {
|
|
160
170
|
healthScore: candidate.healthScore,
|
|
161
171
|
avgLatencyMs: candidate.avgLatencyMs,
|
|
172
|
+
ttftMs: candidate.ttftMs,
|
|
173
|
+
avgInferenceMs: candidate.avgInferenceMs,
|
|
162
174
|
discountRatio: candidate.discountRatio,
|
|
163
175
|
registryOrder: candidate.registryOrder
|
|
164
176
|
}
|
|
@@ -189,7 +201,7 @@ function chooseCandidateSource(
|
|
|
189
201
|
if (!indexedSeller) {
|
|
190
202
|
return undefined;
|
|
191
203
|
}
|
|
192
|
-
if (metrics.
|
|
204
|
+
if (metrics.blockedSellerIds.has(indexedSeller.seller.id)) {
|
|
193
205
|
return undefined;
|
|
194
206
|
}
|
|
195
207
|
return buildCandidate({
|
|
@@ -217,7 +229,7 @@ function chooseCandidateSource(
|
|
|
217
229
|
source: "registry_fallback",
|
|
218
230
|
sourceReason: prewarm.length > 0 ? "prewarm_no_compatible_candidates" : "prewarm_missing",
|
|
219
231
|
candidates: indexed.ordered
|
|
220
|
-
.filter((entry) => !metrics.
|
|
232
|
+
.filter((entry) => !metrics.blockedSellerIds.has(entry.seller.id))
|
|
221
233
|
.map((entry) => buildCandidate({
|
|
222
234
|
seller: entry.seller,
|
|
223
235
|
registryOrder: entry.registryOrder,
|
|
@@ -246,6 +258,8 @@ function buildCandidate(input: {
|
|
|
246
258
|
supportsPayment: sellerSupportsPayment(input.seller, input.paymentMethod),
|
|
247
259
|
healthScore: input.metric?.healthScore,
|
|
248
260
|
avgLatencyMs: input.metric?.avgLatencyMs,
|
|
261
|
+
ttftMs: input.metric?.ttftMs,
|
|
262
|
+
avgInferenceMs: input.metric?.avgInferenceMs,
|
|
249
263
|
discountRatio: input.metric?.discountRatio,
|
|
250
264
|
registryOrder: input.registryOrder
|
|
251
265
|
};
|
|
@@ -261,6 +275,7 @@ function indexRegistrySellers(sellers: RegistrySeller[]): {
|
|
|
261
275
|
} {
|
|
262
276
|
const ordered = sellers
|
|
263
277
|
.filter((seller) => Boolean(seller?.id && seller.url))
|
|
278
|
+
.filter((seller) => isBuyerVisibleRegistrySeller(seller))
|
|
264
279
|
.map((seller, registryOrder) => ({ seller, registryOrder }));
|
|
265
280
|
return {
|
|
266
281
|
ordered,
|
|
@@ -268,15 +283,15 @@ function indexRegistrySellers(sellers: RegistrySeller[]): {
|
|
|
268
283
|
};
|
|
269
284
|
}
|
|
270
285
|
|
|
271
|
-
function indexMetrics(metrics: SellerRouteMetric[] | undefined): MetricIndex {
|
|
272
|
-
const
|
|
273
|
-
.filter((metric) => metric.circuit === "open")
|
|
286
|
+
function indexMetrics(metrics: SellerRouteMetric[] | undefined, now: number): MetricIndex {
|
|
287
|
+
const blockedSellerIds = new Set((metrics ?? [])
|
|
288
|
+
.filter((metric) => metric.circuit === "open" || isCapacityBlocked(metric, now))
|
|
274
289
|
.map((metric) => metric.sellerId));
|
|
275
290
|
return {
|
|
276
291
|
bySellerId: new Map((metrics ?? [])
|
|
277
|
-
.filter((metric) => metric.
|
|
292
|
+
.filter((metric) => !blockedSellerIds.has(metric.sellerId))
|
|
278
293
|
.map((metric) => [metric.sellerId, metric])),
|
|
279
|
-
|
|
294
|
+
blockedSellerIds
|
|
280
295
|
};
|
|
281
296
|
}
|
|
282
297
|
|
|
@@ -288,11 +303,18 @@ function mergeMetric(
|
|
|
288
303
|
sellerId: prewarm.sellerId,
|
|
289
304
|
healthScore: prewarm.healthScore ?? metric?.healthScore,
|
|
290
305
|
avgLatencyMs: prewarm.avgLatencyMs ?? metric?.avgLatencyMs,
|
|
306
|
+
ttftMs: metric?.ttftMs,
|
|
307
|
+
avgInferenceMs: metric?.avgInferenceMs,
|
|
291
308
|
discountRatio: metric?.discountRatio,
|
|
292
|
-
circuit: metric?.circuit
|
|
309
|
+
circuit: metric?.circuit,
|
|
310
|
+
capacityBlockedUntil: metric?.capacityBlockedUntil
|
|
293
311
|
};
|
|
294
312
|
}
|
|
295
313
|
|
|
314
|
+
/**
 * Whether a seller metric is currently inside its temporary capacity back-off window.
 *
 * @param metric seller metric to inspect
 * @param now current timestamp, on the same clock as `capacityBlockedUntil`
 * @returns `true` only when `capacityBlockedUntil` is a finite number strictly
 *   greater than `now`; a missing, non-finite, or already-reached deadline
 *   does not block
 */
function isCapacityBlocked(metric: SellerRouteMetric, now: number): boolean {
  const blockedUntil = metric.capacityBlockedUntil;
  // `typeof` narrows the optional field, so no type assertion is needed.
  return typeof blockedUntil === "number" && Number.isFinite(blockedUntil) && blockedUntil > now;
}
|
|
317
|
+
|
|
296
318
|
function sellerSupportsModel(seller: RegistrySeller, modelId: string): boolean {
|
|
297
319
|
const normalized = normalizeLookupValue(modelId);
|
|
298
320
|
return (seller.models ?? []).some((model) => normalizeLookupValue(model) === normalized);
|
|
package/src/seller-routing-config.ts
CHANGED
|
@@ -18,6 +18,8 @@ export interface BuyerSellerRoutingConfig extends SellerRoutingStrategyConfig {
|
|
|
18
18
|
mode: SellerRoutingMode;
|
|
19
19
|
/** 评分器:`speed` / `discount` / `balanced` */
|
|
20
20
|
scorer: SellerRoutingScorer;
|
|
21
|
+
/** fixed 模式下按模型固定 seller;缺省时回退到全局 sellerId。 */
|
|
22
|
+
fixedByModel?: Record<string, string>;
|
|
21
23
|
}
|
|
22
24
|
|
|
23
25
|
/**
|
|
@@ -53,6 +55,7 @@ export function normalizeSellerRoutingConfig(value: unknown): BuyerSellerRouting
|
|
|
53
55
|
return {
|
|
54
56
|
mode,
|
|
55
57
|
sellerId: readOptionalString(value.sellerId),
|
|
58
|
+
fixedByModel: readFixedByModel(value.fixedByModel),
|
|
56
59
|
scorer
|
|
57
60
|
};
|
|
58
61
|
}
|
|
@@ -151,7 +154,7 @@ export function parseSellerIdList(value: string): string[] {
|
|
|
151
154
|
* @throws 当 `fixedSet` 模式缺少 `sellerIds` 时
|
|
152
155
|
*/
|
|
153
156
|
export function assertSellerRoutingConfig(config: BuyerSellerRoutingConfig): void {
|
|
154
|
-
if (config.mode === "fixed" && !config.sellerId?.trim()) {
|
|
157
|
+
if (config.mode === "fixed" && !config.sellerId?.trim() && Object.keys(config.fixedByModel ?? {}).length === 0) {
|
|
155
158
|
throw new Error("fixed routing requires --seller <sellerId>");
|
|
156
159
|
}
|
|
157
160
|
if (config.mode === "fixedSet" && (!config.sellerIds || config.sellerIds.length === 0)) {
|
|
@@ -193,6 +196,16 @@ function readSellerIds(value: unknown): string[] {
|
|
|
193
196
|
return [];
|
|
194
197
|
}
|
|
195
198
|
|
|
199
|
+
/**
 * Read the optional per-model seller pinning map from untyped config input.
 *
 * Model ids and seller ids are trimmed. A pair is kept only when both sides
 * are non-empty after trimming; a seller id that is not a string is treated
 * as empty and therefore dropped.
 *
 * @param value raw `fixedByModel` value from the config object
 * @returns the cleaned mapping, or `undefined` when `value` is not an object
 *   or no valid pair remains
 */
function readFixedByModel(value: unknown): Record<string, string> | undefined {
  if (!isObject(value)) {
    return undefined;
  }
  const pinned: Array<[string, string]> = [];
  for (const [rawModelId, rawSellerId] of Object.entries(value)) {
    const modelId = rawModelId.trim();
    const sellerId = typeof rawSellerId === "string" ? rawSellerId.trim() : "";
    if (modelId.length > 0 && sellerId.length > 0) {
      pinned.push([modelId, sellerId]);
    }
  }
  return pinned.length > 0 ? Object.fromEntries(pinned) : undefined;
}
|
|
208
|
+
|
|
196
209
|
/**
 * Type guard for non-null, non-array object values.
 *
 * @param value value to test
 * @returns `true` when `typeof value` is `"object"`, the value is not `null`,
 *   and it is not an array
 */
function isObject(value: unknown): value is Record<string, unknown> {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return !Array.isArray(value);
}
|
|
Binary file
|