@tokenbuddy/tokenbuddy 1.0.34 → 1.0.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +16 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +29 -1
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/daemon.d.ts +13 -0
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +121 -9
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/provider-routing-config.d.ts +1 -1
- package/dist/src/provider-routing-config.d.ts.map +1 -1
- package/dist/src/provider-routing-config.js +1 -1
- package/dist/src/provider-routing-config.js.map +1 -1
- package/dist/src/seller-catalog.d.ts +1 -1
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-routing-strategy.d.ts +15 -0
- package/dist/src/seller-routing-strategy.d.ts.map +1 -1
- package/dist/src/seller-routing-strategy.js +41 -0
- package/dist/src/seller-routing-strategy.js.map +1 -1
- package/package.json +2 -2
- package/src/buyer-store.ts +61 -1
- package/src/daemon.ts +136 -9
- package/src/provider-routing-config.ts +2 -2
- package/src/seller-catalog.ts +1 -1
- package/src/seller-routing-strategy.ts +41 -0
- package/static/ui/assets/index-BVbeDEwq.js +271 -0
- package/static/ui/assets/index-BVbeDEwq.js.map +1 -0
- package/static/ui/index.html +1 -1
- package/tests/daemon-classify.test.ts +8 -0
- package/tests/image-generation-e2e.test.ts +230 -0
- package/static/ui/assets/index-Mt3BZFuP.js +0 -266
- package/static/ui/assets/index-Mt3BZFuP.js.map +0 -1
package/src/daemon.ts
CHANGED
|
@@ -313,6 +313,10 @@ interface UsageSummary {
|
|
|
313
313
|
completionTokens: number;
|
|
314
314
|
cacheReadTokens: number;
|
|
315
315
|
billedMicros: number;
|
|
316
|
+
imageCount?: number;
|
|
317
|
+
imageSize?: string;
|
|
318
|
+
imageQuality?: string;
|
|
319
|
+
imageOutputFormat?: string;
|
|
316
320
|
}
|
|
317
321
|
|
|
318
322
|
interface ProxyBodySummary {
|
|
@@ -365,6 +369,7 @@ interface SellerSettlementSummary {
|
|
|
365
369
|
}
|
|
366
370
|
|
|
367
371
|
interface BillingBreakdownSummary {
|
|
372
|
+
billingUnit?: "tokens" | "images";
|
|
368
373
|
inputPriceMicrosPer1m: number;
|
|
369
374
|
outputPriceMicrosPer1m: number;
|
|
370
375
|
cacheReadPriceMicrosPer1m: number;
|
|
@@ -374,6 +379,13 @@ interface BillingBreakdownSummary {
|
|
|
374
379
|
originalUsdMicros: number;
|
|
375
380
|
billingMultiplier: number;
|
|
376
381
|
serviceTier?: string;
|
|
382
|
+
imageCount?: number;
|
|
383
|
+
imageSize?: string;
|
|
384
|
+
imageQuality?: string;
|
|
385
|
+
imageOutputFormat?: string;
|
|
386
|
+
imageOutputTokens?: number;
|
|
387
|
+
imageOutputCostMicros?: number;
|
|
388
|
+
imageCostMicrosPerImage?: number;
|
|
377
389
|
}
|
|
378
390
|
|
|
379
391
|
interface SellerAttemptRequestContext {
|
|
@@ -424,6 +436,17 @@ function safeBillingServiceTier(value: unknown): string | undefined {
|
|
|
424
436
|
return /^[A-Za-z0-9 _.-]+$/.test(trimmed) ? trimmed : undefined;
|
|
425
437
|
}
|
|
426
438
|
|
|
439
|
+
/**
 * Narrows an untrusted value to one of the recognised billing units.
 *
 * @param value Arbitrary input (for example a field read from a parsed payload).
 * @returns `"tokens"` or `"images"` when `value` is exactly that string;
 *          `undefined` for anything else, including other strings and non-strings.
 */
function safeBillingUnit(value: unknown): "tokens" | "images" | undefined {
  if (value === "tokens") {
    return "tokens";
  }
  if (value === "images") {
    return "images";
  }
  return undefined;
}
|
|
442
|
+
|
|
443
|
+
/**
 * Validates an untrusted value as a short, display-safe string.
 *
 * The value must be a string whose trimmed form is 1 to 80 characters long and
 * consists only of ASCII letters, digits, space, `_`, `.`, `/`, `:` and `-`.
 *
 * @param value Arbitrary input.
 * @returns The trimmed string when it passes every check; otherwise `undefined`.
 */
function safeShortDisplayString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const candidate = value.trim();
    const lengthOk = candidate.length > 0 && candidate.length <= 80;
    // Allowlist rather than blocklist: anything outside this set is rejected outright.
    if (lengthOk && /^[A-Za-z0-9 _./:-]+$/.test(candidate)) {
      return candidate;
    }
  }
  return undefined;
}
|
|
449
|
+
|
|
427
450
|
function billingBreakdownSummary(value: unknown): BillingBreakdownSummary | undefined {
|
|
428
451
|
const data = usageRecord(value);
|
|
429
452
|
if (!data) return undefined;
|
|
@@ -448,6 +471,7 @@ function billingBreakdownSummary(value: unknown): BillingBreakdownSummary | unde
|
|
|
448
471
|
return undefined;
|
|
449
472
|
}
|
|
450
473
|
return {
|
|
474
|
+
billingUnit: safeBillingUnit(data.billingUnit ?? data.billing_unit),
|
|
451
475
|
inputPriceMicrosPer1m,
|
|
452
476
|
outputPriceMicrosPer1m,
|
|
453
477
|
cacheReadPriceMicrosPer1m,
|
|
@@ -456,10 +480,36 @@ function billingBreakdownSummary(value: unknown): BillingBreakdownSummary | unde
|
|
|
456
480
|
cacheReadCostMicros,
|
|
457
481
|
originalUsdMicros,
|
|
458
482
|
billingMultiplier,
|
|
459
|
-
serviceTier: safeBillingServiceTier(data.serviceTier ?? data.service_tier)
|
|
483
|
+
serviceTier: safeBillingServiceTier(data.serviceTier ?? data.service_tier),
|
|
484
|
+
imageCount: nonNegativeIntegerField(data.imageCount ?? data.image_count),
|
|
485
|
+
imageSize: safeShortDisplayString(data.imageSize ?? data.image_size),
|
|
486
|
+
imageQuality: safeShortDisplayString(data.imageQuality ?? data.image_quality),
|
|
487
|
+
imageOutputFormat: safeShortDisplayString(data.imageOutputFormat ?? data.image_output_format),
|
|
488
|
+
imageOutputTokens: nonNegativeIntegerField(data.imageOutputTokens ?? data.image_output_tokens),
|
|
489
|
+
imageOutputCostMicros: nonNegativeIntegerField(data.imageOutputCostMicros ?? data.image_output_cost_micros),
|
|
490
|
+
imageCostMicrosPerImage: nonNegativeIntegerField(data.imageCostMicrosPerImage ?? data.image_cost_micros_per_image)
|
|
460
491
|
};
|
|
461
492
|
}
|
|
462
493
|
|
|
494
|
+
/**
 * Derives image-related usage fields from an image-generation response and its request.
 *
 * - `imageCount` is the length of `responseBody.data` when that is an array;
 *   otherwise the request's `n` when it is a positive integer; otherwise `undefined`.
 * - `imageSize`, `imageQuality` and `imageOutputFormat` are each taken from the first
 *   source that yields a display-safe string, in the order: top-level response field,
 *   first object entry of `responseBody.data`, request field.
 *
 * @param responseBody Parsed response body, if any.
 * @param requestBody Parsed request body, if any.
 * @returns The image fields of a usage summary; any of them may be `undefined`.
 */
function imageUsageMetadata(responseBody: Record<string, unknown> | undefined, requestBody: Record<string, unknown> | undefined): Partial<UsageSummary> {
  const images = Array.isArray(responseBody?.data) ? responseBody.data : undefined;
  const sample = images?.find((item) => item && typeof item === "object") as Record<string, unknown> | undefined;
  const requested = nonNegativeIntegerField(requestBody?.n);

  // Resolve one attribute using the response -> first image -> request fallback order.
  const pick = (key: "size" | "quality" | "output_format"): string | undefined =>
    safeShortDisplayString(responseBody?.[key])
      ?? safeShortDisplayString(sample?.[key])
      ?? safeShortDisplayString(requestBody?.[key]);

  let imageCount: number | undefined;
  if (images) {
    // The response's own array length wins, even when it is empty.
    imageCount = images.length;
  } else if (requested && requested > 0) {
    imageCount = requested;
  } else {
    imageCount = undefined;
  }

  return {
    imageCount,
    imageSize: pick("size"),
    imageQuality: pick("quality"),
    imageOutputFormat: pick("output_format")
  };
}
|
|
505
|
+
|
|
506
|
+
/**
 * Computes the per-image cost in micros, rounded up to a whole micro.
 *
 * @param totalMicros Total cost in micros for the whole request.
 * @param imageCount Number of images the total applies to.
 * @returns `Math.ceil(totalMicros / imageCount)`, or `undefined` when either input is
 *          missing, either input is not a finite number, or `imageCount` is not positive.
 */
function imageCostMicrosPerImage(totalMicros: number | undefined, imageCount: number | undefined): number | undefined {
  if (totalMicros === undefined || imageCount === undefined) {
    return undefined;
  }
  // `NaN <= 0` is false, so NaN would slip past a plain `<= 0` check and make the
  // division yield NaN; reject non-finite inputs explicitly instead.
  if (!Number.isFinite(totalMicros) || !Number.isFinite(imageCount) || imageCount <= 0) {
    return undefined;
  }
  return Math.ceil(totalMicros / imageCount);
}
|
|
512
|
+
|
|
463
513
|
function purchasePaymentSummaryFromQuote(value: unknown): PurchasePaymentSummary {
|
|
464
514
|
const quote = usageRecord(value);
|
|
465
515
|
if (!quote) return {};
|
|
@@ -860,7 +910,16 @@ export class TokenbuddyDaemon {
|
|
|
860
910
|
const bundledDir = this.bundledClawtipStaticDir();
|
|
861
911
|
const rechargeSourcePath = bundledDir ? path.join(bundledDir, CLAWTIP_RECHARGE_QR_FILE) : undefined;
|
|
862
912
|
if (rechargeSourcePath && fs.existsSync(rechargeSourcePath)) {
|
|
863
|
-
fs.
|
|
913
|
+
fs.mkdirSync(path.dirname(rechargeOutputPath), { recursive: true });
|
|
914
|
+
try {
|
|
915
|
+
fs.copyFileSync(rechargeSourcePath, rechargeOutputPath);
|
|
916
|
+
} catch (err) {
|
|
917
|
+
if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
|
|
918
|
+
throw err;
|
|
919
|
+
}
|
|
920
|
+
fs.mkdirSync(path.dirname(rechargeOutputPath), { recursive: true });
|
|
921
|
+
fs.copyFileSync(rechargeSourcePath, rechargeOutputPath);
|
|
922
|
+
}
|
|
864
923
|
}
|
|
865
924
|
}
|
|
866
925
|
|
|
@@ -1319,6 +1378,19 @@ export class TokenbuddyDaemon {
|
|
|
1319
1378
|
});
|
|
1320
1379
|
}
|
|
1321
1380
|
|
|
1381
|
+
/**
|
|
1382
|
+
* 将 AutoProviderConfig 转换为 BuyerSellerRoutingConfig。
|
|
1383
|
+
*
|
|
1384
|
+
* 映射规则:
|
|
1385
|
+
* - `recommended` 模式 → `fullAuto`:使用全部 registry sellers,由评分器自动排序
|
|
1386
|
+
* - `custom` 模式 → `fixedSet`:使用 `config.sellerIds` 白名单内的 sellers
|
|
1387
|
+
*
|
|
1388
|
+
* 注意:`config.modelIds` 不直接用于路由过滤,而是通过 `applyFocusSet()` 设置焦点集合,
|
|
1389
|
+
* 影响 UI 推荐和初始化向导,但不限制实际路由决策。
|
|
1390
|
+
*
|
|
1391
|
+
* @param config Auto provider 配置
|
|
1392
|
+
* @returns Seller 路由配置
|
|
1393
|
+
*/
|
|
1322
1394
|
private applyAutoProviderRoutingConfig(config: AutoProviderConfig): BuyerSellerRoutingConfig {
|
|
1323
1395
|
const routing: BuyerSellerRoutingConfig = config.range === "custom"
|
|
1324
1396
|
? {
|
|
@@ -1340,7 +1412,19 @@ export class TokenbuddyDaemon {
|
|
|
1340
1412
|
}
|
|
1341
1413
|
|
|
1342
1414
|
/**
 * Decides whether the auto provider may be used for routing with the given config.
 *
 * Returns `false` and logs a warning in two cases:
 * - the config is not enabled (`route.auto_provider.disabled`);
 * - the range is `"custom"` but `sellerIds` is empty (`route.auto_provider.custom_empty`).
 *
 * @param config Auto provider configuration to check.
 * @returns `true` when neither of the blocking conditions applies.
 */
private autoProviderCanRoute(config: AutoProviderConfig): boolean {
  if (config.enabled) {
    const customWithoutSellers = config.range === "custom" && config.sellerIds.length === 0;
    if (!customWithoutSellers) {
      return true;
    }
    // Custom mode is a seller allowlist; an empty list leaves nothing to route to.
    logger.warn("route.auto_provider.custom_empty", "auto provider custom mode requires sellerIds", {
      range: config.range,
      modelIds: config.modelIds,
      scorer: config.scorer
    });
    return false;
  }
  logger.warn("route.auto_provider.disabled", "auto provider is disabled in config");
  return false;
}
|
|
1345
1429
|
|
|
1346
1430
|
private providerModePayload(): Record<string, unknown> {
|
|
@@ -1627,6 +1711,9 @@ export class TokenbuddyDaemon {
|
|
|
1627
1711
|
if (endpoint === "/v1/messages" || endpoint === "/messages") {
|
|
1628
1712
|
return "messages";
|
|
1629
1713
|
}
|
|
1714
|
+
if (endpoint === "/v1/images/generations") {
|
|
1715
|
+
return "images_generations";
|
|
1716
|
+
}
|
|
1630
1717
|
return undefined;
|
|
1631
1718
|
}
|
|
1632
1719
|
|
|
@@ -1761,6 +1848,20 @@ export class TokenbuddyDaemon {
|
|
|
1761
1848
|
planReason: reason,
|
|
1762
1849
|
planSellerCount: providers.length
|
|
1763
1850
|
}));
|
|
1851
|
+
|
|
1852
|
+
// Log manual provider routing decision for auditability
|
|
1853
|
+
logger.info("route.manual.decision", "manual provider routing decision", {
|
|
1854
|
+
policy: config.routing.policy,
|
|
1855
|
+
selectedProviderId: providers[0].id,
|
|
1856
|
+
selectedProviderName: providers[0].name,
|
|
1857
|
+
totalCandidates: providers.length,
|
|
1858
|
+
candidateProviderIds: providers.map((p) => p.id),
|
|
1859
|
+
endpoint,
|
|
1860
|
+
modelId,
|
|
1861
|
+
protocol,
|
|
1862
|
+
reason
|
|
1863
|
+
});
|
|
1864
|
+
|
|
1764
1865
|
return {
|
|
1765
1866
|
routes,
|
|
1766
1867
|
paymentMethod: "provider_key",
|
|
@@ -1842,11 +1943,15 @@ export class TokenbuddyDaemon {
|
|
|
1842
1943
|
selectionMode: this.selectionMode,
|
|
1843
1944
|
sellerRoutingMode: routing.mode,
|
|
1844
1945
|
sellerRoutingScorer: routing.scorer,
|
|
1946
|
+
autoProviderRange: this.currentAutoProviderConfig().range,
|
|
1947
|
+
autoProviderCustomSellerIds: this.currentAutoProviderConfig().range === "custom" ? this.currentAutoProviderConfig().sellerIds : undefined,
|
|
1845
1948
|
routeSource: planned.source,
|
|
1846
1949
|
routeSourceReason: planned.sourceReason,
|
|
1847
1950
|
routeReason: planned.reason,
|
|
1848
1951
|
candidateDiagnostics: planned.diagnostics,
|
|
1849
1952
|
sellerCount: planned.routes.length,
|
|
1953
|
+
selectedSellerId: planned.routes[0]?.seller.id,
|
|
1954
|
+
selectedSellerName: planned.routes[0]?.seller.name,
|
|
1850
1955
|
sellers: planned.routes.map((route) => route.seller.id)
|
|
1851
1956
|
});
|
|
1852
1957
|
|
|
@@ -2251,7 +2356,7 @@ export class TokenbuddyDaemon {
|
|
|
2251
2356
|
};
|
|
2252
2357
|
}
|
|
2253
2358
|
|
|
2254
|
-
private readUsage(bodyText: string): UsageSummary {
|
|
2359
|
+
private readUsage(bodyText: string, endpoint?: string, requestBody?: unknown): UsageSummary {
|
|
2255
2360
|
const fallback: UsageSummary = {
|
|
2256
2361
|
promptTokens: 0,
|
|
2257
2362
|
completionTokens: 0,
|
|
@@ -2273,11 +2378,15 @@ export class TokenbuddyDaemon {
|
|
|
2273
2378
|
?? nonNegativeIntegerField(usage?.cache_read_input_tokens)
|
|
2274
2379
|
?? nonNegativeIntegerField(usage?.cache_read_tokens)
|
|
2275
2380
|
?? 0;
|
|
2381
|
+
const imageMetadata = endpoint === "/v1/images/generations"
|
|
2382
|
+
? imageUsageMetadata(data, usageRecord(requestBody))
|
|
2383
|
+
: {};
|
|
2276
2384
|
return {
|
|
2277
2385
|
promptTokens,
|
|
2278
2386
|
completionTokens,
|
|
2279
2387
|
cacheReadTokens,
|
|
2280
|
-
billedMicros: (promptTokens + completionTokens) * 4
|
|
2388
|
+
billedMicros: (promptTokens + completionTokens) * 4,
|
|
2389
|
+
...imageMetadata
|
|
2281
2390
|
};
|
|
2282
2391
|
} catch {
|
|
2283
2392
|
return fallback;
|
|
@@ -2349,6 +2458,14 @@ export class TokenbuddyDaemon {
|
|
|
2349
2458
|
originalUsdMicros: billingBreakdown?.originalUsdMicros,
|
|
2350
2459
|
billingMultiplier: billingBreakdown?.billingMultiplier,
|
|
2351
2460
|
serviceTier: billingBreakdown?.serviceTier,
|
|
2461
|
+
billingUnit: billingBreakdown?.billingUnit ?? (endpoint === "/v1/images/generations" ? "images" : "tokens"),
|
|
2462
|
+
imageCount: billingBreakdown?.imageCount ?? usage.imageCount,
|
|
2463
|
+
imageSize: billingBreakdown?.imageSize ?? usage.imageSize,
|
|
2464
|
+
imageQuality: billingBreakdown?.imageQuality ?? usage.imageQuality,
|
|
2465
|
+
imageOutputFormat: billingBreakdown?.imageOutputFormat ?? usage.imageOutputFormat,
|
|
2466
|
+
imageOutputTokens: billingBreakdown?.imageOutputTokens ?? (endpoint === "/v1/images/generations" ? usage.completionTokens : undefined),
|
|
2467
|
+
imageOutputCostMicros: billingBreakdown?.imageOutputCostMicros ?? (endpoint === "/v1/images/generations" ? billingBreakdown?.outputCostMicros : undefined),
|
|
2468
|
+
imageCostMicrosPerImage: billingBreakdown?.imageCostMicrosPerImage ?? imageCostMicrosPerImage(settledMicros ?? usage.billedMicros, billingBreakdown?.imageCount ?? usage.imageCount),
|
|
2352
2469
|
balanceSnapshotMicros: settlement?.remainingCreditMicros,
|
|
2353
2470
|
balanceSource: settlement ? "seller_authoritative" : "estimated",
|
|
2354
2471
|
prompt,
|
|
@@ -2371,6 +2488,8 @@ export class TokenbuddyDaemon {
|
|
|
2371
2488
|
settledMicros,
|
|
2372
2489
|
settledUsdMicros: settlement?.settledUsdMicros,
|
|
2373
2490
|
billedMicros: settledMicros ?? usage.billedMicros,
|
|
2491
|
+
billingUnit: billingBreakdown?.billingUnit ?? (endpoint === "/v1/images/generations" ? "images" : "tokens"),
|
|
2492
|
+
imageCount: billingBreakdown?.imageCount ?? usage.imageCount,
|
|
2374
2493
|
promptTokens: usage.promptTokens,
|
|
2375
2494
|
completionTokens: usage.completionTokens,
|
|
2376
2495
|
cacheReadTokens: usage.cacheReadTokens,
|
|
@@ -3192,6 +3311,7 @@ export class TokenbuddyDaemon {
|
|
|
3192
3311
|
billedMicros: Math.max(1, bytes),
|
|
3193
3312
|
estimatedMicros: Math.max(1, bytes),
|
|
3194
3313
|
priceVersion: `local-provider:${provider.id}`,
|
|
3314
|
+
billingUnit: endpoint === "/v1/images/generations" ? "images" : "tokens",
|
|
3195
3315
|
balanceSource: "self_funded_provider",
|
|
3196
3316
|
prompt: this.inferPromptForHash(reqBody),
|
|
3197
3317
|
ttftMs,
|
|
@@ -3208,7 +3328,7 @@ export class TokenbuddyDaemon {
|
|
|
3208
3328
|
const responseBody = await response.text();
|
|
3209
3329
|
markFirstByte();
|
|
3210
3330
|
res.send(responseBody);
|
|
3211
|
-
const usage = this.readUsage(responseBody);
|
|
3331
|
+
const usage = this.readUsage(responseBody, endpoint, reqBody);
|
|
3212
3332
|
const durationMs = Date.now() - startedAt;
|
|
3213
3333
|
const ttftMs = Date.now() - attemptStartedAt;
|
|
3214
3334
|
const completionTokens = usage.completionTokens;
|
|
@@ -3232,6 +3352,13 @@ export class TokenbuddyDaemon {
|
|
|
3232
3352
|
billedMicros: usage.billedMicros,
|
|
3233
3353
|
estimatedMicros: usage.billedMicros,
|
|
3234
3354
|
priceVersion: `local-provider:${provider.id}`,
|
|
3355
|
+
billingUnit: endpoint === "/v1/images/generations" ? "images" : "tokens",
|
|
3356
|
+
imageCount: usage.imageCount,
|
|
3357
|
+
imageSize: usage.imageSize,
|
|
3358
|
+
imageQuality: usage.imageQuality,
|
|
3359
|
+
imageOutputFormat: usage.imageOutputFormat,
|
|
3360
|
+
imageOutputTokens: endpoint === "/v1/images/generations" ? usage.completionTokens : undefined,
|
|
3361
|
+
imageCostMicrosPerImage: imageCostMicrosPerImage(usage.billedMicros, usage.imageCount),
|
|
3235
3362
|
balanceSource: "self_funded_provider",
|
|
3236
3363
|
prompt: this.inferPromptForHash(reqBody),
|
|
3237
3364
|
response: responseBody,
|
|
@@ -3644,7 +3771,7 @@ export class TokenbuddyDaemon {
|
|
|
3644
3771
|
lease.refresh();
|
|
3645
3772
|
markFirstByte();
|
|
3646
3773
|
res.send(responseBody);
|
|
3647
|
-
const usage = this.readUsage(responseBody);
|
|
3774
|
+
const usage = this.readUsage(responseBody, endpoint, body);
|
|
3648
3775
|
void this.refreshSellerRuntimeMetrics(route, requestId);
|
|
3649
3776
|
this.recordReconciledInference(
|
|
3650
3777
|
route,
|
|
@@ -4857,7 +4984,7 @@ export class TokenbuddyDaemon {
|
|
|
4857
4984
|
}
|
|
4858
4985
|
});
|
|
4859
4986
|
|
|
4860
|
-
for (const endpoint of ["/v1/chat/completions", "/v1/responses", "/v1/messages", "/messages"]) {
|
|
4987
|
+
for (const endpoint of ["/v1/chat/completions", "/v1/responses", "/v1/messages", "/messages", "/v1/images/generations"]) {
|
|
4861
4988
|
proxyApp.post(endpoint, async (req: Request, res: Response) => {
|
|
4862
4989
|
await this.forwardProxyRequest(endpoint, req, res);
|
|
4863
4990
|
});
|
|
@@ -5014,7 +5141,7 @@ export class TokenbuddyDaemon {
|
|
|
5014
5141
|
}
|
|
5015
5142
|
|
|
5016
5143
|
private resolvePrewarmProtocol(modelId: string, paymentMethod = "clawtip"): string | undefined {
|
|
5017
|
-
for (const protocol of ["chat_completions", "messages", "responses"]) {
|
|
5144
|
+
for (const protocol of ["chat_completions", "messages", "responses", "images_generations"]) {
|
|
5018
5145
|
if (this.modelIndex.sellersFor(modelId, { protocol, paymentMethod }).length > 0) {
|
|
5019
5146
|
return protocol;
|
|
5020
5147
|
}
|
|
@@ -7,7 +7,7 @@ export const MANUAL_PROVIDER_OBSERVATIONS_CONFIG_KEY = "manual-provider-observat
|
|
|
7
7
|
|
|
8
8
|
export type ProviderMode = "manual" | "auto";
|
|
9
9
|
export type ManualProviderKind = "openai-compatible";
|
|
10
|
-
export type ProviderProtocol = "chat_completions" | "responses" | "messages";
|
|
10
|
+
export type ProviderProtocol = "chat_completions" | "responses" | "messages" | "images_generations";
|
|
11
11
|
export type AutoProviderRange = "recommended" | "custom";
|
|
12
12
|
export type ManualProviderRoutingPolicy = "fallback" | "locked";
|
|
13
13
|
|
|
@@ -85,7 +85,7 @@ export interface ManualProviderObservationsConfig {
|
|
|
85
85
|
updatedAt: string;
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
-
const VALID_PROTOCOLS = new Set<ProviderProtocol>(["chat_completions", "responses", "messages"]);
|
|
88
|
+
const VALID_PROTOCOLS = new Set<ProviderProtocol>(["chat_completions", "responses", "messages", "images_generations"]);
|
|
89
89
|
const VALID_SCORERS = new Set<SellerRoutingScorer>(["balanced", "speed", "discount"]);
|
|
90
90
|
|
|
91
91
|
export function defaultProviderModeConfig(now = new Date().toISOString()): ProviderModeConfig {
|
package/src/seller-catalog.ts
CHANGED
|
@@ -11,7 +11,7 @@ const logger = createModuleLogger("tb-proxyd");
|
|
|
11
11
|
/**
|
|
12
12
|
* buyer 端协议偏好(用于按协议过滤 catalog)。`messages` 是 anthropic 协议的简称。
|
|
13
13
|
*/
|
|
14
|
-
export type ProtocolPreference = "chat_completions" | "responses" | "messages";
|
|
14
|
+
export type ProtocolPreference = "chat_completions" | "responses" | "messages" | "images_generations";
|
|
15
15
|
|
|
16
16
|
/**
|
|
17
17
|
* wallet-bootstrap `/registry/sellers` 里的 seller 描述。
|
|
@@ -221,6 +221,21 @@ function compareCandidates(a: SortableCandidate, b: SortableCandidate, scorer: S
|
|
|
221
221
|
* 计算单个 candidate 在指定 scorer 下的完整打分拆解(含各维度分量和缺失项)。
|
|
222
222
|
* 不会修改输入 candidate,常用于 doctor 面板和调试日志。
|
|
223
223
|
*
|
|
224
|
+
* 权重设计理由:
|
|
225
|
+
* - **Speed 评分器**:TTFT 65% + Tok/s 25% + Health 10%
|
|
226
|
+
* - TTFT 占主导是因为首 token 延迟直接影响用户感知的响应速度
|
|
227
|
+
* - Tok/s 次要,因为大多数场景下吞吐差异不如延迟显著
|
|
228
|
+
* - Health 最低,只作为平局时的兜底
|
|
229
|
+
*
|
|
230
|
+
* - **Discount 评分器**:折扣 100%
|
|
231
|
+
* - 纯成本优先,忽略性能指标
|
|
232
|
+
* - 平局时按健康分和注册顺序兜底
|
|
233
|
+
*
|
|
234
|
+
* - **Balanced 评分器**:Health 35% + TTFT 20% + Tok/s 20% + Discount 25%
|
|
235
|
+
* - Health 占比最高是为了避免选择不稳定的 seller
|
|
236
|
+
* - 速度(TTFT + Tok/s)合计 40%,与折扣 25% 形成平衡
|
|
237
|
+
* - 这是默认评分器,适合大多数生产场景
|
|
238
|
+
*
|
|
224
239
|
* @param candidate 待打分的候选
|
|
225
240
|
* @param scorer 评分器:`speed` / `discount` / `balanced`
|
|
226
241
|
* @returns 打分拆解
|
|
@@ -266,6 +281,19 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
|
|
|
266
281
|
};
|
|
267
282
|
}
|
|
268
283
|
|
|
284
|
+
/**
|
|
285
|
+
* 将延迟(毫秒)转换为 0-100 分数。
|
|
286
|
+
* 公式:`100 - latency_ms / 10`
|
|
287
|
+
*
|
|
288
|
+
* 设计理由:
|
|
289
|
+
* - 0ms → 100分,1000ms → 0分(线性递减)
|
|
290
|
+
* - 除以 10 的比例来自经验:100ms 是"优秀"延迟,1000ms 是"不可接受"延迟
|
|
291
|
+
* - 线性公式的局限:100ms vs 200ms 的差异被等同于 900ms vs 1000ms
|
|
292
|
+
* 实际用户体验可能不是线性的,未来可考虑对数或分段函数
|
|
293
|
+
*
|
|
294
|
+
* @param latencyMs 延迟(毫秒),undefined 或非有限值返回 0 分
|
|
295
|
+
* @returns 0-100 分数
|
|
296
|
+
*/
|
|
269
297
|
function latencyScore(latencyMs: number | undefined): number {
|
|
270
298
|
if (!Number.isFinite(latencyMs)) {
|
|
271
299
|
return 0;
|
|
@@ -273,6 +301,19 @@ function latencyScore(latencyMs: number | undefined): number {
|
|
|
273
301
|
return Math.max(0, 100 - Math.max(0, latencyMs as number) / 10);
|
|
274
302
|
}
|
|
275
303
|
|
|
304
|
+
/**
|
|
305
|
+
* 将输出吞吐(tokens/s)转换为 0-100 分数。
|
|
306
|
+
* 公式:直接取值并限制在 0-100 范围内
|
|
307
|
+
*
|
|
308
|
+
* 设计理由:
|
|
309
|
+
* - 假设大多数 seller 的吞吐在 0-100 tok/s 范围内
|
|
310
|
+
* - 100 tok/s 及以上都得满分(上界问题)
|
|
311
|
+
* - 这种线性映射的局限:无法区分 100 tok/s 和 200 tok/s 的差异
|
|
312
|
+
* 未来可考虑对数缩放,例如 `100 * log(1 + value) / log(101)`
|
|
313
|
+
*
|
|
314
|
+
* @param value 吞吐(tokens/s),undefined 或非有限值返回 0 分
|
|
315
|
+
* @returns 0-100 分数
|
|
316
|
+
*/
|
|
276
317
|
function tokensPerSecondScore(value: number | undefined): number {
|
|
277
318
|
if (!Number.isFinite(value)) {
|
|
278
319
|
return 0;
|