@tokenbuddy/tokenbuddy 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/dist/src/daemon.d.ts +11 -4
  2. package/dist/src/daemon.d.ts.map +1 -1
  3. package/dist/src/daemon.js +130 -42
  4. package/dist/src/daemon.js.map +1 -1
  5. package/dist/src/doctor-diagnostics.d.ts.map +1 -1
  6. package/dist/src/doctor-diagnostics.js +7 -1
  7. package/dist/src/doctor-diagnostics.js.map +1 -1
  8. package/dist/src/prewarm-cache.d.ts +4 -0
  9. package/dist/src/prewarm-cache.d.ts.map +1 -1
  10. package/dist/src/prewarm-cache.js +1 -0
  11. package/dist/src/prewarm-cache.js.map +1 -1
  12. package/dist/src/prewarm-scheduler.d.ts +2 -0
  13. package/dist/src/prewarm-scheduler.d.ts.map +1 -1
  14. package/dist/src/prewarm-scheduler.js +4 -1
  15. package/dist/src/prewarm-scheduler.js.map +1 -1
  16. package/dist/src/provider-install.d.ts.map +1 -1
  17. package/dist/src/provider-install.js +196 -18
  18. package/dist/src/provider-install.js.map +1 -1
  19. package/dist/src/seller-catalog.d.ts +4 -0
  20. package/dist/src/seller-catalog.d.ts.map +1 -1
  21. package/dist/src/seller-catalog.js.map +1 -1
  22. package/dist/src/seller-pool.d.ts +13 -0
  23. package/dist/src/seller-pool.d.ts.map +1 -1
  24. package/dist/src/seller-pool.js +43 -2
  25. package/dist/src/seller-pool.js.map +1 -1
  26. package/dist/src/seller-route-planner.d.ts +9 -0
  27. package/dist/src/seller-route-planner.d.ts.map +1 -1
  28. package/dist/src/seller-route-planner.js +39 -15
  29. package/dist/src/seller-route-planner.js.map +1 -1
  30. package/dist/src/seller-routing-strategy.d.ts +6 -4
  31. package/dist/src/seller-routing-strategy.d.ts.map +1 -1
  32. package/dist/src/seller-routing-strategy.js +15 -12
  33. package/dist/src/seller-routing-strategy.js.map +1 -1
  34. package/dist/src/terminal-detect.d.ts +5 -5
  35. package/dist/src/terminal-detect.d.ts.map +1 -1
  36. package/dist/src/terminal-detect.js +79 -26
  37. package/dist/src/terminal-detect.js.map +1 -1
  38. package/package.json +1 -1
  39. package/src/daemon.ts +168 -46
  40. package/src/doctor-diagnostics.ts +5 -1
  41. package/src/prewarm-cache.ts +5 -0
  42. package/src/prewarm-scheduler.ts +6 -1
  43. package/src/provider-install.ts +203 -18
  44. package/src/seller-catalog.ts +4 -0
  45. package/src/seller-pool.ts +68 -2
  46. package/src/seller-route-planner.ts +61 -15
  47. package/src/seller-routing-strategy.ts +21 -16
  48. package/src/terminal-detect.ts +81 -24
  49. package/static/ui/assets/index-DEDEl8o2.js +236 -0
  50. package/static/ui/assets/{index-UAfOhbwC.js.map → index-DEDEl8o2.js.map} +1 -1
  51. package/static/ui/index.html +1 -1
  52. package/tests/control-plane-ui-endpoints.test.ts +73 -0
  53. package/tests/seller-pool.test.ts +55 -0
  54. package/tests/seller-route-planner.test.ts +45 -1
  55. package/tests/seller-routing-strategy.test.ts +6 -5
  56. package/tests/tokenbuddy.test.ts +346 -38
  57. package/static/ui/assets/index-UAfOhbwC.js +0 -236
package/src/daemon.ts CHANGED
@@ -36,7 +36,9 @@ import {
36
36
  isBuyerVisibleRegistrySeller,
37
37
  normalizeSellerUrl,
38
38
  RegistryTooLargeError,
39
+ type ModelCatalogEntry,
39
40
  type RegistrySeller,
41
+ type SellerCatalogEntry,
40
42
  type SellerManifest,
41
43
  type SellerRegistryDocument,
42
44
  type SellerRegistryTrustMetadata,
@@ -49,9 +51,10 @@ import { SellerPool, type FailureKind } from "./seller-pool.js";
49
51
  import { RouteFailover, type FailoverDecision, type RouteCandidate } from "./route-failover.js";
50
52
  import { PrewarmScheduler, type PrewarmReason, type SellerProber } from "./prewarm-scheduler.js";
51
53
  import { SellerConcurrencyLimiter, type SellerConcurrencyLimiterOptions } from "./seller-concurrency-limiter.js";
54
+ import { SellerMetadataCache } from "./seller-metadata-cache.js";
52
55
  import type { PoolEntry } from "./seller-pool.js";
53
56
  import { planSellerRouteSet } from "./seller-route-planner.js";
54
- import type { SellerRoutePlan } from "./seller-route-planner.js";
57
+ import type { SellerRouteMetric, SellerRoutePlan } from "./seller-route-planner.js";
55
58
  import {
56
59
  assertSellerRoutingConfig,
57
60
  mergeSellerRoutingConfig,
@@ -294,6 +297,14 @@ interface SellerHealthBody {
294
297
  lastErrorClass?: unknown;
295
298
  last_error_class?: unknown;
296
299
  };
300
+ latency?: {
301
+ ttftMs?: unknown;
302
+ ttft_ms?: unknown;
303
+ avgInferenceMs?: unknown;
304
+ avg_inference_ms?: unknown;
305
+ avgTokensPerSecond?: unknown;
306
+ avg_tokens_per_second?: unknown;
307
+ };
297
308
  capacity?: {
298
309
  activeConnections?: unknown;
299
310
  active_connections?: unknown;
@@ -316,6 +327,11 @@ interface SellerSettlementSummary {
316
327
  priceVersion?: string;
317
328
  }
318
329
 
330
+ interface SellerAttemptRequestContext {
331
+ requestId: string;
332
+ idempotencyKey: string;
333
+ }
334
+
319
335
  interface SellerBalanceSnapshot {
320
336
  creditMicros: number;
321
337
  reservedMicros: number;
@@ -413,6 +429,23 @@ function parseSellerSettlementObject(raw: string): SellerSettlementSummary | und
413
429
  }
414
430
  }
415
431
 
432
+ function sellerAttemptRequestContext(
433
+ requestId: string,
434
+ idempotencyKey: string,
435
+ routeIndex: number,
436
+ attempt: number,
437
+ retryOrdinal: number
438
+ ): SellerAttemptRequestContext {
439
+ if (routeIndex === 0 && attempt === 0 && retryOrdinal === 0) {
440
+ return { requestId, idempotencyKey };
441
+ }
442
+ const suffix = `r${routeIndex}_a${attempt}_n${retryOrdinal}`;
443
+ return {
444
+ requestId: `${requestId}_${suffix}`,
445
+ idempotencyKey: `${idempotencyKey}_${suffix}`
446
+ };
447
+ }
448
+
416
449
  function arrayLength(value: unknown): number | undefined {
417
450
  return Array.isArray(value) ? value.length : undefined;
418
451
  }
@@ -558,6 +591,7 @@ export class TokenbuddyDaemon {
558
591
  // "fetchRegistry + manifest per request" path.
559
592
  private readonly modelIndex = new ModelIndex();
560
593
  private readonly prewarmCache = new PrewarmCache();
594
+ private readonly sellerMetadataCache = new SellerMetadataCache();
561
595
  private readonly creditTracker = new CreditTracker();
562
596
  private readonly sellerPool = new SellerPool({
563
597
  modelIndex: this.modelIndex,
@@ -626,11 +660,15 @@ export class TokenbuddyDaemon {
626
660
  const now = Date.now();
627
661
  const body = await res.json() as SellerHealthBody;
628
662
  const upstream = body.upstream;
663
+ const latency = body.latency;
629
664
  const upstreamErrorClass = upstream?.lastErrorClass ?? upstream?.last_error_class;
630
665
  return {
631
666
  ok: true,
632
667
  latencyMs: now - startedAt,
633
668
  httpStatus: res.status,
669
+ ttftMs: finiteNumber(latency?.ttftMs ?? latency?.ttft_ms),
670
+ avgInferenceMs: finiteNumber(latency?.avgInferenceMs ?? latency?.avg_inference_ms),
671
+ avgTokensPerSecond: finiteNumber(latency?.avgTokensPerSecond ?? latency?.avg_tokens_per_second),
634
672
  upstreamStatus: typeof upstream?.status === "string"
635
673
  ? upstream.status as "healthy" | "degraded" | "unhealthy" | "unknown"
636
674
  : undefined,
@@ -857,6 +895,7 @@ export class TokenbuddyDaemon {
857
895
  // trades freshness for availability: requests still route, but the
858
896
  // model set is whatever was cached before the registry outgrew 1MB.
859
897
  private lastRegistrySnapshot: SellerRegistryDocument | null = null;
898
+ private forceRegistrySnapshotForTest = false;
860
899
 
861
900
  private async fetchRegistry(): Promise<SellerRegistryDocument> {
862
901
  try {
@@ -1106,7 +1145,7 @@ export class TokenbuddyDaemon {
1106
1145
  const payments = this.livePayments().filter((payment) => payment.enabled);
1107
1146
  const clients = this.clientToolsSummary();
1108
1147
  const routeModelId = this.resolveFocusSet()[0] || catalog.models[0]?.id;
1109
- const routingPreview = routeModelId ? this.buildRoutingPreview({ modelId: routeModelId, routing: currentRouting }) : undefined;
1148
+ const routingPreview = routeModelId ? await this.buildRoutingPreview({ modelId: routeModelId, routing: currentRouting }) : undefined;
1110
1149
  const checks: InitDoctorCheck[] = [
1111
1150
  {
1112
1151
  id: "local_service",
@@ -1229,6 +1268,42 @@ export class TokenbuddyDaemon {
1229
1268
  }
1230
1269
  }
1231
1270
 
1271
+ private async refreshSellerRuntimeMetrics(route: SellerRoute, requestId: string | undefined): Promise<void> {
1272
+ let timer: ReturnType<typeof setTimeout> | undefined;
1273
+ try {
1274
+ const ac = new AbortController();
1275
+ timer = setTimeout(() => ac.abort(new Error("health timeout")), this.config.warmupProbeTimeoutMs ?? 3000);
1276
+ const startedAt = Date.now();
1277
+ const res = await fetch(`${route.seller.url.replace(/\/+$/, "")}/health`, { signal: ac.signal });
1278
+ if (!res.ok) {
1279
+ logger.warn("pool.runtime_metrics.refresh_failed", "seller health refresh failed after inference", {
1280
+ requestId,
1281
+ sellerId: route.seller.id,
1282
+ status: res.status,
1283
+ durationMs: Date.now() - startedAt
1284
+ });
1285
+ return;
1286
+ }
1287
+ const body = await res.json() as SellerHealthBody;
1288
+ const latency = body.latency;
1289
+ this.sellerPool.recordRuntimeMetrics(route.seller.id, {
1290
+ ttftMs: finiteNumber(latency?.ttftMs ?? latency?.ttft_ms),
1291
+ avgInferenceMs: finiteNumber(latency?.avgInferenceMs ?? latency?.avg_inference_ms),
1292
+ avgTokensPerSecond: finiteNumber(latency?.avgTokensPerSecond ?? latency?.avg_tokens_per_second)
1293
+ });
1294
+ } catch (error: unknown) {
1295
+ logger.warn("pool.runtime_metrics.refresh_failed", "seller health refresh failed after inference", {
1296
+ requestId,
1297
+ sellerId: route.seller.id,
1298
+ errorMessage: error instanceof Error ? error.message : String(error)
1299
+ });
1300
+ } finally {
1301
+ if (timer) {
1302
+ clearTimeout(timer);
1303
+ }
1304
+ }
1305
+ }
1306
+
1232
1307
  private endpointProtocol(endpoint: string): string | undefined {
1233
1308
  if (endpoint === "/v1/chat/completions") {
1234
1309
  return "chat_completions";
@@ -1337,6 +1412,7 @@ export class TokenbuddyDaemon {
1337
1412
 
1338
1413
  const routing = resolveSellerRoutingForModel(this.refreshSellerRoutingConfig(), modelId);
1339
1414
  const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
1415
+ await this.refreshSellerRouteMetadata(registrySellers);
1340
1416
  this.sellerPool.ensureRegistrySellers(registrySellers);
1341
1417
  this.scheduleLazyPrewarmIfNeeded(modelId, protocol, paymentMethod);
1342
1418
  this.sellerPool.recycleOpenCircuits();
@@ -1350,21 +1426,12 @@ export class TokenbuddyDaemon {
1350
1426
  registrySellers,
1351
1427
  routing,
1352
1428
  prewarmCandidates: this.prewarmCache.get(modelId, protocol, paymentMethod)?.candidates,
1353
- sellerMetrics: Array.from(poolById.values()).map((entry) => ({
1354
- sellerId: entry.sellerId,
1355
- healthScore: entry.healthScore,
1356
- avgLatencyMs: entry.avgLatencyMs,
1357
- ttftMs: entry.ttftMs,
1358
- avgInferenceMs: entry.avgInferenceMs,
1359
- circuit: entry.circuit,
1360
- capacityBlockedUntil: entry.capacityBlockedUntil,
1361
- ...(concurrencySnapshot.enabled
1362
- ? {
1363
- localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
1364
- localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
1365
- }
1366
- : {})
1367
- })),
1429
+ sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry, concurrencySnapshot.enabled
1430
+ ? {
1431
+ localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
1432
+ localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
1433
+ }
1434
+ : undefined)),
1368
1435
  now: Date.now()
1369
1436
  });
1370
1437
 
@@ -1667,7 +1734,8 @@ export class TokenbuddyDaemon {
1667
1734
  settlement: SellerSettlementSummary
1668
1735
  ): void {
1669
1736
  logger.info("token.balance.reconciled", "seller token balance reconciled from settlement", {
1670
- requestId: settlement.requestId || requestId,
1737
+ requestId,
1738
+ sellerRequestId: settlement.requestId !== requestId ? settlement.requestId : undefined,
1671
1739
  sellerKey: route.seller.id,
1672
1740
  model: route.modelId,
1673
1741
  remainingCreditMicros: settlement.remainingCreditMicros,
@@ -1681,8 +1749,8 @@ export class TokenbuddyDaemon {
1681
1749
  }
1682
1750
 
1683
1751
  private async listSellerBackedModels(): Promise<{
1684
- models: Array<{ id: string; sellerId: string; sellerName?: string; sellerUrl: string; supportedProtocols: string[]; paymentMethods: string[] }>;
1685
- sellers: Array<{ id: string; name?: string; url: string; status: string; manifestSellerId?: string; errorMessage?: string }>;
1752
+ models: ModelCatalogEntry[];
1753
+ sellers: SellerCatalogEntry[];
1686
1754
  }> {
1687
1755
  try {
1688
1756
  const catalog = await discoverSellerBackedModels(this.config.sellerRegistryUrl);
@@ -1696,7 +1764,7 @@ export class TokenbuddyDaemon {
1696
1764
  }
1697
1765
  return {
1698
1766
  models: catalog.models,
1699
- sellers: catalog.sellers
1767
+ sellers: this.sellerCatalogWithRuntimeMetrics(catalog.sellers)
1700
1768
  };
1701
1769
  } catch (error) {
1702
1770
  const cached = this.loadTrustedRegistryCache(error);
@@ -1706,11 +1774,52 @@ export class TokenbuddyDaemon {
1706
1774
  const snapshot = catalogSnapshotFromRegistry(cached);
1707
1775
  return {
1708
1776
  models: snapshot.models,
1709
- sellers: snapshot.sellers
1777
+ sellers: this.sellerCatalogWithRuntimeMetrics(snapshot.sellers)
1778
+ };
1779
+ }
1780
+ }
1781
+
1782
+ private sellerCatalogWithRuntimeMetrics(sellers: SellerCatalogEntry[]): SellerCatalogEntry[] {
1783
+ const runtimeBySellerId = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
1784
+ return sellers.map((seller) => {
1785
+ const runtime = runtimeBySellerId.get(seller.id);
1786
+ return {
1787
+ ...seller,
1788
+ ttftMs: runtime?.ttftMs ?? seller.ttftMs,
1789
+ avgTokensPerSecond: runtime?.avgTokensPerSecond ?? seller.avgTokensPerSecond ?? 0
1710
1790
  };
1791
+ });
1792
+ }
1793
+
1794
+ private async refreshSellerRouteMetadata(sellers: RegistrySeller[]): Promise<void> {
1795
+ try {
1796
+ await this.sellerMetadataCache.refreshIfStale(sellers.filter(isBuyerVisibleRegistrySeller));
1797
+ } catch (error: unknown) {
1798
+ logger.warn("route.metadata.refresh_failed", "seller route metadata refresh failed", {
1799
+ errorMessage: error instanceof Error ? error.message : String(error)
1800
+ });
1711
1801
  }
1712
1802
  }
1713
1803
 
1804
+ private routeMetricFromPoolEntry(
1805
+ entry: PoolEntry,
1806
+ concurrency?: { localConcurrencyActive: number; localConcurrencyLimit: number }
1807
+ ): SellerRouteMetric {
1808
+ const metadata = this.sellerMetadataCache.snapshot().find((item) => item.sellerId === entry.sellerId);
1809
+ return {
1810
+ sellerId: entry.sellerId,
1811
+ healthScore: entry.healthScore,
1812
+ avgLatencyMs: entry.avgLatencyMs,
1813
+ ttftMs: entry.ttftMs,
1814
+ avgInferenceMs: entry.avgInferenceMs,
1815
+ avgTokensPerSecond: entry.avgTokensPerSecond,
1816
+ discountRatio: metadata?.discountRatio,
1817
+ circuit: entry.circuit,
1818
+ capacityBlockedUntil: entry.capacityBlockedUntil,
1819
+ ...(concurrency ?? {})
1820
+ };
1821
+ }
1822
+
1714
1823
  private readUsage(bodyText: string): UsageSummary {
1715
1824
  const fallback: UsageSummary = {
1716
1825
  promptTokens: 0,
@@ -1779,8 +1888,11 @@ export class TokenbuddyDaemon {
1779
1888
  }
1780
1889
 
1781
1890
  const settledMicros = settlement?.settledMicros;
1891
+ const sellerRequestId = settlement?.requestId && settlement.requestId !== requestId
1892
+ ? settlement.requestId
1893
+ : undefined;
1782
1894
  this.tokenStore.recordInferenceLedger({
1783
- requestId: settlement?.requestId || requestId,
1895
+ requestId,
1784
1896
  sellerKey: route.seller.id,
1785
1897
  modelId: route.modelId,
1786
1898
  endpoint,
@@ -1805,7 +1917,7 @@ export class TokenbuddyDaemon {
1805
1917
  paymentMethod: extras?.paymentMethod
1806
1918
  });
1807
1919
  logger.info("inference.ledger.recorded", "safe inference ledger recorded", {
1808
- requestId: settlement?.requestId || requestId,
1920
+ requestId,
1809
1921
  sellerKey: route.seller.id,
1810
1922
  model: route.modelId,
1811
1923
  endpoint,
@@ -1818,6 +1930,7 @@ export class TokenbuddyDaemon {
1818
1930
  completionTokens: usage.completionTokens,
1819
1931
  balanceSnapshotMicros: settlement?.remainingCreditMicros,
1820
1932
  balanceSource: settlement ? "seller_authoritative" : "estimated",
1933
+ sellerRequestId,
1821
1934
  ttftMs: extras?.ttftMs,
1822
1935
  fallbackCount: extras?.fallbackCount,
1823
1936
  routeReason: extras?.routeReason,
@@ -2488,21 +2601,31 @@ export class TokenbuddyDaemon {
2488
2601
  // the `X-TokenBuddy-Deadline-Ms` header (PR-6) can propagate
2489
2602
  // it to their own upstream fetch via the same signal.
2490
2603
  const deadlineMs = this.requestDeadlineMs();
2491
- const sendSellerRequest = async (token: string) => {
2604
+ const sendSellerRequest = async (token: string, retryOrdinal = 0) => {
2605
+ const attemptContext = sellerAttemptRequestContext(
2606
+ requestId,
2607
+ idempotencyKey,
2608
+ routeIndex,
2609
+ attempt,
2610
+ retryOrdinal
2611
+ );
2492
2612
  const requestAc = new AbortController();
2493
2613
  const requestTimer = setTimeout(() => requestAc.abort(new Error("buyer deadline exceeded")), deadlineMs);
2494
2614
  const headers: Record<string, string> = {
2495
2615
  "Content-Type": "application/json",
2496
2616
  "Authorization": `Bearer ${token}`,
2497
- "X-Request-Id": requestId,
2498
- "Idempotency-Key": idempotencyKey
2617
+ "X-Request-Id": attemptContext.requestId,
2618
+ "Idempotency-Key": attemptContext.idempotencyKey
2499
2619
  };
2500
2620
  headers["X-TokenBuddy-Deadline-Ms"] = String(deadlineMs);
2501
2621
  try {
2502
2622
  return await fetch(`${sellerUrl}${endpoint}`, {
2503
2623
  method: "POST",
2504
2624
  headers,
2505
- body: JSON.stringify(upstreamBody),
2625
+ body: JSON.stringify({
2626
+ ...upstreamBody,
2627
+ requestId: attemptContext.requestId
2628
+ }),
2506
2629
  signal: requestAc.signal
2507
2630
  });
2508
2631
  } finally {
@@ -2649,6 +2772,7 @@ export class TokenbuddyDaemon {
2649
2772
  res.write(settlementTrailing.downstream);
2650
2773
  }
2651
2774
  res.end();
2775
+ void this.refreshSellerRuntimeMetrics(route, requestId);
2652
2776
  this.recordReconciledInference(
2653
2777
  route,
2654
2778
  endpoint,
@@ -2675,6 +2799,7 @@ export class TokenbuddyDaemon {
2675
2799
  markFirstByte();
2676
2800
  res.send(responseBody);
2677
2801
  const usage = this.readUsage(responseBody);
2802
+ void this.refreshSellerRuntimeMetrics(route, requestId);
2678
2803
  this.recordReconciledInference(
2679
2804
  route,
2680
2805
  endpoint,
@@ -3212,10 +3337,10 @@ export class TokenbuddyDaemon {
3212
3337
 
3213
3338
  // 2) GET /routing/preview — 算「假如改完会怎样」,不改 state
3214
3339
  // query: modelId? protocol? paymentMethod? mode? scorer? sellerId? sellerIds?(逗号分隔)
3215
- controlApp.get("/routing/preview", (req, res) => {
3340
+ controlApp.get("/routing/preview", async (req, res) => {
3216
3341
  try {
3217
3342
  const override = buildRoutingConfigFromQuery(req.query);
3218
- const result = this.buildRoutingPreview({
3343
+ const result = await this.buildRoutingPreview({
3219
3344
  modelId: typeof req.query.modelId === "string" ? req.query.modelId : undefined,
3220
3345
  protocol: typeof req.query.protocol === "string" ? req.query.protocol : undefined,
3221
3346
  paymentMethod: typeof req.query.paymentMethod === "string" ? req.query.paymentMethod : undefined,
@@ -3244,7 +3369,7 @@ export class TokenbuddyDaemon {
3244
3369
  });
3245
3370
 
3246
3371
  // 3) PUT /routing/strategy — 写策略 + 热更新 + 返回 preview
3247
- controlApp.put("/routing/strategy", (req, res) => {
3372
+ controlApp.put("/routing/strategy", async (req, res) => {
3248
3373
  try {
3249
3374
  const body = (req.body ?? {}) as Record<string, unknown>;
3250
3375
  const normalized = normalizeSellerRoutingConfig(body);
@@ -3258,7 +3383,7 @@ export class TokenbuddyDaemon {
3258
3383
  sellerId: current.sellerId,
3259
3384
  sellerIds: current.sellerIds
3260
3385
  });
3261
- const preview = this.buildRoutingPreview({ routing: current });
3386
+ const preview = await this.buildRoutingPreview({ routing: current });
3262
3387
  const previewPayload = "error" in preview.plan
3263
3388
  ? { error: preview.plan.error }
3264
3389
  : {
@@ -3475,15 +3600,18 @@ export class TokenbuddyDaemon {
3475
3600
  /**
3476
3601
  * tb-ui v1 `GET /routing/preview` 和 `PUT /routing/strategy` 复用的 preview 计算。
3477
3602
  * 接受任意 routing 覆盖(来自 request body)算「假如改成这个,路由会是啥」。
3478
- * 不修改任何内部 state,**纯函数式**。
3603
+ * 不修改 routing state;registry / seller metadata 可按需刷新,保证 preview
3604
+ * 使用的候选和折扣信息与真实请求路径一致。
3479
3605
  */
3480
- public buildRoutingPreview(input: {
3606
+ public async buildRoutingPreview(input: {
3481
3607
  modelId?: string;
3482
3608
  protocol?: string;
3483
3609
  paymentMethod?: string;
3484
3610
  routing?: Partial<BuyerSellerRoutingConfig>;
3485
- }): { modelId: string; protocol: string; paymentMethod: string; plan: SellerRoutePlan | { error: string } } {
3486
- const registry = this.lastRegistrySnapshot;
3611
+ }): Promise<{ modelId: string; protocol: string; paymentMethod: string; plan: SellerRoutePlan | { error: string } }> {
3612
+ const registry = this.lastRegistrySnapshot ?? (
3613
+ this.forceRegistrySnapshotForTest ? null : await this.fetchRegistry()
3614
+ );
3487
3615
  const focusFirst = this.resolveFocusSet()[0];
3488
3616
  const registryFirst = registry?.sellers[0]?.models?.[0];
3489
3617
  const modelId = input.modelId?.trim() || focusFirst || registryFirst || "";
@@ -3501,6 +3629,7 @@ export class TokenbuddyDaemon {
3501
3629
  : current;
3502
3630
  const resolvedRouting = resolveSellerRoutingForModel(routing, modelId);
3503
3631
  const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
3632
+ await this.refreshSellerRouteMetadata(registrySellers);
3504
3633
  this.sellerPool.ensureRegistrySellers(registrySellers);
3505
3634
  const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
3506
3635
  const plan = planSellerRouteSet({
@@ -3510,15 +3639,7 @@ export class TokenbuddyDaemon {
3510
3639
  registrySellers,
3511
3640
  routing: resolvedRouting,
3512
3641
  prewarmCandidates: this.prewarmCache.get(modelId, protocol, paymentMethod)?.candidates,
3513
- sellerMetrics: Array.from(poolById.values()).map((entry) => ({
3514
- sellerId: entry.sellerId,
3515
- healthScore: entry.healthScore,
3516
- avgLatencyMs: entry.avgLatencyMs,
3517
- ttftMs: entry.ttftMs,
3518
- avgInferenceMs: entry.avgInferenceMs,
3519
- circuit: entry.circuit,
3520
- capacityBlockedUntil: entry.capacityBlockedUntil
3521
- })),
3642
+ sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry)),
3522
3643
  now: Date.now()
3523
3644
  });
3524
3645
  return { modelId, protocol, paymentMethod, plan };
@@ -3569,12 +3690,13 @@ export class TokenbuddyDaemon {
3569
3690
  }
3570
3691
 
3571
3692
  /**
3572
- * @internal test-only seam to inject a registry snapshot without
3693
+ * @internal - test-only hook to inject a registry snapshot without
3573
3694
  * hitting the network. Used by `tests/control-plane-ui-endpoints.test.ts`
3574
3695
  * to drive `buildRoutingPreview` deterministically. Production code
3575
3696
  * must NOT call this; the real `fetchRegistry()` populates the snapshot.
3576
3697
  */
3577
3698
  public setLastRegistrySnapshotForTest(snapshot: SellerRegistryDocument | null): void {
3699
+ this.forceRegistrySnapshotForTest = true;
3578
3700
  this.lastRegistrySnapshot = snapshot;
3579
3701
  }
3580
3702
  }
package/src/doctor-diagnostics.ts CHANGED
@@ -341,7 +341,11 @@ function discountRatioFromSeller(seller: DoctorSellerEntry): number | undefined
341
341
  }
342
342
 
343
343
  function formatDiscountRatio(value: number): string {
344
- return value.toFixed(2).replace(/\.?0+$/, "");
344
+ const ratio = Math.max(0, value);
345
+ if (ratio === 0) return "免费";
346
+ if (Math.abs(ratio - 1) < 0.0001) return "原价";
347
+ const folded = Math.round(ratio * 100) / 10;
348
+ return `${Number.isInteger(folded) ? String(folded) : folded.toFixed(1)}折`;
345
349
  }
346
350
 
347
351
  function formatUsdPer1m(microsPer1m: number): string {
package/src/prewarm-cache.ts CHANGED
@@ -42,6 +42,8 @@ export interface PrewarmCandidate {
42
42
  ttftMs?: number;
43
43
  /** 平均推理延迟(毫秒),可选 */
44
44
  avgInferenceMs?: number;
45
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
46
+ avgTokensPerSecond?: number;
45
47
  /** 上游状态(与 seller 上报的语义对齐) */
46
48
  upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
47
49
  /** 上游错误类名(HTTP status / 错误码),仅在失败时存在 */
@@ -98,6 +100,8 @@ export interface PrewarmCandidateInput {
98
100
  ttftMs?: number;
99
101
  /** 平均推理延迟(毫秒),可选 */
100
102
  avgInferenceMs?: number;
103
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
104
+ avgTokensPerSecond?: number;
101
105
  /** 上游状态,可选 */
102
106
  upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
103
107
  /** 上游错误类名,可选 */
@@ -452,6 +456,7 @@ function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
452
456
  healthProbeLatencyMs: finiteNonNegative(input.healthProbeLatencyMs),
453
457
  ttftMs: finiteNonNegative(input.ttftMs),
454
458
  avgInferenceMs: finiteNonNegative(input.avgInferenceMs),
459
+ avgTokensPerSecond: finiteNonNegative(input.avgTokensPerSecond),
455
460
  upstreamStatus: input.upstreamStatus,
456
461
  upstreamErrorClass: input.upstreamErrorClass,
457
462
  capacityBlockedUntil: finiteNonNegative(input.capacityBlockedUntil)
package/src/prewarm-scheduler.ts CHANGED
@@ -35,6 +35,8 @@ export interface ProbeResult {
35
35
  ttftMs?: number;
36
36
  /** 平均推理延迟(毫秒),可选 */
37
37
  avgInferenceMs?: number;
38
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
39
+ avgTokensPerSecond?: number;
38
40
  /** 临时容量避让截止时间;大于当前时间时不参与路由 */
39
41
  capacityBlockedUntil?: number;
40
42
  }
@@ -499,6 +501,7 @@ export class PrewarmScheduler {
499
501
  healthProbeLatencyMs: result.latencyMs,
500
502
  ttftMs: result.ttftMs,
501
503
  avgInferenceMs: result.avgInferenceMs,
504
+ avgTokensPerSecond: result.avgTokensPerSecond,
502
505
  upstreamStatus: result.upstreamStatus,
503
506
  upstreamErrorClass: result.upstreamErrorClass,
504
507
  capacityBlockedUntil: result.capacityBlockedUntil
@@ -513,7 +516,8 @@ export class PrewarmScheduler {
513
516
  upstreamStatus: result.upstreamStatus,
514
517
  upstreamErrorClass: result.upstreamErrorClass,
515
518
  ttftMs: result.ttftMs,
516
- avgInferenceMs: result.avgInferenceMs
519
+ avgInferenceMs: result.avgInferenceMs,
520
+ avgTokensPerSecond: result.avgTokensPerSecond
517
521
  });
518
522
  } else {
519
523
  candidates.push({
@@ -526,6 +530,7 @@ export class PrewarmScheduler {
526
530
  healthProbeLatencyMs: result.latencyMs,
527
531
  ttftMs: result.ttftMs,
528
532
  avgInferenceMs: result.avgInferenceMs,
533
+ avgTokensPerSecond: result.avgTokensPerSecond,
529
534
  upstreamStatus: result.upstreamStatus,
530
535
  upstreamErrorClass: result.upstreamErrorClass,
531
536
  capacityBlockedUntil: result.capacityBlockedUntil