@tokenbuddy/tokenbuddy 1.0.29 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/dist/src/daemon.d.ts +11 -4
  2. package/dist/src/daemon.d.ts.map +1 -1
  3. package/dist/src/daemon.js +130 -42
  4. package/dist/src/daemon.js.map +1 -1
  5. package/dist/src/doctor-diagnostics.d.ts.map +1 -1
  6. package/dist/src/doctor-diagnostics.js +7 -1
  7. package/dist/src/doctor-diagnostics.js.map +1 -1
  8. package/dist/src/prewarm-cache.d.ts +4 -0
  9. package/dist/src/prewarm-cache.d.ts.map +1 -1
  10. package/dist/src/prewarm-cache.js +1 -0
  11. package/dist/src/prewarm-cache.js.map +1 -1
  12. package/dist/src/prewarm-scheduler.d.ts +2 -0
  13. package/dist/src/prewarm-scheduler.d.ts.map +1 -1
  14. package/dist/src/prewarm-scheduler.js +4 -1
  15. package/dist/src/prewarm-scheduler.js.map +1 -1
  16. package/dist/src/provider-install.d.ts.map +1 -1
  17. package/dist/src/provider-install.js +196 -18
  18. package/dist/src/provider-install.js.map +1 -1
  19. package/dist/src/seller-catalog.d.ts +4 -0
  20. package/dist/src/seller-catalog.d.ts.map +1 -1
  21. package/dist/src/seller-catalog.js.map +1 -1
  22. package/dist/src/seller-pool.d.ts +13 -0
  23. package/dist/src/seller-pool.d.ts.map +1 -1
  24. package/dist/src/seller-pool.js +43 -2
  25. package/dist/src/seller-pool.js.map +1 -1
  26. package/dist/src/seller-route-planner.d.ts +9 -0
  27. package/dist/src/seller-route-planner.d.ts.map +1 -1
  28. package/dist/src/seller-route-planner.js +39 -15
  29. package/dist/src/seller-route-planner.js.map +1 -1
  30. package/dist/src/seller-routing-strategy.d.ts +6 -4
  31. package/dist/src/seller-routing-strategy.d.ts.map +1 -1
  32. package/dist/src/seller-routing-strategy.js +15 -12
  33. package/dist/src/seller-routing-strategy.js.map +1 -1
  34. package/dist/src/terminal-detect.d.ts +5 -5
  35. package/dist/src/terminal-detect.d.ts.map +1 -1
  36. package/dist/src/terminal-detect.js +79 -26
  37. package/dist/src/terminal-detect.js.map +1 -1
  38. package/package.json +1 -1
  39. package/src/daemon.ts +168 -46
  40. package/src/doctor-diagnostics.ts +5 -1
  41. package/src/prewarm-cache.ts +5 -0
  42. package/src/prewarm-scheduler.ts +6 -1
  43. package/src/provider-install.ts +203 -18
  44. package/src/seller-catalog.ts +4 -0
  45. package/src/seller-pool.ts +68 -2
  46. package/src/seller-route-planner.ts +61 -15
  47. package/src/seller-routing-strategy.ts +21 -16
  48. package/src/terminal-detect.ts +81 -24
  49. package/static/ui/assets/index-DEDEl8o2.js +236 -0
  50. package/static/ui/assets/{index-UAfOhbwC.js.map → index-DEDEl8o2.js.map} +1 -1
  51. package/static/ui/index.html +1 -1
  52. package/tests/control-plane-ui-endpoints.test.ts +73 -0
  53. package/tests/seller-pool.test.ts +55 -0
  54. package/tests/seller-route-planner.test.ts +45 -1
  55. package/tests/seller-routing-strategy.test.ts +6 -5
  56. package/tests/tokenbuddy.test.ts +346 -38
  57. package/static/ui/assets/index-UAfOhbwC.js +0 -236
@@ -163,6 +163,10 @@ interface ProviderDefinition {
163
163
  protocolPreference?: ProtocolPreference;
164
164
  }
165
165
 
166
/**
 * Type guard for object-shaped values.
 *
 * Accepts any non-null, non-array value whose `typeof` is `"object"`;
 * rejects primitives, `null`, `undefined`, functions and arrays.
 */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  if (!value || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
}
169
+
166
170
  function resolveHome(home?: string): string {
167
171
  return home && home.trim() ? home : os.homedir();
168
172
  }
@@ -209,6 +213,124 @@ function readJsonObject(filePath: string): Record<string, unknown> {
209
213
  }
210
214
  }
211
215
 
216
/**
 * Removes a trailing YAML comment (`#` at the start of the text or preceded by
 * whitespace) from a single-line value.
 *
 * A leading quoted scalar is skipped first so that `#` characters inside the
 * quotes are kept. Backslash escapes are honoured inside double quotes and the
 * `''` escape inside single quotes.
 */
function stripYamlInlineComment(text: string): string {
  const opening = text[0];
  let quote: string | undefined = opening === '"' || opening === "'" ? opening : undefined;
  for (let index = quote ? 1 : 0; index < text.length; index += 1) {
    const char = text[index];
    if (quote) {
      if (quote === '"' && char === "\\") {
        // Skip the escaped character so an escaped quote does not close the scalar.
        index += 1;
      } else if (char === quote) {
        if (quote === "'" && text[index + 1] === "'") {
          // `''` is an escaped single quote, not the end of the scalar.
          index += 1;
        } else {
          quote = undefined;
        }
      }
      continue;
    }
    if (char === "#" && (index === 0 || /\s/.test(text[index - 1]))) {
      return text.slice(0, index).trimEnd();
    }
  }
  return text;
}

/**
 * Converts the textual right-hand side of a `key: value` line into a JS value.
 *
 * Recognises `true`, `false`, `null`, quoted strings and plain decimal numbers;
 * everything else is returned as the trimmed string. An empty value yields `""`.
 *
 * Double-quoted scalars are decoded with `JSON.parse` so that values written by
 * a `JSON.stringify`-based serializer round-trip (escaped quotes, backslashes,
 * `\n`, ...). If the text is not valid JSON the raw inner text is returned.
 * Single-quoted scalars have their `''` escape collapsed to `'`.
 *
 * @param value Raw text following the `:` separator.
 * @returns The decoded scalar.
 */
function parseYamlScalar(value: string): unknown {
  const trimmed = stripYamlInlineComment(value.trim());
  if (!trimmed) {
    return "";
  }
  if (trimmed === "true") {
    return true;
  }
  if (trimmed === "false") {
    return false;
  }
  if (trimmed === "null") {
    return null;
  }
  // Require two characters so that a lone quote is not treated as an empty quoted string.
  if (trimmed.length >= 2) {
    if (trimmed.startsWith('"') && trimmed.endsWith('"')) {
      try {
        const decoded: unknown = JSON.parse(trimmed);
        if (typeof decoded === "string") {
          return decoded;
        }
      } catch {
        // Not JSON-compatible (e.g. YAML-only escapes): fall back to the raw inner text.
      }
      return trimmed.slice(1, -1);
    }
    if (trimmed.startsWith("'") && trimmed.endsWith("'")) {
      return trimmed.slice(1, -1).replace(/''/g, "'");
    }
  }
  const numeric = Number(trimmed);
  if (Number.isFinite(numeric) && /^-?\d+(?:\.\d+)?$/.test(trimmed)) {
    return numeric;
  }
  return trimmed;
}

/**
 * Parses a restricted YAML subset (nested `key: value` mappings indented with
 * spaces) into a plain object.
 *
 * Blank lines, comment lines and lines without a `key:` prefix are ignored.
 * A key with no value (or only a comment after the colon) opens a nested
 * mapping; deeper-indented lines that follow are attached to it.
 *
 * Keys are always stored as own properties and existing values are only looked
 * up as own properties, so a `__proto__:` key in the file can never resolve to,
 * or write through to, `Object.prototype`.
 *
 * @param text YAML document text.
 * @returns The parsed mapping (empty when nothing could be parsed).
 */
function parseSimpleYamlObject(text: string): Record<string, unknown> {
  const root: Record<string, unknown> = {};
  const stack: Array<{ indent: number; value: Record<string, unknown> }> = [{ indent: -1, value: root }];
  const assignOwn = (target: Record<string, unknown>, key: string, entry: unknown): void => {
    // defineProperty bypasses the inherited `__proto__` setter.
    Object.defineProperty(target, key, { value: entry, writable: true, enumerable: true, configurable: true });
  };
  for (const rawLine of text.split(/\r?\n/)) {
    const trimmed = rawLine.trim();
    if (!trimmed || trimmed.startsWith("#")) {
      continue;
    }
    const indent = rawLine.match(/^ */)?.[0].length ?? 0;
    const separatorIndex = trimmed.indexOf(":");
    if (separatorIndex <= 0) {
      continue;
    }
    const key = trimmed.slice(0, separatorIndex).trim();
    // Strip the comment before deciding whether this line opens a nested mapping.
    const rest = stripYamlInlineComment(trimmed.slice(separatorIndex + 1).trim());
    // Unwind to the closest ancestor that is less indented than this line.
    while (stack.length > 1 && indent <= stack[stack.length - 1].indent) {
      stack.pop();
    }
    const parent = stack[stack.length - 1].value;
    if (rest) {
      assignOwn(parent, key, parseYamlScalar(rest));
      continue;
    }
    const existing: unknown = Object.prototype.hasOwnProperty.call(parent, key) ? parent[key] : undefined;
    const child: Record<string, unknown> =
      typeof existing === "object" && existing !== null && !Array.isArray(existing)
        ? (existing as Record<string, unknown>)
        : {};
    assignOwn(parent, key, child);
    stack.push({ indent, value: child });
  }
  return root;
}
269
+
270
/**
 * Loads `filePath` through `readText` and parses it with `parseSimpleYamlObject`.
 *
 * @param filePath Path of the YAML file to read.
 * @returns The parsed mapping, or `{}` when `readText` yields a falsy value.
 */
function readYamlObject(filePath: string): Record<string, unknown> {
  const contents = readText(filePath);
  return contents ? parseSimpleYamlObject(contents) : {};
}
277
+
278
/**
 * Renders a scalar for the right-hand side of a YAML `key: value` line.
 *
 * Numbers and booleans are printed verbatim and `null` becomes `null`.
 * Anything else is stringified (`undefined` becomes the empty string) and is
 * JSON-quoted when it is empty, contains `:`, `#`, a newline, carriage return
 * or tab, has leading/trailing whitespace, spells `true`/`false`/`null` in any
 * letter case, or looks like a plain decimal number.
 *
 * @param value Scalar to render.
 * @returns The YAML text for the value.
 */
function yamlScalarContent(value: unknown): string {
  switch (typeof value) {
    case "number":
    case "boolean":
      return String(value);
    default:
      break;
  }
  if (value === null) {
    return "null";
  }
  const text = value === undefined ? "" : String(value);
  const isAmbiguous = /[:#\n\r\t]|^\s|\s$|^(true|false|null)$/i.test(text);
  const looksNumeric = /^-?\d+(?:\.\d+)?$/.test(text);
  const mustQuote = text.length === 0 || isAmbiguous || looksNumeric;
  return mustQuote ? JSON.stringify(text) : text;
}
291
+
292
/**
 * Serializes a nested record as block-style YAML.
 *
 * Entries for which `isPlainRecord` is true are written as `key:` followed by
 * their own entries indented two further columns; every other entry is written
 * as `key: <scalar>` using `yamlScalarContent`.
 *
 * @param value Record to serialize.
 * @returns The YAML text, always terminated by a newline.
 */
function yamlContent(value: Record<string, unknown>): string {
  const render = (record: Record<string, unknown>, depth: number): string[] =>
    Object.entries(record).flatMap(([key, entry]) => {
      const pad = " ".repeat(depth);
      return isPlainRecord(entry)
        ? [`${pad}${key}:`, ...render(entry, depth + 2)]
        : [`${pad}${key}: ${yamlScalarContent(entry)}`];
    });
  return `${render(value, 0).join("\n")}\n`;
}
308
+
309
/**
 * Replaces (or appends) one top-level section of a YAML document while leaving
 * every other line of the existing text untouched.
 *
 * The section starts at the first line equal to `<sectionName>:` or starting
 * with `<sectionName>: `, and extends over the following blank or indented
 * lines. Blank lines that trail the old section are treated as separators and
 * are kept, so spacing before the next section is preserved.
 *
 * @param existing Current document text (may be empty).
 * @param sectionName Top-level key to replace.
 * @param sectionBody Un-indented YAML for the section's content; each non-blank
 *   line is indented by one space under the header.
 * @returns The updated document, terminated by exactly one newline.
 */
function replaceTopLevelYamlSection(existing: string, sectionName: string, sectionBody: string): string {
  const lines = existing.split(/\r?\n/);
  const header = `${sectionName}:`;
  const sectionStart = lines.findIndex((line) => line === header || line.startsWith(`${header} `));
  const trimmedBody = sectionBody.trimEnd();
  // An empty body yields just the header; blank body lines stay empty instead of
  // becoming whitespace-only lines.
  const bodyLines = [
    header,
    ...(trimmedBody ? trimmedBody.split(/\r?\n/).map((line) => (line.trim() ? ` ${line}` : "")) : []),
  ];
  if (sectionStart < 0) {
    const prefix = existing.trimEnd();
    return `${prefix}${prefix ? "\n" : ""}${bodyLines.join("\n")}\n`;
  }
  let sectionEnd = sectionStart + 1;
  while (sectionEnd < lines.length) {
    const line = lines[sectionEnd];
    if (line.trim() && !line.startsWith(" ") && !line.startsWith("\t")) {
      break;
    }
    sectionEnd += 1;
  }
  // Hand trailing blank lines back to the remainder so the separator before the
  // next top-level section survives the replacement.
  while (sectionEnd > sectionStart + 1 && !lines[sectionEnd - 1].trim()) {
    sectionEnd -= 1;
  }
  const merged = [...lines.slice(0, sectionStart), ...bodyLines, ...lines.slice(sectionEnd)];
  return `${merged.join("\n").replace(/\n*$/, "")}\n`;
}
333
+
212
334
  function readObjectField(value: unknown, key: string): Record<string, unknown> | undefined {
213
335
  if (!value || typeof value !== "object" || Array.isArray(value)) {
214
336
  return undefined;
@@ -493,14 +615,57 @@ function claudeDesktopConfig(home: string, proxyUrl: string, config: ProviderRun
493
615
 
494
616
  function openclawConfig(home: string, proxyUrl: string, config: ProviderRuntimeConfig): ProviderFileChange[] {
495
617
  const model = pickConfiguredModel(config);
496
- const configPath = path.join(home, ".openclaw", "config.json");
618
+ const configPath = path.join(home, ".openclaw", "openclaw.json");
497
619
  const current = readJsonObject(configPath);
498
- current.api_url = proxyUrl;
499
- current.api_key = PROXY_ACCESS_TOKEN_PLACEHOLDER;
500
- current.model = model;
620
+ const models = isPlainRecord(current.models) ? current.models : {};
621
+ const providers = isPlainRecord(models.providers) ? models.providers : {};
622
+ const existingProvider = isPlainRecord(providers.tokenbuddy) ? providers.tokenbuddy : {};
623
+ const existingModels = Array.isArray(existingProvider.models) ? existingProvider.models : [];
624
+ const nextModels = [
625
+ ...existingModels.filter((entry) => {
626
+ return !(isPlainRecord(entry) && entry.id === model);
627
+ }),
628
+ {
629
+ id: model,
630
+ name: model,
631
+ api: "openai-completions",
632
+ input: ["text", "image"],
633
+ },
634
+ ];
635
+ providers.tokenbuddy = {
636
+ ...existingProvider,
637
+ baseUrl: openAiBaseUrl(proxyUrl),
638
+ apiKey: PROXY_ACCESS_TOKEN_PLACEHOLDER,
639
+ auth: "api-key",
640
+ api: "openai-completions",
641
+ models: nextModels,
642
+ };
643
+ models.providers = providers;
644
+ current.models = models;
645
+ const agents = isPlainRecord(current.agents) ? current.agents : {};
646
+ const defaults = isPlainRecord(agents.defaults) ? agents.defaults : {};
647
+ defaults.model = `tokenbuddy/${model}`;
648
+ agents.defaults = defaults;
649
+ current.agents = agents;
501
650
  return [makeChange("openclaw", configPath, "configure OpenClaw proxy settings", jsonContent(current))];
502
651
  }
503
652
 
653
/**
 * Reports whether the OpenClaw JSON config at `filePath` already points at the
 * TokenBuddy proxy.
 *
 * Requires `models.providers.tokenbuddy` to carry the proxy access-token
 * placeholder as `apiKey` and a string `baseUrl` that contains `127.0.0.1` and
 * ends in `/v1`, and `agents.defaults.model` to be a string starting with
 * `tokenbuddy/`.
 *
 * @param filePath Path of the OpenClaw config file.
 * @returns `true` only when every condition above holds.
 */
function isOpenclawTokenBuddyConfigured(filePath: string): boolean {
  const root = readJsonObject(filePath);
  const modelsSection = readObjectField(root, "models");
  const providersSection = readObjectField(modelsSection, "providers");
  const provider = readObjectField(providersSection, "tokenbuddy");
  const agentDefaults = readObjectField(readObjectField(root, "agents"), "defaults");
  if (!provider || !agentDefaults) {
    return false;
  }
  if (provider.apiKey !== PROXY_ACCESS_TOKEN_PLACEHOLDER) {
    return false;
  }
  const baseUrl = provider.baseUrl;
  if (typeof baseUrl !== "string" || !baseUrl.includes("127.0.0.1") || !baseUrl.endsWith("/v1")) {
    return false;
  }
  const defaultModel = agentDefaults.model;
  return typeof defaultModel === "string" && defaultModel.startsWith("tokenbuddy/");
}
668
+
504
669
  function openAiBaseUrl(proxyUrl: string): string {
505
670
  const normalized = proxyUrl.replace(/\/+$/, "");
506
671
  return normalized.endsWith("/v1") ? normalized : `${normalized}/v1`;
@@ -556,18 +721,36 @@ function isOpencodeTokenBuddyConfigured(filePath: string): boolean {
556
721
 
557
722
  function hermesConfig(home: string, proxyUrl: string, config: ProviderRuntimeConfig): ProviderFileChange[] {
558
723
  const model = pickConfiguredModel(config);
559
- const configPath = path.join(home, ".hermes", "settings.json");
560
- const current = readJsonObject(configPath);
561
- const openai = current.openai && typeof current.openai === "object" && !Array.isArray(current.openai)
562
- ? (current.openai as Record<string, unknown>)
563
- : {};
564
- current.openai = {
565
- ...openai,
566
- base_url: proxyUrl,
724
+ const configPath = path.join(home, ".hermes", "config.yaml");
725
+ const existing = readText(configPath) || "";
726
+ const current = parseSimpleYamlObject(existing);
727
+ const modelConfig = isPlainRecord(current.model) ? current.model : {};
728
+ const nextModelConfig = {
729
+ ...modelConfig,
730
+ default: model,
731
+ provider: "custom",
732
+ base_url: openAiBaseUrl(proxyUrl),
567
733
  api_key: PROXY_ACCESS_TOKEN_PLACEHOLDER,
568
- model,
734
+ api_mode: "chat_completions",
569
735
  };
570
- return [makeChange("hermes", configPath, "configure Hermes OpenAI proxy settings", jsonContent(current))];
736
+ const content = replaceTopLevelYamlSection(existing, "model", yamlContent(nextModelConfig));
737
+ return [makeChange("hermes", configPath, "configure Hermes OpenAI proxy settings", content)];
738
+ }
739
+
740
/**
 * Reports whether the Hermes YAML config at `filePath` already points at the
 * TokenBuddy proxy.
 *
 * The top-level `model` mapping must have `provider: custom`, the proxy
 * access-token placeholder as `api_key`, `api_mode: chat_completions`, a string
 * `base_url` containing `127.0.0.1` and ending in `/v1`, and a non-empty string
 * `default`.
 *
 * @param filePath Path of the Hermes config file.
 * @returns `true` only when every condition above holds.
 */
function isHermesTokenBuddyConfigured(filePath: string): boolean {
  const modelSection = readObjectField(readYamlObject(filePath), "model");
  if (!modelSection) {
    return false;
  }
  if (modelSection.provider !== "custom") {
    return false;
  }
  if (modelSection.api_key !== PROXY_ACCESS_TOKEN_PLACEHOLDER) {
    return false;
  }
  if (modelSection.api_mode !== "chat_completions") {
    return false;
  }
  const baseUrl = modelSection.base_url;
  if (typeof baseUrl !== "string" || !baseUrl.includes("127.0.0.1") || !baseUrl.endsWith("/v1")) {
    return false;
  }
  const defaultModel = modelSection.default;
  return typeof defaultModel === "string" && defaultModel.length > 0;
}
572
755
 
573
756
  const PROVIDERS: ProviderDefinition[] = [
@@ -601,10 +784,11 @@ const PROVIDERS: ProviderDefinition[] = [
601
784
  id: "openclaw",
602
785
  name: "OpenClaw Agent",
603
786
  commandName: "openclaw",
604
- configPath: (home) => path.join(home, ".openclaw", "config.json"),
787
+ configPath: (home) => path.join(home, ".openclaw", "openclaw.json"),
788
+ isConfigured: isOpenclawTokenBuddyConfigured,
605
789
  observedPaths: (home) => [
606
- path.join(home, ".openclaw", "openclaw.json"),
607
790
  path.join(home, ".openclaw", "configs"),
791
+ path.join(home, ".openclaw", "config.json"),
608
792
  ],
609
793
  changes: openclawConfig,
610
794
  modelSelectionKind: "single-model",
@@ -624,9 +808,10 @@ const PROVIDERS: ProviderDefinition[] = [
624
808
  id: "hermes",
625
809
  name: "Hermes Terminal",
626
810
  commandName: "hermes",
627
- configPath: (home) => path.join(home, ".hermes", "settings.json"),
811
+ configPath: (home) => path.join(home, ".hermes", "config.yaml"),
812
+ isConfigured: isHermesTokenBuddyConfigured,
628
813
  observedPaths: (home) => [
629
- path.join(home, ".hermes", "config.yaml"),
814
+ path.join(home, ".hermes", "settings.json"),
630
815
  path.join(home, ".hermes", "auth.json"),
631
816
  ],
632
817
  changes: hermesConfig,
@@ -174,6 +174,10 @@ export interface SellerCatalogEntry {
174
174
  discountRatio?: number;
175
175
  /** 服务手续费系数(来自 manifest.selection) */
176
176
  serviceFeeRatio?: number;
177
+ /** 最近一次 TTFT(毫秒),来自本地 seller pool 运行时指标 */
178
+ ttftMs?: number;
179
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),来自本地 seller pool 运行时指标 */
180
+ avgTokensPerSecond?: number;
177
181
  /** 模型数(来自 manifest) */
178
182
  modelCount?: number;
179
183
  /** seller 支持的协议(manifest > registry fallback) */
@@ -65,6 +65,10 @@ export interface PoolEntry {
65
65
  ttftMs?: number;
66
66
  /** 平均推理延迟(毫秒),可选 */
67
67
  avgInferenceMs?: number;
68
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
69
+ avgTokensPerSecond?: number;
70
+ /** 最近一次 runtime speed 指标观测时间;用于避免旧 prewarm 覆盖 live inference 指标 */
71
+ runtimeMetricsObservedAt?: number;
68
72
  /** 上游状态,可选 */
69
73
  upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
70
74
  /** 上游错误类名,可选 */
@@ -73,6 +77,15 @@ export interface PoolEntry {
73
77
  capacityBlockedUntil?: number;
74
78
  }
75
79
 
80
/**
 * Partial set of runtime speed metrics for one seller. Every field is optional.
 */
export interface SellerRuntimeMetricsUpdate {
  /** Time to first token, in milliseconds. */
  ttftMs?: number;
  /** Average inference latency, in milliseconds. */
  avgInferenceMs?: number;
  /** Output throughput, in tokens per second. */
  avgTokensPerSecond?: number;
}
88
+
76
89
  /**
77
90
  * `SellerPool.pick()` 的入参:标识一次路由请求 + 可选的时间/数量约束。
78
91
  */
@@ -214,8 +227,10 @@ export class SellerPool {
214
227
  healthScore: candidate.healthScore,
215
228
  avgLatencyMs: candidate.avgLatencyMs,
216
229
  healthProbeLatencyMs: candidate.healthProbeLatencyMs,
217
- ttftMs: candidate.ttftMs,
218
- avgInferenceMs: candidate.avgInferenceMs,
230
+ ttftMs: preferRuntimeMetric(candidate.ttftMs, candidate.lastSuccessAt, previous?.ttftMs, previous?.runtimeMetricsObservedAt),
231
+ avgInferenceMs: preferRuntimeMetric(candidate.avgInferenceMs, candidate.lastSuccessAt, previous?.avgInferenceMs, previous?.runtimeMetricsObservedAt),
232
+ avgTokensPerSecond: preferRuntimeMetric(candidate.avgTokensPerSecond, candidate.lastSuccessAt, previous?.avgTokensPerSecond, previous?.runtimeMetricsObservedAt),
233
+ runtimeMetricsObservedAt: Math.max(previous?.runtimeMetricsObservedAt ?? 0, candidate.lastSuccessAt || 0) || undefined,
219
234
  upstreamStatus: candidate.upstreamStatus,
220
235
  upstreamErrorClass: candidate.upstreamErrorClass,
221
236
  capacityBlockedUntil: candidate.capacityBlockedUntil ?? previous?.capacityBlockedUntil
@@ -340,6 +355,38 @@ export class SellerPool {
340
355
  return next;
341
356
  }
342
357
 
358
/**
 * Stores runtime speed metrics for `sellerId` on its pool entry.
 *
 * Each supplied metric is passed through `finiteNonNegative`; metrics that come
 * back `undefined` keep the entry's current value. The call also sets
 * `lastSuccessAt` to `now`, raises `healthScore` to at least 60 (capped at
 * 100), mirrors a valid `avgInferenceMs` into `avgLatencyMs`, and moves
 * `runtimeMetricsObservedAt` forward to `now` when `now` is later.
 *
 * @param sellerId Seller whose entry is updated.
 * @param metrics Metrics to record.
 * @param now Timestamp to record; defaults to `this.now()`.
 * @returns The stored entry, or `undefined` when the seller is not in the pool.
 */
recordRuntimeMetrics(
  sellerId: string,
  metrics: SellerRuntimeMetricsUpdate,
  now: number = this.now()
): PoolEntry | undefined {
  const current = this.entries.get(sellerId);
  if (!current) {
    return undefined;
  }
  const observed = {
    ttftMs: finiteNonNegative(metrics.ttftMs),
    avgInferenceMs: finiteNonNegative(metrics.avgInferenceMs),
    avgTokensPerSecond: finiteNonNegative(metrics.avgTokensPerSecond)
  };
  const flooredHealth = Math.max(current.healthScore, 60);
  const updated: PoolEntry = {
    ...current,
    lastSuccessAt: now,
    healthScore: Math.min(100, flooredHealth),
    avgLatencyMs: observed.avgInferenceMs ?? current.avgLatencyMs,
    ttftMs: observed.ttftMs ?? current.ttftMs,
    avgInferenceMs: observed.avgInferenceMs ?? current.avgInferenceMs,
    avgTokensPerSecond: observed.avgTokensPerSecond ?? current.avgTokensPerSecond,
    runtimeMetricsObservedAt: Math.max(current.runtimeMetricsObservedAt ?? 0, now)
  };
  this.entries.set(sellerId, updated);
  const { ttftMs, avgInferenceMs, avgTokensPerSecond } = updated;
  logger.info("pool.runtime_metrics.recorded", "seller pool runtime metrics updated", {
    sellerId,
    ttftMs,
    avgInferenceMs,
    avgTokensPerSecond
  });
  return updated;
}
389
+
343
390
  /**
344
391
  * Record a failure against `sellerId`. Returns the new PoolEntry. The
345
392
  * caller (route-failover) uses the returned `entry.circuit` and the
@@ -475,6 +522,25 @@ export class SellerPool {
475
522
  }
476
523
  }
477
524
 
525
/**
 * Normalizes an optional metric value.
 *
 * @param value Candidate number.
 * @returns `undefined` for `undefined`, `NaN` and infinities; otherwise the
 *   value with negatives clamped to `0`.
 */
function finiteNonNegative(value: number | undefined): number | undefined {
  if (!Number.isFinite(value)) {
    return undefined;
  }
  const finite = value as number;
  return Math.max(0, finite);
}
528
+
529
/**
 * Chooses between a prewarm-sourced metric and the previously stored one.
 *
 * The previous value is returned when the prewarm value is missing, or when a
 * previous value exists and was observed strictly later than the prewarm value
 * (missing timestamps count as `0`). In every other case the prewarm value is
 * returned.
 *
 * @param prewarmValue Metric reported by the prewarm candidate.
 * @param prewarmObservedAt Timestamp associated with the prewarm value.
 * @param previousValue Metric currently stored.
 * @param previousObservedAt Timestamp associated with the stored value.
 * @returns The metric to keep.
 */
function preferRuntimeMetric(
  prewarmValue: number | undefined,
  prewarmObservedAt: number | undefined,
  previousValue: number | undefined,
  previousObservedAt: number | undefined
): number | undefined {
  const previousIsNewer = (previousObservedAt ?? 0) > (prewarmObservedAt ?? 0);
  const keepPrevious = prewarmValue === undefined || (previousValue !== undefined && previousIsNewer);
  return keepPrevious ? previousValue : prewarmValue;
}
543
+
478
544
  function isCapacityBlocked(entry: PoolEntry, now: number): boolean {
479
545
  return Number.isFinite(entry.capacityBlockedUntil) && (entry.capacityBlockedUntil as number) > now;
480
546
  }
@@ -28,6 +28,8 @@ export interface SellerRouteMetric {
28
28
  ttftMs?: number;
29
29
  /** 平均推理延迟(毫秒),可选 */
30
30
  avgInferenceMs?: number;
31
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
32
+ avgTokensPerSecond?: number;
31
33
  /** 折扣系数(0-1),可选;缺省时 scoring 视为"无折扣信息" */
32
34
  discountRatio?: number;
33
35
  /** 当前熔断状态,可选;`open` 的 seller 直接被剔除候选 */
@@ -52,6 +54,12 @@ export interface SellerRoutePrewarmCandidate {
52
54
  healthScore?: number;
53
55
  /** 平均延迟(毫秒),可选 */
54
56
  avgLatencyMs?: number;
57
+ /** TTFT(毫秒),可选 */
58
+ ttftMs?: number;
59
+ /** 平均推理延迟(毫秒),可选 */
60
+ avgInferenceMs?: number;
61
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
62
+ avgTokensPerSecond?: number;
55
63
  }
56
64
 
57
65
  /**
@@ -103,6 +111,7 @@ export interface PlannedSellerRoute {
103
111
  avgLatencyMs?: number;
104
112
  ttftMs?: number;
105
113
  avgInferenceMs?: number;
114
+ avgTokensPerSecond?: number;
106
115
  discountRatio?: number;
107
116
  /** 在 registry 里的声明顺序(0-based,tie-breaker) */
108
117
  registryOrder: number;
@@ -205,6 +214,7 @@ export function planSellerRouteSet(input: SellerRoutePlannerInput): SellerRouteP
205
214
  avgLatencyMs: candidate.avgLatencyMs,
206
215
  ttftMs: candidate.ttftMs,
207
216
  avgInferenceMs: candidate.avgInferenceMs,
217
+ avgTokensPerSecond: candidate.avgTokensPerSecond,
208
218
  discountRatio: candidate.discountRatio,
209
219
  registryOrder: candidate.registryOrder
210
220
  }
@@ -230,6 +240,7 @@ function chooseCandidateSource(
230
240
  ): CandidateSourceResult {
231
241
  const prewarm = input.prewarmCandidates ?? [];
232
242
  let prewarmDiagnostics: PrewarmSourceDiagnostics = emptyPrewarmDiagnostics();
243
+ const prewarmBySellerId = new Map(prewarm.map((candidate) => [candidate.sellerId, candidate]));
233
244
  if (prewarm.length > 0) {
234
245
  const missingSellerIds: string[] = [];
235
246
  const blockedSellerIds: string[] = [];
@@ -263,26 +274,28 @@ function chooseCandidateSource(
263
274
  };
264
275
 
265
276
  if (prewarmCandidates.length > 0) {
277
+ const registryCandidatesBeforeCompatibility = buildRegistryCandidates({
278
+ input,
279
+ indexed,
280
+ metrics,
281
+ prewarmBySellerId
282
+ });
266
283
  return {
267
284
  source: "prewarm_cache",
268
- sourceReason: "prewarm_candidates_compatible",
269
- candidates: prewarmCandidates,
270
- incompatibleSellerIds: prewarmDiagnostics.incompatibleSellerIds,
285
+ sourceReason: "prewarm_metrics_merged_with_registry",
286
+ candidates: registryCandidatesBeforeCompatibility.filter(isSelectableCandidate),
287
+ incompatibleSellerIds: incompatibleSellerIds(registryCandidatesBeforeCompatibility),
271
288
  prewarmDiagnostics
272
289
  };
273
290
  }
274
291
  }
275
292
 
276
- const registryCandidatesBeforeCompatibility = indexed.ordered
277
- .filter((entry) => !metrics.blockedSellerIds.has(entry.seller.id))
278
- .map((entry) => buildCandidate({
279
- seller: entry.seller,
280
- registryOrder: entry.registryOrder,
281
- modelId: input.modelId,
282
- protocol: input.protocol,
283
- paymentMethod: input.paymentMethod,
284
- metric: metrics.bySellerId.get(entry.seller.id)
285
- }));
293
+ const registryCandidatesBeforeCompatibility = buildRegistryCandidates({
294
+ input,
295
+ indexed,
296
+ metrics,
297
+ prewarmBySellerId
298
+ });
286
299
 
287
300
  return {
288
301
  source: "registry_fallback",
@@ -293,6 +306,27 @@ function chooseCandidateSource(
293
306
  };
294
307
  }
295
308
 
309
/**
 * Builds one routing candidate per registry seller that is not in the blocked
 * set, in the order given by `indexed.ordered`.
 *
 * Each candidate's metric is the seller's entry from `metrics.bySellerId`
 * combined with its prewarm candidate (if any) via `mergeOptionalMetric`.
 *
 * @param input Planner input, indexed registry sellers, metric index and the
 *   prewarm candidates keyed by seller id.
 * @returns The candidates produced by `buildCandidate`.
 */
function buildRegistryCandidates(input: {
  input: SellerRoutePlannerInput;
  indexed: ReturnType<typeof indexRegistrySellers>;
  metrics: MetricIndex;
  prewarmBySellerId: Map<string, SellerRoutePrewarmCandidate>;
}): RoutingCandidate[] {
  const { indexed, metrics, prewarmBySellerId } = input;
  const request = input.input;
  const unblocked = indexed.ordered.filter((entry) => {
    return !metrics.blockedSellerIds.has(entry.seller.id);
  });
  return unblocked.map((entry) => {
    const sellerId = entry.seller.id;
    const metric = mergeOptionalMetric(metrics.bySellerId.get(sellerId), prewarmBySellerId.get(sellerId));
    return buildCandidate({
      seller: entry.seller,
      registryOrder: entry.registryOrder,
      modelId: request.modelId,
      protocol: request.protocol,
      paymentMethod: request.paymentMethod,
      metric
    });
  });
}
329
+
296
330
  function buildCandidate(input: {
297
331
  seller: RegistrySeller;
298
332
  registryOrder: number;
@@ -311,6 +345,7 @@ function buildCandidate(input: {
311
345
  avgLatencyMs: input.metric?.avgLatencyMs,
312
346
  ttftMs: input.metric?.ttftMs,
313
347
  avgInferenceMs: input.metric?.avgInferenceMs,
348
+ avgTokensPerSecond: input.metric?.avgTokensPerSecond,
314
349
  discountRatio: input.metric?.discountRatio,
315
350
  registryOrder: input.registryOrder
316
351
  };
@@ -417,8 +452,9 @@ function mergeMetric(
417
452
  sellerId: prewarm.sellerId,
418
453
  healthScore: prewarm.healthScore ?? metric?.healthScore,
419
454
  avgLatencyMs: prewarm.avgLatencyMs ?? metric?.avgLatencyMs,
420
- ttftMs: metric?.ttftMs,
421
- avgInferenceMs: metric?.avgInferenceMs,
455
+ ttftMs: metric?.ttftMs ?? prewarm.ttftMs,
456
+ avgInferenceMs: metric?.avgInferenceMs ?? prewarm.avgInferenceMs,
457
+ avgTokensPerSecond: metric?.avgTokensPerSecond ?? prewarm.avgTokensPerSecond,
422
458
  discountRatio: metric?.discountRatio,
423
459
  circuit: metric?.circuit,
424
460
  capacityBlockedUntil: metric?.capacityBlockedUntil,
@@ -427,6 +463,16 @@ function mergeMetric(
427
463
  };
428
464
  }
429
465
 
466
/**
 * Variant of `mergeMetric` that tolerates a missing prewarm candidate.
 *
 * @param metric Metric for the seller, if any.
 * @param prewarm Prewarm candidate for the seller, if any.
 * @returns `metric` unchanged when there is no prewarm candidate; otherwise the
 *   result of `mergeMetric(metric, prewarm)`.
 */
function mergeOptionalMetric(
  metric: SellerRouteMetric | undefined,
  prewarm: SellerRoutePrewarmCandidate | undefined
): SellerRouteMetric | undefined {
  return prewarm ? mergeMetric(metric, prewarm) : metric;
}
475
+
430
476
  function isCapacityBlocked(metric: SellerRouteMetric, now: number): boolean {
431
477
  return Number.isFinite(metric.capacityBlockedUntil) && (metric.capacityBlockedUntil as number) > now;
432
478
  }
@@ -7,7 +7,7 @@
7
7
  export type SellerRoutingMode = "fixed" | "fixedSet" | "fullAuto";
8
8
  /**
9
9
  * 评分器:决定如何把候选的健康/延迟/折扣分折算成总分。
10
- * - `speed`:TTFT / 推理延迟优先
10
+ * - `speed`:TTFT / 输出吞吐优先
11
11
  * - `discount`:折扣系数优先
12
12
  * - `balanced`:三方面加权均衡
13
13
  */
@@ -55,6 +55,8 @@ export interface RoutingCandidate {
55
55
  ttftMs?: number;
56
56
  /** 平均推理延迟(毫秒),可选 */
57
57
  avgInferenceMs?: number;
58
+ /** 最近 10 分钟窗口内的平均输出吞吐(tokens/s),可选 */
59
+ avgTokensPerSecond?: number;
58
60
  /** 折扣系数 0-1,可选;缺省视为"无折扣信息" */
59
61
  discountRatio?: number;
60
62
  /** 上游状态,可选 */
@@ -92,12 +94,12 @@ export interface CandidateScoreBreakdown {
92
94
  healthComponent?: number;
93
95
  /** TTFT 分量(仅 `speed` / `balanced` 有意义) */
94
96
  ttftComponent?: number;
95
- /** 平均推理延迟分量(仅 `speed` / `balanced` 有意义) */
96
- avgInferenceComponent?: number;
97
+ /** 输出吞吐分量(仅 `speed` / `balanced` 有意义) */
98
+ avgTokensPerSecondComponent?: number;
97
99
  /** 折扣分量(仅 `discount` / `balanced` 有意义) */
98
100
  discountComponent?: number;
99
101
  /** 打分时缺失的输入项;缺越多则越说明"无依据" */
100
- missingInputs: Array<"healthScore" | "ttftMs" | "avgInferenceMs" | "discountRatio">;
102
+ missingInputs: Array<"healthScore" | "ttftMs" | "avgTokensPerSecond" | "discountRatio">;
101
103
  }
102
104
 
103
105
  type SortableCandidate = RoutingCandidate & { score: number };
@@ -201,7 +203,7 @@ function compareCandidates(a: SortableCandidate, b: SortableCandidate, scorer: S
201
203
 
202
204
  if (scorer === "speed") {
203
205
  return compareFiniteAsc(effectiveTtftMs(a), effectiveTtftMs(b))
204
- || compareFiniteAsc(effectiveAvgInferenceMs(a), effectiveAvgInferenceMs(b))
206
+ || compareFiniteDesc(a.avgTokensPerSecond, b.avgTokensPerSecond)
205
207
  || compareFiniteDesc(a.healthScore, b.healthScore)
206
208
  || compareRegistryOrder(a, b);
207
209
  }
@@ -227,14 +229,14 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
227
229
  const missingInputs = missingScoreInputs(candidate);
228
230
  if (scorer === "speed") {
229
231
  const ttftComponent = latencyScore(effectiveTtftMs(candidate)) * 0.65;
230
- const avgInferenceComponent = latencyScore(effectiveAvgInferenceMs(candidate)) * 0.25;
232
+ const avgTokensPerSecondComponent = tokensPerSecondScore(candidate.avgTokensPerSecond) * 0.25;
231
233
  const healthComponent = finiteOr(candidate.healthScore, 0) * 0.1;
232
234
  return {
233
235
  scorer,
234
- totalScore: ttftComponent + avgInferenceComponent + healthComponent,
236
+ totalScore: ttftComponent + avgTokensPerSecondComponent + healthComponent,
235
237
  healthComponent,
236
238
  ttftComponent,
237
- avgInferenceComponent,
239
+ avgTokensPerSecondComponent,
238
240
  missingInputs
239
241
  };
240
242
  }
@@ -251,14 +253,14 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
251
253
 
252
254
  const healthComponent = finiteOr(candidate.healthScore, 0) * 0.35;
253
255
  const ttftComponent = latencyScore(effectiveTtftMs(candidate)) * 0.2;
254
- const avgInferenceComponent = latencyScore(effectiveAvgInferenceMs(candidate)) * 0.2;
256
+ const avgTokensPerSecondComponent = tokensPerSecondScore(candidate.avgTokensPerSecond) * 0.2;
255
257
  const discountComponent = discountScore(candidate.discountRatio) * 0.25;
256
258
  return {
257
259
  scorer,
258
- totalScore: healthComponent + ttftComponent + avgInferenceComponent + discountComponent,
260
+ totalScore: healthComponent + ttftComponent + avgTokensPerSecondComponent + discountComponent,
259
261
  healthComponent,
260
262
  ttftComponent,
261
- avgInferenceComponent,
263
+ avgTokensPerSecondComponent,
262
264
  discountComponent,
263
265
  missingInputs
264
266
  };
@@ -271,6 +273,13 @@ function latencyScore(latencyMs: number | undefined): number {
271
273
  return Math.max(0, 100 - Math.max(0, latencyMs as number) / 10);
272
274
  }
273
275
 
276
/**
 * Maps an output-throughput reading (tokens/s) onto a 0-100 score.
 *
 * @param value Throughput, possibly missing.
 * @returns `0` for missing or non-finite input; otherwise the value clamped to
 *   the range `[0, 100]`.
 */
function tokensPerSecondScore(value: number | undefined): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return 0;
  }
  if (value <= 0) {
    return 0;
  }
  return value >= 100 ? 100 : value;
}
282
+
274
283
  function discountScore(discountRatio: number | undefined): number {
275
284
  if (!Number.isFinite(discountRatio)) {
276
285
  return 0;
@@ -294,10 +303,6 @@ function effectiveTtftMs(candidate: RoutingCandidate): number | undefined {
294
303
  return candidate.ttftMs ?? candidate.healthProbeLatencyMs ?? candidate.avgLatencyMs;
295
304
  }
296
305
 
297
- function effectiveAvgInferenceMs(candidate: RoutingCandidate): number | undefined {
298
- return candidate.avgInferenceMs ?? candidate.avgLatencyMs ?? candidate.healthProbeLatencyMs;
299
- }
300
-
301
306
  function compareRegistryOrder(a: RoutingCandidate, b: RoutingCandidate): number {
302
307
  return a.registryOrder - b.registryOrder;
303
308
  }
@@ -310,7 +315,7 @@ function missingScoreInputs(candidate: RoutingCandidate): CandidateScoreBreakdow
310
315
  const missing: CandidateScoreBreakdown["missingInputs"] = [];
311
316
  if (!Number.isFinite(candidate.healthScore)) missing.push("healthScore");
312
317
  if (!Number.isFinite(candidate.ttftMs)) missing.push("ttftMs");
313
- if (!Number.isFinite(candidate.avgInferenceMs)) missing.push("avgInferenceMs");
318
+ if (!Number.isFinite(candidate.avgTokensPerSecond)) missing.push("avgTokensPerSecond");
314
319
  if (!Number.isFinite(candidate.discountRatio)) missing.push("discountRatio");
315
320
  return missing;
316
321
  }