npm - @tokenbuddy/tokenbuddy - Versions diffs - 1.0.29 → 1.0.31 - Mend

@tokenbuddy/tokenbuddy 1.0.29 → 1.0.31

This diff shows the differences between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (57)

package/dist/src/daemon.d.ts +11 -4
package/dist/src/daemon.d.ts.map +1 -1
package/dist/src/daemon.js +130 -42
package/dist/src/daemon.js.map +1 -1
package/dist/src/doctor-diagnostics.d.ts.map +1 -1
package/dist/src/doctor-diagnostics.js +7 -1
package/dist/src/doctor-diagnostics.js.map +1 -1
package/dist/src/prewarm-cache.d.ts +4 -0
package/dist/src/prewarm-cache.d.ts.map +1 -1
package/dist/src/prewarm-cache.js +1 -0
package/dist/src/prewarm-cache.js.map +1 -1
package/dist/src/prewarm-scheduler.d.ts +2 -0
package/dist/src/prewarm-scheduler.d.ts.map +1 -1
package/dist/src/prewarm-scheduler.js +4 -1
package/dist/src/prewarm-scheduler.js.map +1 -1
package/dist/src/provider-install.d.ts.map +1 -1
package/dist/src/provider-install.js +196 -18
package/dist/src/provider-install.js.map +1 -1
package/dist/src/seller-catalog.d.ts +4 -0
package/dist/src/seller-catalog.d.ts.map +1 -1
package/dist/src/seller-catalog.js.map +1 -1
package/dist/src/seller-pool.d.ts +13 -0
package/dist/src/seller-pool.d.ts.map +1 -1
package/dist/src/seller-pool.js +43 -2
package/dist/src/seller-pool.js.map +1 -1
package/dist/src/seller-route-planner.d.ts +9 -0
package/dist/src/seller-route-planner.d.ts.map +1 -1
package/dist/src/seller-route-planner.js +39 -15
package/dist/src/seller-route-planner.js.map +1 -1
package/dist/src/seller-routing-strategy.d.ts +6 -4
package/dist/src/seller-routing-strategy.d.ts.map +1 -1
package/dist/src/seller-routing-strategy.js +15 -12
package/dist/src/seller-routing-strategy.js.map +1 -1
package/dist/src/terminal-detect.d.ts +5 -5
package/dist/src/terminal-detect.d.ts.map +1 -1
package/dist/src/terminal-detect.js +79 -26
package/dist/src/terminal-detect.js.map +1 -1
package/package.json +1 -1
package/src/daemon.ts +168 -46
package/src/doctor-diagnostics.ts +5 -1
package/src/prewarm-cache.ts +5 -0
package/src/prewarm-scheduler.ts +6 -1
package/src/provider-install.ts +203 -18
package/src/seller-catalog.ts +4 -0
package/src/seller-pool.ts +68 -2
package/src/seller-route-planner.ts +61 -15
package/src/seller-routing-strategy.ts +21 -16
package/src/terminal-detect.ts +81 -24
package/static/ui/assets/index-DEDEl8o2.js +236 -0
package/static/ui/assets/{index-UAfOhbwC.js.map → index-DEDEl8o2.js.map} +1 -1
package/static/ui/index.html +1 -1
package/tests/control-plane-ui-endpoints.test.ts +73 -0
package/tests/seller-pool.test.ts +55 -0
package/tests/seller-route-planner.test.ts +45 -1
package/tests/seller-routing-strategy.test.ts +6 -5
package/tests/tokenbuddy.test.ts +346 -38
package/static/ui/assets/index-UAfOhbwC.js +0 -236

package/src/provider-install.ts CHANGED

@@ -163,6 +163,10 @@ interface ProviderDefinition {
   protocolPreference?: ProtocolPreference;
 }
+function isPlainRecord(value: unknown): value is Record<string, unknown> {
+  return Boolean(value && typeof value === "object" && !Array.isArray(value));
+}
 function resolveHome(home?: string): string {
   return home && home.trim() ? home : os.homedir();
 }
@@ -209,6 +213,124 @@ function readJsonObject(filePath: string): Record<string, unknown> {
   }
 }
+function parseYamlScalar(value: string): unknown {
+  const trimmed = value.trim();
+  if (!trimmed) {
+    return "";
+  }
+  if (trimmed === "true") {
+    return true;
+  }
+  if (trimmed === "false") {
+    return false;
+  }
+  if (trimmed === "null") {
+    return null;
+  }
+  if ((trimmed.startsWith('"') && trimmed.endsWith('"')) || (trimmed.startsWith("'") && trimmed.endsWith("'"))) {
+    return trimmed.slice(1, -1);
+  }
+  const numeric = Number(trimmed);
+  if (Number.isFinite(numeric) && /^-?\d+(?:\.\d+)?$/.test(trimmed)) {
+    return numeric;
+  }
+  return trimmed;
+}
+function parseSimpleYamlObject(text: string): Record<string, unknown> {
+  const root: Record<string, unknown> = {};
+  const stack: Array<{ indent: number; value: Record<string, unknown> }> = [{ indent: -1, value: root }];
+  for (const rawLine of text.split(/\r?\n/)) {
+    if (!rawLine.trim() || rawLine.trimStart().startsWith("#")) {
+      continue;
+    }
+    const indent = rawLine.match(/^ */)?.[0].length ?? 0;
+    const trimmed = rawLine.trim();
+    const separatorIndex = trimmed.indexOf(":");
+    if (separatorIndex <= 0) {
+      continue;
+    }
+    const key = trimmed.slice(0, separatorIndex).trim();
+    const rest = trimmed.slice(separatorIndex + 1).trim();
+    while (stack.length > 1 && indent <= stack[stack.length - 1].indent) {
+      stack.pop();
+    }
+    const parent = stack[stack.length - 1].value;
+    if (!rest) {
+      const child = isPlainRecord(parent[key]) ? parent[key] as Record<string, unknown> : {};
+      parent[key] = child;
+      stack.push({ indent, value: child });
+    } else {
+      parent[key] = parseYamlScalar(rest);
+    }
+  }
+  return root;
+}
+function readYamlObject(filePath: string): Record<string, unknown> {
+  const text = readText(filePath);
+  if (!text) {
+    return {};
+  }
+  return parseSimpleYamlObject(text);
+}
+function yamlScalarContent(value: unknown): string {
+  if (typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+  if (value === null) {
+    return "null";
+  }
+  const text = String(value ?? "");
+  if (!text || /[:#\n\r\t]|^\s|\s$|^(true|false|null)$/i.test(text) || /^-?\d+(?:\.\d+)?$/.test(text)) {
+    return JSON.stringify(text);
+  }
+  return text;
+}
+function yamlContent(value: Record<string, unknown>): string {
+  const lines: string[] = [];
+  const writeObject = (objectValue: Record<string, unknown>, indent: number): void => {
+    for (const [key, entry] of Object.entries(objectValue)) {
+      const prefix = " ".repeat(indent);
+      if (isPlainRecord(entry)) {
+        lines.push(`${prefix}${key}:`);
+        writeObject(entry, indent + 2);
+      } else {
+        lines.push(`${prefix}${key}: ${yamlScalarContent(entry)}`);
+      }
+    }
+  };
+  writeObject(value, 0);
+  return `${lines.join("\n")}\n`;
+}
+function replaceTopLevelYamlSection(existing: string, sectionName: string, sectionBody: string): string {
+  const lines = existing.split(/\r?\n/);
+  const sectionStart = lines.findIndex((line) => {
+    return line === `${sectionName}:` || line.startsWith(`${sectionName}: `);
+  });
+  const bodyLines = [`${sectionName}:`, ...sectionBody.trimEnd().split(/\r?\n/).map((line) => `  ${line}`)];
+  if (sectionStart < 0) {
+    const prefix = existing.trimEnd();
+    return `${prefix}${prefix ? "\n" : ""}${bodyLines.join("\n")}\n`;
+  }
+  let sectionEnd = sectionStart + 1;
+  while (sectionEnd < lines.length) {
+    const line = lines[sectionEnd];
+    if (line.trim() && !line.startsWith(" ") && !line.startsWith("\t")) {
+      break;
+    }
+    sectionEnd += 1;
+  }
+  return `${[
+    ...lines.slice(0, sectionStart),
+    ...bodyLines,
+    ...lines.slice(sectionEnd),
+  ].join("\n").replace(/\n*$/, "")}\n`;
+}
 function readObjectField(value: unknown, key: string): Record<string, unknown> | undefined {
   if (!value || typeof value !== "object" || Array.isArray(value)) {
     return undefined;
@@ -493,14 +615,57 @@ function claudeDesktopConfig(home: string, proxyUrl: string, config: ProviderRun
 function openclawConfig(home: string, proxyUrl: string, config: ProviderRuntimeConfig): ProviderFileChange[] {
   const model = pickConfiguredModel(config);
-  const configPath = path.join(home, ".openclaw", "config.json");
+  const configPath = path.join(home, ".openclaw", "openclaw.json");
   const current = readJsonObject(configPath);
-  current.api_url = proxyUrl;
-  current.api_key = PROXY_ACCESS_TOKEN_PLACEHOLDER;
-  current.model = model;
+  const models = isPlainRecord(current.models) ? current.models : {};
+  const providers = isPlainRecord(models.providers) ? models.providers : {};
+  const existingProvider = isPlainRecord(providers.tokenbuddy) ? providers.tokenbuddy : {};
+  const existingModels = Array.isArray(existingProvider.models) ? existingProvider.models : [];
+  const nextModels = [
+    ...existingModels.filter((entry) => {
+      return !(isPlainRecord(entry) && entry.id === model);
+    }),
+    {
+      id: model,
+      name: model,
+      api: "openai-completions",
+      input: ["text", "image"],
+    },
+  ];
+  providers.tokenbuddy = {
+    ...existingProvider,
+    baseUrl: openAiBaseUrl(proxyUrl),
+    apiKey: PROXY_ACCESS_TOKEN_PLACEHOLDER,
+    auth: "api-key",
+    api: "openai-completions",
+    models: nextModels,
+  };
+  models.providers = providers;
+  current.models = models;
+  const agents = isPlainRecord(current.agents) ? current.agents : {};
+  const defaults = isPlainRecord(agents.defaults) ? agents.defaults : {};
+  defaults.model = `tokenbuddy/${model}`;
+  agents.defaults = defaults;
+  current.agents = agents;
   return [makeChange("openclaw", configPath, "configure OpenClaw proxy settings", jsonContent(current))];
 }
+function isOpenclawTokenBuddyConfigured(filePath: string): boolean {
+  const current = readJsonObject(filePath);
+  const tokenbuddy = readObjectField(readObjectField(readObjectField(current, "models"), "providers"), "tokenbuddy");
+  const defaults = readObjectField(readObjectField(current, "agents"), "defaults");
+  if (!tokenbuddy || !defaults) {
+    return false;
+  }
+  const defaultModel = defaults.model;
+  return tokenbuddy.apiKey === PROXY_ACCESS_TOKEN_PLACEHOLDER &&
+    typeof tokenbuddy.baseUrl === "string" &&
+    tokenbuddy.baseUrl.includes("127.0.0.1") &&
+    tokenbuddy.baseUrl.endsWith("/v1") &&
+    typeof defaultModel === "string" &&
+    defaultModel.startsWith("tokenbuddy/");
+}
 function openAiBaseUrl(proxyUrl: string): string {
   const normalized = proxyUrl.replace(/\/+$/, "");
   return normalized.endsWith("/v1") ? normalized : `${normalized}/v1`;
@@ -556,18 +721,36 @@ function isOpencodeTokenBuddyConfigured(filePath: string): boolean {
 function hermesConfig(home: string, proxyUrl: string, config: ProviderRuntimeConfig): ProviderFileChange[] {
   const model = pickConfiguredModel(config);
-  const configPath = path.join(home, ".hermes", "settings.json");
-  const current = readJsonObject(configPath);
-  const openai = current.openai && typeof current.openai === "object" && !Array.isArray(current.openai)
-    ? (current.openai as Record<string, unknown>)
-    : {};
-  current.openai = {
-    ...openai,
-    base_url: proxyUrl,
+  const configPath = path.join(home, ".hermes", "config.yaml");
+  const existing = readText(configPath) || "";
+  const current = parseSimpleYamlObject(existing);
+  const modelConfig = isPlainRecord(current.model) ? current.model : {};
+  const nextModelConfig = {
+    ...modelConfig,
+    default: model,
+    provider: "custom",
+    base_url: openAiBaseUrl(proxyUrl),
     api_key: PROXY_ACCESS_TOKEN_PLACEHOLDER,
-    model,
+    api_mode: "chat_completions",
   };
-  return [makeChange("hermes", configPath, "configure Hermes OpenAI proxy settings", jsonContent(current))];
+  const content = replaceTopLevelYamlSection(existing, "model", yamlContent(nextModelConfig));
+  return [makeChange("hermes", configPath, "configure Hermes OpenAI proxy settings", content)];
+}
+function isHermesTokenBuddyConfigured(filePath: string): boolean {
+  const current = readYamlObject(filePath);
+  const modelConfig = readObjectField(current, "model");
+  if (!modelConfig) {
+    return false;
+  }
+  return modelConfig.provider === "custom" &&
+    modelConfig.api_key === PROXY_ACCESS_TOKEN_PLACEHOLDER &&
+    modelConfig.api_mode === "chat_completions" &&
+    typeof modelConfig.base_url === "string" &&
+    modelConfig.base_url.includes("127.0.0.1") &&
+    modelConfig.base_url.endsWith("/v1") &&
+    typeof modelConfig.default === "string" &&
+    modelConfig.default.length > 0;
 }
 const PROVIDERS: ProviderDefinition[] = [
@@ -601,10 +784,11 @@ const PROVIDERS: ProviderDefinition[] = [
     id: "openclaw",
     name: "OpenClaw Agent",
     commandName: "openclaw",
-    configPath: (home) => path.join(home, ".openclaw", "config.json"),
+    configPath: (home) => path.join(home, ".openclaw", "openclaw.json"),
+    isConfigured: isOpenclawTokenBuddyConfigured,
     observedPaths: (home) => [
-      path.join(home, ".openclaw", "openclaw.json"),
       path.join(home, ".openclaw", "configs"),
+      path.join(home, ".openclaw", "config.json"),
     ],
     changes: openclawConfig,
     modelSelectionKind: "single-model",
@@ -624,9 +808,10 @@ const PROVIDERS: ProviderDefinition[] = [
     id: "hermes",
     name: "Hermes Terminal",
     commandName: "hermes",
-    configPath: (home) => path.join(home, ".hermes", "settings.json"),
+    configPath: (home) => path.join(home, ".hermes", "config.yaml"),
+    isConfigured: isHermesTokenBuddyConfigured,
     observedPaths: (home) => [
-      path.join(home, ".hermes", "config.yaml"),
+      path.join(home, ".hermes", "settings.json"),
       path.join(home, ".hermes", "auth.json"),
     ],
     changes: hermesConfig,

package/src/seller-catalog.ts CHANGED

@@ -174,6 +174,10 @@ export interface SellerCatalogEntry {
   discountRatio?: number;
   /** 服务手续费系数（来自 manifest.selection） */
   serviceFeeRatio?: number;
+  /** 最近一次 TTFT（毫秒），来自本地 seller pool 运行时指标 */
+  ttftMs?: number;
+  /** 最近 10 分钟窗口内的平均输出吞吐（tokens/s），来自本地 seller pool 运行时指标 */
+  avgTokensPerSecond?: number;
   /** 模型数（来自 manifest） */
   modelCount?: number;
   /** seller 支持的协议（manifest > registry fallback） */

package/src/seller-pool.ts CHANGED

@@ -65,6 +65,10 @@ export interface PoolEntry {
   ttftMs?: number;
   /** 平均推理延迟（毫秒），可选 */
   avgInferenceMs?: number;
+  /** 最近 10 分钟窗口内的平均输出吞吐（tokens/s），可选 */
+  avgTokensPerSecond?: number;
+  /** 最近一次 runtime speed 指标观测时间；用于避免旧 prewarm 覆盖 live inference 指标 */
+  runtimeMetricsObservedAt?: number;
   /** 上游状态，可选 */
   upstreamStatus?: "healthy" | "degraded" | "unhealthy" | "unknown";
   /** 上游错误类名，可选 */
@@ -73,6 +77,15 @@ export interface PoolEntry {
   capacityBlockedUntil?: number;
 }
+export interface SellerRuntimeMetricsUpdate {
+  /** TTFT（毫秒），可选 */
+  ttftMs?: number;
+  /** 平均推理延迟（毫秒），可选 */
+  avgInferenceMs?: number;
+  /** 输出吞吐（tokens/s），可选 */
+  avgTokensPerSecond?: number;
+}
 /**
  * `SellerPool.pick()` 的入参：标识一次路由请求 + 可选的时间/数量约束。
  */
@@ -214,8 +227,10 @@ export class SellerPool {
           healthScore: candidate.healthScore,
           avgLatencyMs: candidate.avgLatencyMs,
           healthProbeLatencyMs: candidate.healthProbeLatencyMs,
-          ttftMs: candidate.ttftMs,
-          avgInferenceMs: candidate.avgInferenceMs,
+          ttftMs: preferRuntimeMetric(candidate.ttftMs, candidate.lastSuccessAt, previous?.ttftMs, previous?.runtimeMetricsObservedAt),
+          avgInferenceMs: preferRuntimeMetric(candidate.avgInferenceMs, candidate.lastSuccessAt, previous?.avgInferenceMs, previous?.runtimeMetricsObservedAt),
+          avgTokensPerSecond: preferRuntimeMetric(candidate.avgTokensPerSecond, candidate.lastSuccessAt, previous?.avgTokensPerSecond, previous?.runtimeMetricsObservedAt),
+          runtimeMetricsObservedAt: Math.max(previous?.runtimeMetricsObservedAt ?? 0, candidate.lastSuccessAt || 0) || undefined,
           upstreamStatus: candidate.upstreamStatus,
           upstreamErrorClass: candidate.upstreamErrorClass,
           capacityBlockedUntil: candidate.capacityBlockedUntil ?? previous?.capacityBlockedUntil
@@ -340,6 +355,38 @@ export class SellerPool {
     return next;
   }
+  recordRuntimeMetrics(
+    sellerId: string,
+    metrics: SellerRuntimeMetricsUpdate,
+    now: number = this.now()
+  ): PoolEntry | undefined {
+    const entry = this.entries.get(sellerId);
+    if (!entry) {
+      return undefined;
+    }
+    const ttftMs = finiteNonNegative(metrics.ttftMs);
+    const avgInferenceMs = finiteNonNegative(metrics.avgInferenceMs);
+    const avgTokensPerSecond = finiteNonNegative(metrics.avgTokensPerSecond);
+    const next: PoolEntry = {
+      ...entry,
+      lastSuccessAt: now,
+      healthScore: Math.min(100, Math.max(entry.healthScore, 60)),
+      avgLatencyMs: avgInferenceMs ?? entry.avgLatencyMs,
+      ttftMs: ttftMs ?? entry.ttftMs,
+      avgInferenceMs: avgInferenceMs ?? entry.avgInferenceMs,
+      avgTokensPerSecond: avgTokensPerSecond ?? entry.avgTokensPerSecond,
+      runtimeMetricsObservedAt: Math.max(entry.runtimeMetricsObservedAt ?? 0, now)
+    };
+    this.entries.set(sellerId, next);
+    logger.info("pool.runtime_metrics.recorded", "seller pool runtime metrics updated", {
+      sellerId,
+      ttftMs: next.ttftMs,
+      avgInferenceMs: next.avgInferenceMs,
+      avgTokensPerSecond: next.avgTokensPerSecond
+    });
+    return next;
+  }
   /**
    * Record a failure against `sellerId`. Returns the new PoolEntry. The
    * caller (route-failover) uses the returned `entry.circuit` and the
@@ -475,6 +522,25 @@ export class SellerPool {
   }
 }
+function finiteNonNegative(value: number | undefined): number | undefined {
+  return Number.isFinite(value) ? Math.max(0, value as number) : undefined;
+}
+function preferRuntimeMetric(
+  prewarmValue: number | undefined,
+  prewarmObservedAt: number | undefined,
+  previousValue: number | undefined,
+  previousObservedAt: number | undefined
+): number | undefined {
+  if (prewarmValue === undefined) {
+    return previousValue;
+  }
+  if (previousValue !== undefined && (previousObservedAt ?? 0) > (prewarmObservedAt ?? 0)) {
+    return previousValue;
+  }
+  return prewarmValue;
+}
 function isCapacityBlocked(entry: PoolEntry, now: number): boolean {
   return Number.isFinite(entry.capacityBlockedUntil) && (entry.capacityBlockedUntil as number) > now;
 }

package/src/seller-route-planner.ts CHANGED

@@ -28,6 +28,8 @@ export interface SellerRouteMetric {
   ttftMs?: number;
   /** 平均推理延迟（毫秒），可选 */
   avgInferenceMs?: number;
+  /** 最近 10 分钟窗口内的平均输出吞吐（tokens/s），可选 */
+  avgTokensPerSecond?: number;
   /** 折扣系数（0-1），可选；缺省时 scoring 视为"无折扣信息" */
   discountRatio?: number;
   /** 当前熔断状态，可选；`open` 的 seller 直接被剔除候选 */
@@ -52,6 +54,12 @@ export interface SellerRoutePrewarmCandidate {
   healthScore?: number;
   /** 平均延迟（毫秒），可选 */
   avgLatencyMs?: number;
+  /** TTFT（毫秒），可选 */
+  ttftMs?: number;
+  /** 平均推理延迟（毫秒），可选 */
+  avgInferenceMs?: number;
+  /** 最近 10 分钟窗口内的平均输出吞吐（tokens/s），可选 */
+  avgTokensPerSecond?: number;
 }
 /**
@@ -103,6 +111,7 @@ export interface PlannedSellerRoute {
     avgLatencyMs?: number;
     ttftMs?: number;
     avgInferenceMs?: number;
+    avgTokensPerSecond?: number;
     discountRatio?: number;
     /** 在 registry 里的声明顺序（0-based，tie-breaker） */
     registryOrder: number;
@@ -205,6 +214,7 @@ export function planSellerRouteSet(input: SellerRoutePlannerInput): SellerRouteP
         avgLatencyMs: candidate.avgLatencyMs,
         ttftMs: candidate.ttftMs,
         avgInferenceMs: candidate.avgInferenceMs,
+        avgTokensPerSecond: candidate.avgTokensPerSecond,
         discountRatio: candidate.discountRatio,
         registryOrder: candidate.registryOrder
       }
@@ -230,6 +240,7 @@ function chooseCandidateSource(
 ): CandidateSourceResult {
   const prewarm = input.prewarmCandidates ?? [];
   let prewarmDiagnostics: PrewarmSourceDiagnostics = emptyPrewarmDiagnostics();
+  const prewarmBySellerId = new Map(prewarm.map((candidate) => [candidate.sellerId, candidate]));
   if (prewarm.length > 0) {
     const missingSellerIds: string[] = [];
     const blockedSellerIds: string[] = [];
@@ -263,26 +274,28 @@ function chooseCandidateSource(
     };
     if (prewarmCandidates.length > 0) {
+      const registryCandidatesBeforeCompatibility = buildRegistryCandidates({
+        input,
+        indexed,
+        metrics,
+        prewarmBySellerId
+      });
       return {
         source: "prewarm_cache",
-        sourceReason: "prewarm_candidates_compatible",
-        candidates: prewarmCandidates,
-        incompatibleSellerIds: prewarmDiagnostics.incompatibleSellerIds,
+        sourceReason: "prewarm_metrics_merged_with_registry",
+        candidates: registryCandidatesBeforeCompatibility.filter(isSelectableCandidate),
+        incompatibleSellerIds: incompatibleSellerIds(registryCandidatesBeforeCompatibility),
         prewarmDiagnostics
       };
     }
   }
-  const registryCandidatesBeforeCompatibility = indexed.ordered
-    .filter((entry) => !metrics.blockedSellerIds.has(entry.seller.id))
-    .map((entry) => buildCandidate({
-      seller: entry.seller,
-      registryOrder: entry.registryOrder,
-      modelId: input.modelId,
-      protocol: input.protocol,
-      paymentMethod: input.paymentMethod,
-      metric: metrics.bySellerId.get(entry.seller.id)
-    }));
+  const registryCandidatesBeforeCompatibility = buildRegistryCandidates({
+    input,
+    indexed,
+    metrics,
+    prewarmBySellerId
+  });
   return {
     source: "registry_fallback",
@@ -293,6 +306,27 @@ function chooseCandidateSource(
   };
 }
+function buildRegistryCandidates(input: {
+  input: SellerRoutePlannerInput;
+  indexed: ReturnType<typeof indexRegistrySellers>;
+  metrics: MetricIndex;
+  prewarmBySellerId: Map<string, SellerRoutePrewarmCandidate>;
+}): RoutingCandidate[] {
+  return input.indexed.ordered
+    .filter((entry) => !input.metrics.blockedSellerIds.has(entry.seller.id))
+    .map((entry) => buildCandidate({
+      seller: entry.seller,
+      registryOrder: entry.registryOrder,
+      modelId: input.input.modelId,
+      protocol: input.input.protocol,
+      paymentMethod: input.input.paymentMethod,
+      metric: mergeOptionalMetric(
+        input.metrics.bySellerId.get(entry.seller.id),
+        input.prewarmBySellerId.get(entry.seller.id)
+      )
+    }));
+}
 function buildCandidate(input: {
   seller: RegistrySeller;
   registryOrder: number;
@@ -311,6 +345,7 @@ function buildCandidate(input: {
     avgLatencyMs: input.metric?.avgLatencyMs,
     ttftMs: input.metric?.ttftMs,
     avgInferenceMs: input.metric?.avgInferenceMs,
+    avgTokensPerSecond: input.metric?.avgTokensPerSecond,
     discountRatio: input.metric?.discountRatio,
     registryOrder: input.registryOrder
   };
@@ -417,8 +452,9 @@ function mergeMetric(
     sellerId: prewarm.sellerId,
     healthScore: prewarm.healthScore ?? metric?.healthScore,
     avgLatencyMs: prewarm.avgLatencyMs ?? metric?.avgLatencyMs,
-    ttftMs: metric?.ttftMs,
-    avgInferenceMs: metric?.avgInferenceMs,
+    ttftMs: metric?.ttftMs ?? prewarm.ttftMs,
+    avgInferenceMs: metric?.avgInferenceMs ?? prewarm.avgInferenceMs,
+    avgTokensPerSecond: metric?.avgTokensPerSecond ?? prewarm.avgTokensPerSecond,
     discountRatio: metric?.discountRatio,
     circuit: metric?.circuit,
     capacityBlockedUntil: metric?.capacityBlockedUntil,
@@ -427,6 +463,16 @@ function mergeMetric(
   };
 }
+function mergeOptionalMetric(
+  metric: SellerRouteMetric | undefined,
+  prewarm: SellerRoutePrewarmCandidate | undefined
+): SellerRouteMetric | undefined {
+  if (!prewarm) {
+    return metric;
+  }
+  return mergeMetric(metric, prewarm);
+}
 function isCapacityBlocked(metric: SellerRouteMetric, now: number): boolean {
   return Number.isFinite(metric.capacityBlockedUntil) && (metric.capacityBlockedUntil as number) > now;
 }

package/src/seller-routing-strategy.ts CHANGED

@@ -7,7 +7,7 @@
 export type SellerRoutingMode = "fixed" | "fixedSet" | "fullAuto";
 /**
  * 评分器：决定如何把候选的健康/延迟/折扣分折算成总分。
- * - `speed`：TTFT / 推理延迟优先
+ * - `speed`：TTFT / 输出吞吐优先
  * - `discount`：折扣系数优先
  * - `balanced`：三方面加权均衡
  */
@@ -55,6 +55,8 @@ export interface RoutingCandidate {
   ttftMs?: number;
   /** 平均推理延迟（毫秒），可选 */
   avgInferenceMs?: number;
+  /** 最近 10 分钟窗口内的平均输出吞吐（tokens/s），可选 */
+  avgTokensPerSecond?: number;
   /** 折扣系数 0-1，可选；缺省视为"无折扣信息" */
   discountRatio?: number;
   /** 上游状态，可选 */
@@ -92,12 +94,12 @@ export interface CandidateScoreBreakdown {
   healthComponent?: number;
   /** TTFT 分量（仅 `speed` / `balanced` 有意义） */
   ttftComponent?: number;
-  /** 平均推理延迟分量（仅 `speed` / `balanced` 有意义） */
-  avgInferenceComponent?: number;
+  /** 输出吞吐分量（仅 `speed` / `balanced` 有意义） */
+  avgTokensPerSecondComponent?: number;
   /** 折扣分量（仅 `discount` / `balanced` 有意义） */
   discountComponent?: number;
   /** 打分时缺失的输入项；缺越多则越说明"无依据" */
-  missingInputs: Array<"healthScore" | "ttftMs" | "avgInferenceMs" | "discountRatio">;
+  missingInputs: Array<"healthScore" | "ttftMs" | "avgTokensPerSecond" | "discountRatio">;
 }
 type SortableCandidate = RoutingCandidate & { score: number };
@@ -201,7 +203,7 @@ function compareCandidates(a: SortableCandidate, b: SortableCandidate, scorer: S
   if (scorer === "speed") {
     return compareFiniteAsc(effectiveTtftMs(a), effectiveTtftMs(b))
-      || compareFiniteAsc(effectiveAvgInferenceMs(a), effectiveAvgInferenceMs(b))
+      || compareFiniteDesc(a.avgTokensPerSecond, b.avgTokensPerSecond)
       || compareFiniteDesc(a.healthScore, b.healthScore)
       || compareRegistryOrder(a, b);
   }
@@ -227,14 +229,14 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
   const missingInputs = missingScoreInputs(candidate);
   if (scorer === "speed") {
     const ttftComponent = latencyScore(effectiveTtftMs(candidate)) * 0.65;
-    const avgInferenceComponent = latencyScore(effectiveAvgInferenceMs(candidate)) * 0.25;
+    const avgTokensPerSecondComponent = tokensPerSecondScore(candidate.avgTokensPerSecond) * 0.25;
     const healthComponent = finiteOr(candidate.healthScore, 0) * 0.1;
     return {
       scorer,
-      totalScore: ttftComponent + avgInferenceComponent + healthComponent,
+      totalScore: ttftComponent + avgTokensPerSecondComponent + healthComponent,
       healthComponent,
       ttftComponent,
-      avgInferenceComponent,
+      avgTokensPerSecondComponent,
       missingInputs
     };
   }
@@ -251,14 +253,14 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
   const healthComponent = finiteOr(candidate.healthScore, 0) * 0.35;
   const ttftComponent = latencyScore(effectiveTtftMs(candidate)) * 0.2;
-  const avgInferenceComponent = latencyScore(effectiveAvgInferenceMs(candidate)) * 0.2;
+  const avgTokensPerSecondComponent = tokensPerSecondScore(candidate.avgTokensPerSecond) * 0.2;
   const discountComponent = discountScore(candidate.discountRatio) * 0.25;
   return {
     scorer,
-    totalScore: healthComponent + ttftComponent + avgInferenceComponent + discountComponent,
+    totalScore: healthComponent + ttftComponent + avgTokensPerSecondComponent + discountComponent,
     healthComponent,
     ttftComponent,
-    avgInferenceComponent,
+    avgTokensPerSecondComponent,
     discountComponent,
     missingInputs
   };
@@ -271,6 +273,13 @@ function latencyScore(latencyMs: number | undefined): number {
   return Math.max(0, 100 - Math.max(0, latencyMs as number) / 10);
 }
+function tokensPerSecondScore(value: number | undefined): number {
+  if (!Number.isFinite(value)) {
+    return 0;
+  }
+  return Math.max(0, Math.min(100, value as number));
+}
 function discountScore(discountRatio: number | undefined): number {
   if (!Number.isFinite(discountRatio)) {
     return 0;
@@ -294,10 +303,6 @@ function effectiveTtftMs(candidate: RoutingCandidate): number | undefined {
   return candidate.ttftMs ?? candidate.healthProbeLatencyMs ?? candidate.avgLatencyMs;
 }
-function effectiveAvgInferenceMs(candidate: RoutingCandidate): number | undefined {
-  return candidate.avgInferenceMs ?? candidate.avgLatencyMs ?? candidate.healthProbeLatencyMs;
-}
 function compareRegistryOrder(a: RoutingCandidate, b: RoutingCandidate): number {
   return a.registryOrder - b.registryOrder;
 }
@@ -310,7 +315,7 @@ function missingScoreInputs(candidate: RoutingCandidate): CandidateScoreBreakdow
   const missing: CandidateScoreBreakdown["missingInputs"] = [];
   if (!Number.isFinite(candidate.healthScore)) missing.push("healthScore");
   if (!Number.isFinite(candidate.ttftMs)) missing.push("ttftMs");
-  if (!Number.isFinite(candidate.avgInferenceMs)) missing.push("avgInferenceMs");
+  if (!Number.isFinite(candidate.avgTokensPerSecond)) missing.push("avgTokensPerSecond");
   if (!Number.isFinite(candidate.discountRatio)) missing.push("discountRatio");
   return missing;
 }