@tokenbuddy/tokenbuddy 1.0.40 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,7 @@ import { PrewarmScheduler } from "./prewarm-scheduler.js";
21
21
  import { SellerConcurrencyLimiter } from "./seller-concurrency-limiter.js";
22
22
  import { SellerMetadataCache } from "./seller-metadata-cache.js";
23
23
  import { planSellerRouteSet } from "./seller-route-planner.js";
24
+ import { buildSellerRouteRecommendations } from "./seller-route-recommendations.js";
24
25
  import { assertSellerRoutingConfig, mergeSellerRoutingConfig, normalizeSellerRoutingConfig, parseSellerIdList, ROUTING_CONFIG_KEY } from "./seller-routing-config.js";
25
26
  import { AUTO_PROVIDER_CONFIG_KEY, MANUAL_PROVIDER_CONFIG_KEY, MANUAL_PROVIDER_OBSERVATIONS_CONFIG_KEY, PROVIDER_MODE_CONFIG_KEY, defaultProviderModeConfig, normalizeAutoProviderConfig, normalizeManualProviderObservationsConfig, normalizeManualProviderConfig, normalizeManualProvidersConfig, normalizeProviderModeConfig, publicManualProviderConfig } from "./provider-routing-config.js";
26
27
  import { assertInitSetupSteps, buildCompletedInitSetupMarker, INIT_SETUP_CONFIG_KEY, INIT_SETUP_STEPS, isFreshInitMachine, normalizeInitSetupMarker, resolveInitRecommendedModels, } from "./init-setup.js";
@@ -193,6 +194,13 @@ function safeBillingServiceTier(value) {
193
194
  function safeBillingUnit(value) {
194
195
  return value === "tokens" || value === "images" ? value : undefined;
195
196
  }
197
/**
 * Billing-source labels accepted by {@link safeBillingSource}.
 * Built once at module load so each validation is a constant-time lookup
 * instead of allocating and scanning a fresh array per call.
 */
const SAFE_BILLING_SOURCES = new Set([
    "provider_reported",
    "seller_computed",
    "reservation_fallback",
    "buyer_estimated"
]);
/**
 * Validate a billing-source label read from a parsed payload.
 *
 * @param {unknown} value - Candidate value; anything that is not a string is rejected.
 * @returns {string|undefined} `value` unchanged when it is one of the accepted
 *   labels, otherwise `undefined`.
 */
function safeBillingSource(value) {
    if (typeof value !== "string") {
        return undefined;
    }
    return SAFE_BILLING_SOURCES.has(value) ? value : undefined;
}
196
204
  function safeShortDisplayString(value) {
197
205
  if (typeof value !== "string")
198
206
  return undefined;
@@ -208,9 +216,11 @@ function billingBreakdownSummary(value) {
208
216
  const inputPriceMicrosPer1m = nonNegativeIntegerField(data.inputPriceMicrosPer1m ?? data.input_price_micros_per_1m);
209
217
  const outputPriceMicrosPer1m = nonNegativeIntegerField(data.outputPriceMicrosPer1m ?? data.output_price_micros_per_1m);
210
218
  const cacheReadPriceMicrosPer1m = nonNegativeIntegerField(data.cacheReadPriceMicrosPer1m ?? data.cache_read_price_micros_per_1m);
219
+ const cacheWritePriceMicrosPer1m = nonNegativeIntegerField(data.cacheWritePriceMicrosPer1m ?? data.cache_write_price_micros_per_1m);
211
220
  const inputCostMicros = nonNegativeIntegerField(data.inputCostMicros ?? data.input_cost_micros);
212
221
  const outputCostMicros = nonNegativeIntegerField(data.outputCostMicros ?? data.output_cost_micros);
213
222
  const cacheReadCostMicros = nonNegativeIntegerField(data.cacheReadCostMicros ?? data.cache_read_cost_micros);
223
+ const cacheWriteCostMicros = nonNegativeIntegerField(data.cacheWriteCostMicros ?? data.cache_write_cost_micros);
214
224
  const originalUsdMicros = nonNegativeIntegerField(data.originalUsdMicros ?? data.original_usd_micros);
215
225
  const billingMultiplier = nonNegativeFiniteField(data.billingMultiplier ?? data.billing_multiplier);
216
226
  if (inputPriceMicrosPer1m === undefined ||
@@ -224,13 +234,17 @@ function billingBreakdownSummary(value) {
224
234
  return undefined;
225
235
  }
226
236
  return {
237
+ billingSource: safeBillingSource(data.billingSource ?? data.billing_source),
238
+ reportedCostUsdMicros: nonNegativeIntegerField(data.reportedCostUsdMicros ?? data.reported_cost_usd_micros),
227
239
  billingUnit: safeBillingUnit(data.billingUnit ?? data.billing_unit),
228
240
  inputPriceMicrosPer1m,
229
241
  outputPriceMicrosPer1m,
230
242
  cacheReadPriceMicrosPer1m,
243
+ cacheWritePriceMicrosPer1m,
231
244
  inputCostMicros,
232
245
  outputCostMicros,
233
246
  cacheReadCostMicros,
247
+ cacheWriteCostMicros,
234
248
  originalUsdMicros,
235
249
  billingMultiplier,
236
250
  serviceTier: safeBillingServiceTier(data.serviceTier ?? data.service_tier),
@@ -243,6 +257,20 @@ function billingBreakdownSummary(value) {
243
257
  imageCostMicrosPerImage: nonNegativeIntegerField(data.imageCostMicrosPerImage ?? data.image_cost_micros_per_image)
244
258
  };
245
259
  }
260
/**
 * Build a summary of billable usage counters from a raw usage object.
 *
 * The input is first passed through `usageRecord`; when that yields a falsy
 * value the function returns `undefined`. Every counter is looked up under
 * its camelCase key first and falls back to the snake_case key only when the
 * camelCase value is null or undefined. Each value is then passed through
 * `nonNegativeIntegerField`.
 *
 * @param {unknown} value - Raw billable-usage payload.
 * @returns {object|undefined} Object with inputTokens, outputTokens,
 *   cacheReadTokens, cacheWriteTokens, reasoningTokens, audioInputTokens and
 *   webSearchCalls, or `undefined` when no usable record is present.
 */
function billableUsageSummary(value) {
    const record = usageRecord(value);
    if (!record) {
        return undefined;
    }
    // Reads one counter, preferring the camelCase spelling over snake_case.
    const counter = (camelKey, snakeKey) => nonNegativeIntegerField(record[camelKey] ?? record[snakeKey]);
    return {
        inputTokens: counter("inputTokens", "input_tokens"),
        outputTokens: counter("outputTokens", "output_tokens"),
        cacheReadTokens: counter("cacheReadTokens", "cache_read_tokens"),
        cacheWriteTokens: counter("cacheWriteTokens", "cache_write_tokens"),
        reasoningTokens: counter("reasoningTokens", "reasoning_tokens"),
        audioInputTokens: counter("audioInputTokens", "audio_input_tokens"),
        webSearchCalls: counter("webSearchCalls", "web_search_calls")
    };
}
246
274
  function imageUsageMetadata(responseBody, requestBody) {
247
275
  const responseImages = Array.isArray(responseBody?.data) ? responseBody.data : undefined;
248
276
  const firstImage = responseImages?.find((item) => item && typeof item === "object");
@@ -353,11 +381,25 @@ function parseSellerSettlementObject(raw) {
353
381
  remainingCreditMicros,
354
382
  reservedBalanceMicros: numericHeaderField(parsed.reservedBalanceMicros ?? parsed.reserved_balance_micros),
355
383
  spentMicros: numericHeaderField(parsed.spentMicros ?? parsed.spent_micros),
384
+ pricingCurrency: typeof parsed.pricingCurrency === "string"
385
+ ? parsed.pricingCurrency
386
+ : typeof parsed.pricing_currency === "string"
387
+ ? parsed.pricing_currency
388
+ : undefined,
389
+ settlementUnit: typeof parsed.settlementUnit === "string"
390
+ ? parsed.settlementUnit
391
+ : typeof parsed.settlement_unit === "string"
392
+ ? parsed.settlement_unit
393
+ : undefined,
394
+ usdToMicrosRate: numericHeaderField(parsed.usdToMicrosRate ?? parsed.usd_to_micros_rate),
356
395
  priceVersion: typeof parsed.priceVersion === "string"
357
396
  ? parsed.priceVersion
358
397
  : typeof parsed.price_version === "string"
359
398
  ? parsed.price_version
360
399
  : undefined,
400
+ billingSource: safeBillingSource(parsed.billingSource ?? parsed.billing_source),
401
+ reportedCostUsdMicros: numericHeaderField(parsed.reportedCostUsdMicros ?? parsed.reported_cost_usd_micros),
402
+ billableUsage: billableUsageSummary(parsed.billableUsage ?? parsed.billable_usage),
361
403
  billingBreakdown: billingBreakdownSummary(parsed.billingBreakdown ?? parsed.billing_breakdown)
362
404
  };
363
405
  }
@@ -1951,7 +1993,7 @@ export class TokenbuddyDaemon {
1951
1993
  const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
1952
1994
  await this.refreshSellerRouteMetadata(registrySellers);
1953
1995
  this.sellerPool.ensureRegistrySellers(registrySellers);
1954
- this.scheduleLazyPrewarmIfNeeded(modelId, protocol, paymentMethod);
1996
+ const lazyPrewarmDecision = this.scheduleLazyPrewarmIfNeeded(modelId, protocol, paymentMethod);
1955
1997
  this.sellerPool.recycleOpenCircuits();
1956
1998
  const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
1957
1999
  const concurrencySnapshot = this.sellerConcurrencyLimiter.snapshot();
@@ -1986,6 +2028,26 @@ export class TokenbuddyDaemon {
1986
2028
  routeSourceReason: planned.sourceReason,
1987
2029
  routeReason: planned.reason,
1988
2030
  candidateDiagnostics: planned.diagnostics,
2031
+ prewarmCachePresent: lazyPrewarmDecision.freshness.present,
2032
+ prewarmCacheState: lazyPrewarmDecision.freshness.state,
2033
+ prewarmCacheExpired: lazyPrewarmDecision.freshness.expired,
2034
+ prewarmCacheExpiringSoon: lazyPrewarmDecision.freshness.expiringSoon,
2035
+ prewarmCacheRemainingMs: lazyPrewarmDecision.freshness.remainingMs,
2036
+ prewarmCacheCandidateCount: lazyPrewarmDecision.freshness.entry?.candidates.length ?? 0,
2037
+ prewarmLazyScheduled: lazyPrewarmDecision.scheduled,
2038
+ prewarmLazyReason: lazyPrewarmDecision.reason,
2039
+ poolSnapshotCount: poolById.size,
2040
+ localConcurrencyEnabled: concurrencySnapshot.enabled,
2041
+ plannedRoutes: planned.routes.map((route, index) => ({
2042
+ rank: index + 1,
2043
+ sellerId: route.seller.id,
2044
+ routeState: route.metrics.routeState,
2045
+ healthScore: route.metrics.healthScore,
2046
+ ttftMs: route.metrics.ttftMs,
2047
+ avgTokensPerSecond: route.metrics.avgTokensPerSecond,
2048
+ discountRatio: route.metrics.discountRatio,
2049
+ registryOrder: route.metrics.registryOrder
2050
+ })),
1989
2051
  sellerCount: planned.routes.length,
1990
2052
  selectedSellerId: planned.routes[0]?.seller.id,
1991
2053
  selectedSellerName: planned.routes[0]?.seller.name,
@@ -2071,11 +2133,11 @@ export class TokenbuddyDaemon {
2071
2133
  scheduleLazyPrewarmIfNeeded(modelId, protocol, paymentMethod) {
2072
2134
  const freshness = this.prewarmCache.freshness(modelId, protocol, paymentMethod);
2073
2135
  if (freshness.present && !freshness.expired) {
2074
- return;
2136
+ return { scheduled: false, reason: "fresh", freshness };
2075
2137
  }
2076
2138
  const key = prewarmKey(modelId, protocol, paymentMethod);
2077
2139
  if (this.lazyPrewarmKeys.has(key)) {
2078
- return;
2140
+ return { scheduled: false, reason: "already_scheduled", freshness };
2079
2141
  }
2080
2142
  this.lazyPrewarmKeys.add(key);
2081
2143
  logger.info("prewarm.lazy.scheduled", "lazy prewarm scheduled for requested model", {
@@ -2092,6 +2154,7 @@ export class TokenbuddyDaemon {
2092
2154
  }).finally(() => {
2093
2155
  this.lazyPrewarmKeys.delete(key);
2094
2156
  });
2157
+ return { scheduled: true, reason: "scheduled", freshness };
2095
2158
  }
2096
2159
  schedulePrewarmForModel(input) {
2097
2160
  if (!input.protocol || !input.paymentMethod) {
@@ -2137,6 +2200,22 @@ export class TokenbuddyDaemon {
2137
2200
  }
2138
2201
  return "soft_5xx";
2139
2202
  }
2203
+ classifyRouteException(error, responseStarted) {
2204
+ if (responseStarted) {
2205
+ return "stream_aborted";
2206
+ }
2207
+ if (this.isBuyerDeadlineError(error)) {
2208
+ return "deadline";
2209
+ }
2210
+ return "soft_5xx";
2211
+ }
2212
+ isBuyerDeadlineError(error) {
2213
+ const message = this.failoverErrorMessage(error).toLowerCase();
2214
+ if (message.includes("buyer deadline exceeded")) {
2215
+ return true;
2216
+ }
2217
+ return error instanceof Error && error.name === "AbortError";
2218
+ }
2140
2219
  /**
2141
2220
  * Emit the structured failover log line. The decision itself is
2142
2221
  * produced by `RouteFailover.decide`; this helper exists only to keep
@@ -2160,7 +2239,14 @@ export class TokenbuddyDaemon {
2160
2239
  nextAttempt: context.attempt + 1,
2161
2240
  nextAttemptNumber: context.attempt + 2,
2162
2241
  reason: decision.reason,
2242
+ controllerAction: decision.action,
2243
+ failureKind: context.failureKind,
2163
2244
  status: context.status,
2245
+ protocol: context.protocol,
2246
+ paymentMethod: context.paymentMethod,
2247
+ routePlanSource: context.routePlanSource,
2248
+ routePlanReason: context.routePlanReason,
2249
+ routePlanSellerCount: context.routePlanSellerCount,
2164
2250
  retryDelayMs: decision.retryDelayMs
2165
2251
  });
2166
2252
  return;
@@ -2179,7 +2265,14 @@ export class TokenbuddyDaemon {
2179
2265
  attempt: context.attempt,
2180
2266
  attemptNumber: context.attempt + 1,
2181
2267
  reason: decision.reason,
2268
+ controllerAction: decision.action,
2269
+ failureKind: context.failureKind,
2182
2270
  status: context.status,
2271
+ protocol: context.protocol,
2272
+ paymentMethod: context.paymentMethod,
2273
+ routePlanSource: context.routePlanSource,
2274
+ routePlanReason: context.routePlanReason,
2275
+ routePlanSellerCount: context.routePlanSellerCount,
2183
2276
  wastedCreditMicros: decision.wastedCreditMicros,
2184
2277
  freshPurchase: decision.freshPurchase,
2185
2278
  retryAttemptsBeforeFailover: decision.retryAttemptsBeforeFailover
@@ -2199,7 +2292,13 @@ export class TokenbuddyDaemon {
2199
2292
  attemptNumber: context.attempt + 1,
2200
2293
  action: decision.action,
2201
2294
  reason: decision.reason,
2202
- status: context.status
2295
+ failureKind: context.failureKind,
2296
+ status: context.status,
2297
+ protocol: context.protocol,
2298
+ paymentMethod: context.paymentMethod,
2299
+ routePlanSource: context.routePlanSource,
2300
+ routePlanReason: context.routePlanReason,
2301
+ routePlanSellerCount: context.routePlanSellerCount
2203
2302
  });
2204
2303
  }
2205
2304
  logPaymentProofResolved(route, proofSource, requestId) {
@@ -2365,11 +2464,12 @@ export class TokenbuddyDaemon {
2365
2464
  if (entry.upstreamStatus === "degraded") {
2366
2465
  return "degraded";
2367
2466
  }
2368
- if (Number.isFinite(entry.healthScore)) {
2369
- if (entry.healthScore < 40) {
2467
+ const healthScore = entry.healthScore;
2468
+ if (Number.isFinite(healthScore)) {
2469
+ if (healthScore < 40) {
2370
2470
  return "error";
2371
2471
  }
2372
- if (entry.healthScore < 70) {
2472
+ if (healthScore < 70) {
2373
2473
  return "degraded";
2374
2474
  }
2375
2475
  }
@@ -2408,6 +2508,10 @@ export class TokenbuddyDaemon {
2408
2508
  promptTokens: 0,
2409
2509
  completionTokens: 0,
2410
2510
  cacheReadTokens: 0,
2511
+ cacheWriteTokens: 0,
2512
+ reasoningTokens: 0,
2513
+ audioInputTokens: 0,
2514
+ webSearchCalls: 0,
2411
2515
  billedMicros: 0
2412
2516
  };
2413
2517
  if (!bodyText.trim()) {
@@ -2425,6 +2529,10 @@ export class TokenbuddyDaemon {
2425
2529
  const usage = usageRecord(data?.usage) ?? usageRecord(usageRecord(data?.response)?.usage);
2426
2530
  const promptDetails = usageRecord(usage?.prompt_tokens_details);
2427
2531
  const inputDetails = usageRecord(usage?.input_tokens_details);
2532
+ const completionDetails = usageRecord(usage?.completion_tokens_details);
2533
+ const outputDetails = usageRecord(usage?.output_tokens_details);
2534
+ const cacheCreation = usageRecord(usage?.cache_creation);
2535
+ const serverToolUse = usageRecord(usage?.server_tool_use);
2428
2536
  const promptTokens = nonNegativeIntegerField(usage?.prompt_tokens) ?? nonNegativeIntegerField(usage?.input_tokens) ?? 0;
2429
2537
  const completionTokens = nonNegativeIntegerField(usage?.completion_tokens) ?? nonNegativeIntegerField(usage?.output_tokens) ?? 0;
2430
2538
  const cacheReadTokens = nonNegativeIntegerField(promptDetails?.cached_tokens)
@@ -2432,18 +2540,36 @@ export class TokenbuddyDaemon {
2432
2540
  ?? nonNegativeIntegerField(usage?.cache_read_input_tokens)
2433
2541
  ?? nonNegativeIntegerField(usage?.cache_read_tokens)
2434
2542
  ?? 0;
2543
+ const cacheCreationOneHour = nonNegativeIntegerField(cacheCreation?.ephemeral_1h_input_tokens);
2544
+ const cacheCreationFiveMinute = nonNegativeIntegerField(cacheCreation?.ephemeral_5m_input_tokens);
2545
+ const cacheCreationBucketTokens = cacheCreationOneHour !== undefined || cacheCreationFiveMinute !== undefined
2546
+ ? (cacheCreationOneHour ?? 0) + (cacheCreationFiveMinute ?? 0)
2547
+ : undefined;
2548
+ const cacheWriteTokens = nonNegativeIntegerField(promptDetails?.cache_write_tokens)
2549
+ ?? nonNegativeIntegerField(usage?.cache_creation_input_tokens)
2550
+ ?? cacheCreationBucketTokens
2551
+ ?? 0;
2552
+ const reasoningTokens = nonNegativeIntegerField(completionDetails?.reasoning_tokens)
2553
+ ?? nonNegativeIntegerField(outputDetails?.reasoning_tokens)
2554
+ ?? 0;
2555
+ const audioInputTokens = nonNegativeIntegerField(promptDetails?.audio_tokens) ?? 0;
2556
+ const webSearchCalls = nonNegativeIntegerField(serverToolUse?.web_search_requests) ?? 0;
2435
2557
  const upstreamCostMicros = extractUpstreamCostMicros(data, usage);
2436
2558
  const billablePromptTokens = Math.max(0, promptTokens - cacheReadTokens);
2437
2559
  const imageMetadata = endpoint === "/v1/images/generations"
2438
2560
  ? imageUsageMetadata(data, usageRecord(requestBody))
2439
2561
  : {};
2440
2562
  return {
2563
+ ...imageMetadata,
2441
2564
  promptTokens,
2442
2565
  completionTokens,
2443
2566
  cacheReadTokens,
2567
+ cacheWriteTokens,
2568
+ reasoningTokens,
2569
+ audioInputTokens,
2570
+ webSearchCalls,
2444
2571
  billedMicros: upstreamCostMicros ?? (billablePromptTokens + completionTokens) * 4,
2445
- ...(upstreamCostMicros === undefined ? {} : { upstreamCostMicros }),
2446
- ...imageMetadata
2572
+ ...(upstreamCostMicros === undefined ? {} : { upstreamCostMicros })
2447
2573
  };
2448
2574
  }
2449
2575
  readUsageFromSse(bodyText, endpoint, requestBody) {
@@ -2499,18 +2625,29 @@ export class TokenbuddyDaemon {
2499
2625
  status: settlement ? "settled" : "estimated",
2500
2626
  promptTokens: usage.promptTokens,
2501
2627
  completionTokens: usage.completionTokens,
2502
- cacheReadTokens: usage.cacheReadTokens,
2628
+ cacheReadTokens: settlement?.billableUsage?.cacheReadTokens ?? usage.cacheReadTokens,
2629
+ cacheWriteTokens: settlement?.billableUsage?.cacheWriteTokens ?? usage.cacheWriteTokens,
2630
+ reasoningTokens: settlement?.billableUsage?.reasoningTokens ?? usage.reasoningTokens,
2631
+ audioInputTokens: settlement?.billableUsage?.audioInputTokens ?? usage.audioInputTokens,
2632
+ webSearchCalls: settlement?.billableUsage?.webSearchCalls ?? usage.webSearchCalls,
2503
2633
  billedMicros: settledMicros ?? usage.billedMicros,
2504
2634
  estimatedMicros: usage.billedMicros,
2505
2635
  settledMicros,
2506
2636
  settledUsdMicros: settlement?.settledUsdMicros,
2637
+ pricingCurrency: settlement?.pricingCurrency,
2638
+ settlementUnit: settlement?.settlementUnit,
2639
+ usdToMicrosRate: settlement?.usdToMicrosRate,
2640
+ billingSource: settlement?.billingSource ?? billingBreakdown?.billingSource ?? (settlement ? "seller_computed" : "buyer_estimated"),
2641
+ reportedCostUsdMicros: settlement?.reportedCostUsdMicros ?? billingBreakdown?.reportedCostUsdMicros,
2507
2642
  priceVersion: settlement?.priceVersion,
2508
2643
  inputPriceMicrosPer1m: billingBreakdown?.inputPriceMicrosPer1m,
2509
2644
  outputPriceMicrosPer1m: billingBreakdown?.outputPriceMicrosPer1m,
2510
2645
  cacheReadPriceMicrosPer1m: billingBreakdown?.cacheReadPriceMicrosPer1m,
2646
+ cacheWritePriceMicrosPer1m: billingBreakdown?.cacheWritePriceMicrosPer1m,
2511
2647
  inputCostMicros: billingBreakdown?.inputCostMicros,
2512
2648
  outputCostMicros: billingBreakdown?.outputCostMicros,
2513
2649
  cacheReadCostMicros: billingBreakdown?.cacheReadCostMicros,
2650
+ cacheWriteCostMicros: billingBreakdown?.cacheWriteCostMicros,
2514
2651
  originalUsdMicros: billingBreakdown?.originalUsdMicros,
2515
2652
  billingMultiplier: billingBreakdown?.billingMultiplier,
2516
2653
  serviceTier: billingBreakdown?.serviceTier,
@@ -3528,7 +3665,11 @@ export class TokenbuddyDaemon {
3528
3665
  const usage = this.readUsage(responseBody, endpoint, upstreamBody);
3529
3666
  const durationMs = Date.now() - startedAt;
3530
3667
  const ttftMs = firstByteAt !== undefined ? firstByteAt - startedAt : undefined;
3531
- const billedMicros = usage.billedMicros > 0 ? usage.billedMicros : Math.max(1, bytes);
3668
+ const billedMicros = usage.upstreamCostMicros !== undefined
3669
+ ? usage.upstreamCostMicros
3670
+ : usage.billedMicros > 0
3671
+ ? usage.billedMicros
3672
+ : Math.max(1, bytes);
3532
3673
  const completionTokens = usage.completionTokens;
3533
3674
  const avgTokensPerSecond = completionTokens > 0
3534
3675
  ? averageOutputTokensPerSecond(completionTokens, durationMs, ttftMs)
@@ -3549,8 +3690,15 @@ export class TokenbuddyDaemon {
3549
3690
  promptTokens: usage.promptTokens,
3550
3691
  completionTokens: usage.completionTokens,
3551
3692
  cacheReadTokens: usage.cacheReadTokens,
3693
+ cacheWriteTokens: usage.cacheWriteTokens,
3694
+ reasoningTokens: usage.reasoningTokens,
3695
+ audioInputTokens: usage.audioInputTokens,
3696
+ webSearchCalls: usage.webSearchCalls,
3552
3697
  billedMicros,
3553
3698
  estimatedMicros: billedMicros,
3699
+ pricingCurrency: usage.upstreamCostMicros === undefined ? undefined : "USD",
3700
+ billingSource: usage.upstreamCostMicros === undefined ? "buyer_estimated" : "provider_reported",
3701
+ reportedCostUsdMicros: usage.upstreamCostMicros,
3554
3702
  priceVersion: `local-provider:${provider.id}`,
3555
3703
  billingUnit: endpoint === "/v1/images/generations" ? "images" : "tokens",
3556
3704
  balanceSource: "self_funded_provider",
@@ -3587,8 +3735,15 @@ export class TokenbuddyDaemon {
3587
3735
  promptTokens: usage.promptTokens,
3588
3736
  completionTokens: usage.completionTokens,
3589
3737
  cacheReadTokens: usage.cacheReadTokens,
3738
+ cacheWriteTokens: usage.cacheWriteTokens,
3739
+ reasoningTokens: usage.reasoningTokens,
3740
+ audioInputTokens: usage.audioInputTokens,
3741
+ webSearchCalls: usage.webSearchCalls,
3590
3742
  billedMicros: usage.billedMicros,
3591
3743
  estimatedMicros: usage.billedMicros,
3744
+ pricingCurrency: usage.upstreamCostMicros === undefined ? undefined : "USD",
3745
+ billingSource: usage.upstreamCostMicros === undefined ? "buyer_estimated" : "provider_reported",
3746
+ reportedCostUsdMicros: usage.upstreamCostMicros,
3592
3747
  priceVersion: `local-provider:${provider.id}`,
3593
3748
  billingUnit: endpoint === "/v1/images/generations" ? "images" : "tokens",
3594
3749
  imageCount: usage.imageCount,
@@ -3622,12 +3777,32 @@ export class TokenbuddyDaemon {
3622
3777
  const modelId = resolvedModelId;
3623
3778
  const requestId = req.header("x-request-id") || (body && typeof body === "object" ? body.requestId : undefined) || `proxy_req_${crypto.randomBytes(8).toString("hex")}`;
3624
3779
  const idempotencyKey = req.header("idempotency-key") || `idem_${crypto.randomBytes(12).toString("hex")}`;
3780
+ const protocol = this.endpointProtocol(endpoint);
3781
+ const providerMode = this.currentProviderMode().mode;
3782
+ logger.info("proxy.request.received", "proxy request received", {
3783
+ requestId,
3784
+ requestedModel: requestedModelId,
3785
+ resolvedModel: modelId,
3786
+ model: modelId,
3787
+ endpoint,
3788
+ protocol,
3789
+ providerMode,
3790
+ stream: Boolean(body.stream)
3791
+ });
3625
3792
  if (!modelId) {
3793
+ logger.warn("proxy.request.rejected", "proxy request rejected before route selection", {
3794
+ requestId,
3795
+ endpoint,
3796
+ protocol,
3797
+ providerMode,
3798
+ errorCode: "model_required",
3799
+ durationMs: Date.now() - startedAt
3800
+ });
3626
3801
  res.status(400).json({ error: { code: "model_required", message: "request body must include model" } });
3627
3802
  return;
3628
3803
  }
3629
3804
  try {
3630
- const routeSelection = this.currentProviderMode().mode === "manual"
3805
+ const routeSelection = providerMode === "manual"
3631
3806
  ? this.selectManualProviderRoutes(endpoint, modelId)
3632
3807
  : await this.selectSellerRoutes(endpoint, modelId, requestId);
3633
3808
  const { routes, plan, paymentMethod } = routeSelection;
@@ -3684,6 +3859,7 @@ export class TokenbuddyDaemon {
3684
3859
  sellerKey,
3685
3860
  sellerId: sellerKey,
3686
3861
  model: modelId,
3862
+ requestedModel: requestedModelId,
3687
3863
  endpoint,
3688
3864
  protocol: route.protocol,
3689
3865
  paymentMethod: route.paymentMethod,
@@ -3704,6 +3880,7 @@ export class TokenbuddyDaemon {
3704
3880
  sellerKey,
3705
3881
  sellerId: sellerKey,
3706
3882
  model: modelId,
3883
+ requestedModel: requestedModelId,
3707
3884
  endpoint,
3708
3885
  protocol: route.protocol,
3709
3886
  paymentMethod: route.paymentMethod,
@@ -3811,8 +3988,14 @@ export class TokenbuddyDaemon {
3811
3988
  attempt,
3812
3989
  attemptNumber: attempt + 1,
3813
3990
  reason: "purchase_failed",
3991
+ failureKind: "purchase_failed",
3814
3992
  controllerReason: decision.reason,
3815
- controllerAction: decision.action
3993
+ controllerAction: decision.action,
3994
+ protocol: route.protocol,
3995
+ paymentMethod: route.paymentMethod,
3996
+ routePlanSource: route.planSource,
3997
+ routePlanReason: route.planReason,
3998
+ routePlanSellerCount: route.planSellerCount
3816
3999
  });
3817
4000
  lastError = purchaseError;
3818
4001
  break;
@@ -3908,7 +4091,13 @@ export class TokenbuddyDaemon {
3908
4091
  routeIndex,
3909
4092
  routesRemaining: routes.length - routeIndex,
3910
4093
  attempt,
3911
- status: upstreamResponse.status
4094
+ status: upstreamResponse.status,
4095
+ failureKind: kind,
4096
+ protocol: route.protocol,
4097
+ paymentMethod: route.paymentMethod,
4098
+ routePlanSource: route.planSource,
4099
+ routePlanReason: route.planReason,
4100
+ routePlanSellerCount: route.planSellerCount
3912
4101
  });
3913
4102
  if (decision.action === "fail_fast" || decision.action === "abort") {
3914
4103
  this.copyUpstreamHeaders(upstreamResponse, res);
@@ -4034,7 +4223,7 @@ export class TokenbuddyDaemon {
4034
4223
  }
4035
4224
  catch (routeError) {
4036
4225
  lastError = routeError;
4037
- const kind = "deadline";
4226
+ const kind = this.classifyRouteException(routeError, res.headersSent);
4038
4227
  const decision = this.routeFailover.decide({
4039
4228
  sellerId: sellerKey,
4040
4229
  errorKind: kind,
@@ -4049,16 +4238,35 @@ export class TokenbuddyDaemon {
4049
4238
  routeIndex,
4050
4239
  routesRemaining: routes.length - routeIndex,
4051
4240
  attempt,
4052
- reason: "exception"
4241
+ reason: "exception",
4242
+ failureKind: kind,
4243
+ protocol: route.protocol,
4244
+ paymentMethod: route.paymentMethod,
4245
+ routePlanSource: route.planSource,
4246
+ routePlanReason: route.planReason,
4247
+ routePlanSellerCount: route.planSellerCount
4053
4248
  });
4054
4249
  logger.warn("proxy.route.failed", "seller route failed before response", {
4055
4250
  requestId,
4056
4251
  sellerKey,
4057
4252
  model: modelId,
4058
4253
  endpoint,
4254
+ failureKind: kind,
4255
+ protocol: route.protocol,
4256
+ paymentMethod: route.paymentMethod,
4257
+ routePlanSource: route.planSource,
4258
+ routePlanReason: route.planReason,
4259
+ routePlanSellerCount: route.planSellerCount,
4260
+ responseStarted: res.headersSent,
4059
4261
  errorMessage: this.failoverErrorMessage(routeError),
4060
4262
  durationMs: Date.now() - startedAt
4061
4263
  });
4264
+ if (kind === "stream_aborted") {
4265
+ if (!res.writableEnded) {
4266
+ res.end();
4267
+ }
4268
+ return;
4269
+ }
4062
4270
  if (decision.action === "retry_same_seller") {
4063
4271
  attempt += 1;
4064
4272
  if (decision.retryDelayMs) {
@@ -4078,6 +4286,19 @@ export class TokenbuddyDaemon {
4078
4286
  lease.release();
4079
4287
  }
4080
4288
  }
4289
+ logger.warn("route.failover.exhausted", "all seller routes exhausted", {
4290
+ requestId,
4291
+ model: modelId,
4292
+ requestedModel: requestedModelId,
4293
+ endpoint,
4294
+ protocol,
4295
+ providerMode,
4296
+ routePlanReason: plan.reason,
4297
+ routePlanSellerCount: routes.length,
4298
+ attemptedSellerIds: routes.map((route) => route.seller.id),
4299
+ lastErrorMessage: lastError instanceof Error ? lastError.message : lastError === undefined ? undefined : String(lastError),
4300
+ durationMs: Date.now() - startedAt
4301
+ });
4081
4302
  throw lastError instanceof Error ? lastError : new Error("all seller routes failed");
4082
4303
  }
4083
4304
  catch (error) {
@@ -5303,6 +5524,40 @@ export class TokenbuddyDaemon {
5303
5524
  });
5304
5525
  // 2) GET /routing/preview — 算「假如改完会怎样」,不改 state
5305
5526
  // query: modelId? protocol? paymentMethod? mode? scorer? sellerId? sellerIds?(逗号分隔)
5527
+ controlApp.get("/routing/recommendations", async (req, res) => {
5528
+ try {
5529
+ const scope = buildRecommendationScopeFromQuery(req.query);
5530
+ const result = await this.buildRoutingRecommendations({
5531
+ modelIds: parseCommaSeparatedList(typeof req.query.modelIds === "string" ? req.query.modelIds : ""),
5532
+ protocol: typeof req.query.protocol === "string" ? req.query.protocol : undefined,
5533
+ paymentMethod: typeof req.query.paymentMethod === "string" ? req.query.paymentMethod : undefined,
5534
+ scorer: parseRoutingScorerQuery(typeof req.query.scorer === "string" ? req.query.scorer : ""),
5535
+ scope,
5536
+ topK: parsePositiveIntegerQuery(typeof req.query.topK === "string" ? req.query.topK : "")
5537
+ });
5538
+ if ("error" in result) {
5539
+ res.status(409).json({
5540
+ error: { code: result.error, message: `cannot build recommendations: ${result.error}` }
5541
+ });
5542
+ return;
5543
+ }
5544
+ logger.info("routing.recommendations.built", "routing recommendations built", {
5545
+ scope: result.scope.type,
5546
+ focusModelCount: result.focusModelSet.length,
5547
+ protocol: result.protocol,
5548
+ protocolByModelId: result.protocolByModelId,
5549
+ sellerCount: result.sellers.length,
5550
+ excludedSellerCount: result.excludedSellers.length,
5551
+ scorer: result.scorer
5552
+ });
5553
+ res.status(200).json(result);
5554
+ }
5555
+ catch (error) {
5556
+ const errorMessage = error instanceof Error ? error.message : String(error);
5557
+ logger.warn("routing.recommendations.failed", "routing recommendations failed", { errorMessage });
5558
+ res.status(400).json({ error: { code: "routing_recommendations_failed", message: errorMessage } });
5559
+ }
5560
+ });
5306
5561
  controlApp.get("/routing/preview", async (req, res) => {
5307
5562
  try {
5308
5563
  const override = buildRoutingConfigFromQuery(req.query);
@@ -5602,6 +5857,71 @@ export class TokenbuddyDaemon {
5602
5857
  });
5603
5858
  return { modelId, protocol, paymentMethod, plan };
5604
5859
  }
5860
+ async buildRoutingRecommendations(input) {
5861
+ const registry = this.lastRegistrySnapshot ?? (this.forceRegistrySnapshotForTest ? null : await this.fetchRegistry());
5862
+ if (!registry) {
5863
+ return { error: "registry_not_loaded" };
5864
+ }
5865
+ const registrySellers = reorderDefaultSellerFirst(registry.sellers, registry.defaultSeller);
5866
+ const focusModelSet = this.resolveRecommendationFocusSet(input.modelIds, registrySellers);
5867
+ if (focusModelSet.length === 0) {
5868
+ return { error: "no_focus_model_available" };
5869
+ }
5870
+ const paymentMethod = input.paymentMethod?.trim() || this.defaultPaymentMethod() || "clawtip";
5871
+ const explicitProtocol = input.protocol?.trim();
5872
+ const protocolByModelId = this.resolveRecommendationProtocols(focusModelSet, registrySellers, paymentMethod, explicitProtocol);
5873
+ const protocol = explicitProtocol || recommendationProtocolLabel(protocolByModelId);
5874
+ const scorer = input.scorer ?? this.currentAutoProviderConfig().scorer;
5875
+ const scope = input.scope ?? { type: "recommended" };
5876
+ await this.refreshSellerRouteMetadata(registrySellers);
5877
+ this.sellerPool.ensureRegistrySellers(registrySellers);
5878
+ const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
5879
+ const concurrencySnapshot = this.sellerConcurrencyLimiter.snapshot();
5880
+ const localConcurrencyBySellerId = new Map(concurrencySnapshot.active.map((entry) => [entry.sellerId, entry.activeCount]));
5881
+ return buildSellerRouteRecommendations({
5882
+ focusModelSet,
5883
+ protocol,
5884
+ protocolByModelId,
5885
+ paymentMethod,
5886
+ scorer,
5887
+ scope,
5888
+ topK: input.topK,
5889
+ registrySellers,
5890
+ prewarmCandidatesForModel: (modelId, protocolForModel) => this.prewarmCache.get(modelId, protocolForModel, paymentMethod)?.candidates,
5891
+ sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry, concurrencySnapshot.enabled
5892
+ ? {
5893
+ localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
5894
+ localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
5895
+ }
5896
+ : undefined)),
5897
+ now: Date.now()
5898
+ });
5899
+ }
5900
+ resolveRecommendationProtocols(modelIds, registrySellers, paymentMethod, explicitProtocol) {
5901
+ return Object.fromEntries(modelIds.map((modelId) => [
5902
+ modelId,
5903
+ explicitProtocol || this.resolveRecommendationProtocol(modelId, paymentMethod, registrySellers)
5904
+ ]));
5905
+ }
5906
+ resolveRecommendationProtocol(modelId, paymentMethod, registrySellers) {
5907
+ for (const protocol of recommendationProtocolPriority(modelId)) {
5908
+ if (registrySellers.some((seller) => recommendationSellerSupports(seller, modelId, protocol, paymentMethod))) {
5909
+ return protocol;
5910
+ }
5911
+ }
5912
+ return this.resolvePrewarmProtocol(modelId, paymentMethod) ?? recommendationProtocolPriority(modelId)[0] ?? "chat_completions";
5913
+ }
5914
+ resolveRecommendationFocusSet(modelIds, registrySellers) {
5915
+ const explicit = this.uniqueModelIds(modelIds ?? []);
5916
+ if (explicit.length > 0) {
5917
+ return explicit;
5918
+ }
5919
+ const focusSet = this.resolveFocusSet();
5920
+ if (focusSet.length > 0) {
5921
+ return focusSet;
5922
+ }
5923
+ return this.uniqueModelIds(registrySellers.flatMap((seller) => seller.models ?? [])).slice(0, 20);
5924
+ }
5605
5925
  async runStartupPrewarmSweep() {
5606
5926
  const focusSet = this.resolveFocusSet();
5607
5927
  if (focusSet.length === 0) {
@@ -5626,7 +5946,7 @@ export class TokenbuddyDaemon {
5626
5946
  }
5627
5947
  }
5628
5948
  resolvePrewarmProtocol(modelId, paymentMethod = "clawtip") {
5629
- for (const protocol of ["chat_completions", "messages", "responses", "images_generations"]) {
5949
+ for (const protocol of recommendationProtocolPriority(modelId)) {
5630
5950
  if (this.modelIndex.sellersFor(modelId, { protocol, paymentMethod }).length > 0) {
5631
5951
  return protocol;
5632
5952
  }
@@ -6031,6 +6351,90 @@ function normalizeTrustedRegistryCache(value) {
6031
6351
  trust: data.trust
6032
6352
  };
6033
6353
  }
6354
/**
 * Collapses a model-to-protocol map into a single label.
 *
 * Falsy protocol values are ignored. The label is the protocol itself when
 * exactly one distinct protocol remains, "mixed" when several remain, and
 * "chat_completions" when none remain.
 *
 * @param {Object} protocolByModelId - Protocol keyed by model id.
 * @returns {string} The shared protocol, "mixed", or "chat_completions".
 */
function recommendationProtocolLabel(protocolByModelId) {
    const distinct = new Set();
    for (const protocol of Object.values(protocolByModelId)) {
        if (protocol) {
            distinct.add(protocol);
        }
    }
    if (distinct.size > 1) {
        return "mixed";
    }
    // With zero entries the destructured value is undefined and the default applies.
    const [only] = distinct;
    return only ?? "chat_completions";
}
6361
/**
 * Returns the protocols to try for a model, most preferred first.
 *
 * The model id is normalized with `normalizeRecommendationLookup` and then
 * matched by substring:
 *   - contains "gpt-image" or "dall-e": image generation first;
 *   - contains "claude" or "anthropic": messages first;
 *   - anything else: chat completions first.
 *
 * @param {string} modelId - Model id to classify.
 * @returns {string[]} A new four-element array of protocol names.
 */
function recommendationProtocolPriority(modelId) {
    const lookup = normalizeRecommendationLookup(modelId);
    const mentionsAny = (...needles) => needles.some((needle) => lookup.includes(needle));
    if (mentionsAny("gpt-image", "dall-e")) {
        return ["images_generations", "chat_completions", "responses", "messages"];
    }
    if (mentionsAny("claude", "anthropic")) {
        return ["messages", "chat_completions", "responses", "images_generations"];
    }
    return ["chat_completions", "responses", "messages", "images_generations"];
}
6371
/**
 * Reports whether a seller can serve a model over a given protocol with a
 * given payment method. All three checks must pass; they run in the order
 * model, protocol, payment and stop at the first failure.
 *
 * @param {object} seller - Seller record to inspect.
 * @param {string} modelId - Required model.
 * @param {string} protocol - Required protocol.
 * @param {string} paymentMethod - Required payment method.
 * @returns {boolean} True only when every requirement is met.
 */
function recommendationSellerSupports(seller, modelId, protocol, paymentMethod) {
    if (!recommendationSellerSupportsModel(seller, modelId)) {
        return false;
    }
    if (!recommendationSellerSupportsProtocol(seller, protocol)) {
        return false;
    }
    return recommendationSellerSupportsPayment(seller, paymentMethod);
}
6376
/**
 * Reports whether `seller.models` contains the given model id, comparing
 * both sides through `normalizeRecommendationLookup`. A seller without a
 * `models` list supports nothing.
 *
 * @param {object} seller - Seller record with an optional `models` array.
 * @param {string} modelId - Model id to look for.
 * @returns {boolean} True when a listed model matches.
 */
function recommendationSellerSupportsModel(seller, modelId) {
    const wanted = normalizeRecommendationLookup(modelId);
    const listed = seller.models ?? [];
    return listed.some((candidate) => wanted === normalizeRecommendationLookup(candidate));
}
6380
/**
 * Reports whether `seller.supportedProtocols`, after alias expansion by
 * `recommendationProtocolAliases`, contains the given protocol. Both sides
 * are compared through `normalizeRecommendationLookup`. A seller without a
 * `supportedProtocols` list supports nothing.
 *
 * @param {object} seller - Seller record with an optional `supportedProtocols` array.
 * @param {string} protocol - Protocol to look for.
 * @returns {boolean} True when a listed (or aliased) protocol matches.
 */
function recommendationSellerSupportsProtocol(seller, protocol) {
    const wanted = normalizeRecommendationLookup(protocol);
    const listed = recommendationProtocolAliases(seller.supportedProtocols ?? []);
    return listed.some((candidate) => wanted === normalizeRecommendationLookup(candidate));
}
6385
/**
 * Reports whether `seller.paymentMethods` contains the given payment
 * method, comparing both sides through `normalizeRecommendationLookup`.
 * A seller without a `paymentMethods` list supports nothing.
 *
 * @param {object} seller - Seller record with an optional `paymentMethods` array.
 * @param {string} paymentMethod - Payment method to look for.
 * @returns {boolean} True when a listed payment method matches.
 */
function recommendationSellerSupportsPayment(seller, paymentMethod) {
    const wanted = normalizeRecommendationLookup(paymentMethod);
    const listed = seller.paymentMethods ?? [];
    return listed.some((candidate) => wanted === normalizeRecommendationLookup(candidate));
}
6389
/**
 * Expands a seller's protocol list so that "anthropic_messages" also counts
 * as "messages".
 *
 * Detection ignores surrounding whitespace and letter case, matching how
 * the protocol list is compared by its consumer. Without this, a seller
 * advertising "Anthropic_Messages" would never receive the alias, and one
 * advertising "Messages" would receive a redundant duplicate.
 *
 * @param {string[]} protocols - Protocol names as advertised by the seller.
 * @returns {string[]} The same array when no alias is needed, otherwise a
 *   new array with "messages" appended. The input is never mutated.
 */
function recommendationProtocolAliases(protocols) {
    // Non-string entries are skipped here rather than thrown on, so this
    // helper never fails earlier than the original exact-match check did.
    const advertises = (name) => protocols.some((entry) => typeof entry === "string" && entry.trim().toLowerCase() === name);
    if (advertises("anthropic_messages") && !advertises("messages")) {
        return [...protocols, "messages"];
    }
    return protocols;
}
6394
/**
 * Canonicalizes a lookup key by stripping surrounding whitespace and
 * lower-casing it, so comparisons ignore case and padding.
 *
 * @param {string} value - Raw key; must be a string (anything else throws).
 * @returns {string} The trimmed, lower-cased key.
 */
function normalizeRecommendationLookup(value) {
    const trimmed = value.trim();
    return trimmed.toLowerCase();
}
6397
/**
 * Translates query-string parameters into a recommendation scope object.
 *
 * `query.scope` and `query.sellerIds` are read only when they are strings
 * (anything else is treated as empty) and are trimmed before use.
 *   - empty or "recommended" scope yields `{ type: "recommended" }`;
 *   - "custom" scope yields `{ type: "custom", sellerIds }`, where
 *     `sellerIds` is produced by `parseSellerIdList`.
 *
 * @param {object} query - Parsed query parameters.
 * @returns {{type: string, sellerIds?: *}} The scope descriptor.
 * @throws {Error} When scope is neither empty, "recommended" nor "custom".
 */
function buildRecommendationScopeFromQuery(query) {
    const trimmedString = (raw) => (typeof raw === "string" ? raw.trim() : "");
    const requestedScope = trimmedString(query.scope);
    const requestedSellerIds = trimmedString(query.sellerIds);
    switch (requestedScope) {
        case "":
        case "recommended":
            return { type: "recommended" };
        case "custom":
            return {
                type: "custom",
                sellerIds: parseSellerIdList(requestedSellerIds)
            };
        default:
            throw new Error("scope must be recommended or custom");
    }
}
6411
/**
 * Validates a routing scorer name taken from a query string.
 *
 * @param {string} value - Raw query value; surrounding whitespace is ignored.
 * @returns {string|undefined} "speed", "discount" or "balanced"; undefined
 *   when the value is blank.
 * @throws {Error} When a non-blank value is not one of the three scorers.
 */
function parseRoutingScorerQuery(value) {
    const candidate = value.trim();
    if (candidate === "") {
        return undefined;
    }
    const knownScorers = ["speed", "discount", "balanced"];
    if (!knownScorers.includes(candidate)) {
        throw new Error("scorer must be speed, discount, or balanced");
    }
    return candidate;
}
6421
/**
 * Parses a positive integer taken from a query string (used for `topK`).
 *
 * Only plain decimal digits are accepted. `Number()` alone would also
 * accept hexadecimal, binary and exponent notation ("0x10", "0b11", "1e3"),
 * which should not pass as a count in a URL. The value must also be a safe
 * integer so it is never silently rounded.
 *
 * @param {string} value - Raw query value; surrounding whitespace is ignored.
 * @returns {number|undefined} The parsed integer, or undefined when blank.
 * @throws {Error} When a non-blank value is not a positive decimal integer
 *   within the safe-integer range.
 */
function parsePositiveIntegerQuery(value) {
    const trimmed = value.trim();
    if (!trimmed) {
        return undefined;
    }
    // Anything other than pure digits maps to NaN so it fails the check below.
    const parsed = /^\d+$/.test(trimmed) ? Number(trimmed) : Number.NaN;
    if (!Number.isSafeInteger(parsed) || parsed <= 0) {
        throw new Error("topK must be a positive integer");
    }
    return parsed;
}
6432
/**
 * Splits a comma-separated string into its trimmed, non-empty entries,
 * preserving their order.
 *
 * @param {string} value - Raw comma-separated text.
 * @returns {string[]} Entries with whitespace trimmed and blanks removed.
 */
function parseCommaSeparatedList(value) {
    const entries = [];
    for (const piece of value.split(",")) {
        const cleaned = piece.trim();
        if (cleaned) {
            entries.push(cleaned);
        }
    }
    return entries;
}
6034
6438
  /**
6035
6439
  * 从 query string 构造 `BuyerSellerRoutingConfig` override(用于 GET /routing/preview)。
6036
6440
  * 接受 `mode` / `scorer` / `sellerId` / `sellerIds`(逗号分隔)。