@tokenbuddy/tokenbuddy 1.0.35 → 1.0.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143) hide show
  1. package/dist/src/buyer-store.d.ts +6 -1
  2. package/dist/src/buyer-store.js +43 -4
  3. package/dist/src/cli.js +2 -2
  4. package/dist/src/daemon.d.ts +12 -0
  5. package/dist/src/daemon.js +791 -61
  6. package/dist/src/doctor-diagnostics.js +1 -6
  7. package/dist/src/provider-install.d.ts +2 -2
  8. package/dist/src/provider-install.js +248 -2
  9. package/dist/src/seller-catalog.d.ts +21 -0
  10. package/dist/src/seller-catalog.js +17 -0
  11. package/dist/src/seller-route-planner.d.ts +4 -1
  12. package/dist/src/seller-route-planner.js +3 -0
  13. package/dist/src/seller-routing-strategy.d.ts +3 -0
  14. package/dist/src/terminal-detect.d.ts +1 -1
  15. package/dist/src/terminal-detect.js +3 -2
  16. package/package.json +15 -2
  17. package/static/ui/assets/index-Djfl9tw5.js +271 -0
  18. package/static/ui/assets/index-DkfztCkn.css +1 -0
  19. package/static/ui/index.html +2 -2
  20. package/dist/src/buyer-store.d.ts.map +0 -1
  21. package/dist/src/buyer-store.js.map +0 -1
  22. package/dist/src/clawtip-bootstrap.d.ts.map +0 -1
  23. package/dist/src/clawtip-bootstrap.js.map +0 -1
  24. package/dist/src/cli.d.ts.map +0 -1
  25. package/dist/src/cli.js.map +0 -1
  26. package/dist/src/credit-tracker.d.ts.map +0 -1
  27. package/dist/src/credit-tracker.js.map +0 -1
  28. package/dist/src/daemon.d.ts.map +0 -1
  29. package/dist/src/daemon.js.map +0 -1
  30. package/dist/src/doctor-clawtip-wallet.d.ts.map +0 -1
  31. package/dist/src/doctor-clawtip-wallet.js.map +0 -1
  32. package/dist/src/doctor-diagnostics.d.ts.map +0 -1
  33. package/dist/src/doctor-diagnostics.js.map +0 -1
  34. package/dist/src/index.d.ts.map +0 -1
  35. package/dist/src/index.js.map +0 -1
  36. package/dist/src/init-clawtip-activation.d.ts.map +0 -1
  37. package/dist/src/init-clawtip-activation.js.map +0 -1
  38. package/dist/src/init-payment-options.d.ts.map +0 -1
  39. package/dist/src/init-payment-options.js.map +0 -1
  40. package/dist/src/init-setup.d.ts.map +0 -1
  41. package/dist/src/init-setup.js.map +0 -1
  42. package/dist/src/model-index.d.ts.map +0 -1
  43. package/dist/src/model-index.js.map +0 -1
  44. package/dist/src/package-update.d.ts.map +0 -1
  45. package/dist/src/package-update.js.map +0 -1
  46. package/dist/src/prewarm-cache.d.ts.map +0 -1
  47. package/dist/src/prewarm-cache.js.map +0 -1
  48. package/dist/src/prewarm-scheduler.d.ts.map +0 -1
  49. package/dist/src/prewarm-scheduler.js.map +0 -1
  50. package/dist/src/provider-install.d.ts.map +0 -1
  51. package/dist/src/provider-install.js.map +0 -1
  52. package/dist/src/provider-routing-config.d.ts.map +0 -1
  53. package/dist/src/provider-routing-config.js.map +0 -1
  54. package/dist/src/registry-trust.d.ts.map +0 -1
  55. package/dist/src/registry-trust.js.map +0 -1
  56. package/dist/src/route-failover.d.ts.map +0 -1
  57. package/dist/src/route-failover.js.map +0 -1
  58. package/dist/src/seller-catalog.d.ts.map +0 -1
  59. package/dist/src/seller-catalog.js.map +0 -1
  60. package/dist/src/seller-concurrency-limiter.d.ts.map +0 -1
  61. package/dist/src/seller-concurrency-limiter.js.map +0 -1
  62. package/dist/src/seller-metadata-cache.d.ts.map +0 -1
  63. package/dist/src/seller-metadata-cache.js.map +0 -1
  64. package/dist/src/seller-pool.d.ts.map +0 -1
  65. package/dist/src/seller-pool.js.map +0 -1
  66. package/dist/src/seller-route-planner.d.ts.map +0 -1
  67. package/dist/src/seller-route-planner.js.map +0 -1
  68. package/dist/src/seller-routing-config.d.ts.map +0 -1
  69. package/dist/src/seller-routing-config.js.map +0 -1
  70. package/dist/src/seller-routing-strategy.d.ts.map +0 -1
  71. package/dist/src/seller-routing-strategy.js.map +0 -1
  72. package/dist/src/stream-failover.d.ts.map +0 -1
  73. package/dist/src/stream-failover.js.map +0 -1
  74. package/dist/src/tb-clawtip-proof.d.ts.map +0 -1
  75. package/dist/src/tb-clawtip-proof.js.map +0 -1
  76. package/dist/src/tb-proxyd.d.ts.map +0 -1
  77. package/dist/src/tb-proxyd.js.map +0 -1
  78. package/dist/src/terminal-detect.d.ts.map +0 -1
  79. package/dist/src/terminal-detect.js.map +0 -1
  80. package/dist/src/terminal-image.d.ts.map +0 -1
  81. package/dist/src/terminal-image.js.map +0 -1
  82. package/src/buyer-store.ts +0 -1090
  83. package/src/clawtip-bootstrap.ts +0 -65
  84. package/src/cli.ts +0 -2243
  85. package/src/credit-tracker.ts +0 -295
  86. package/src/daemon.ts +0 -5475
  87. package/src/doctor-clawtip-wallet.ts +0 -95
  88. package/src/doctor-diagnostics.ts +0 -1026
  89. package/src/index.ts +0 -16
  90. package/src/init-clawtip-activation.ts +0 -695
  91. package/src/init-payment-options.ts +0 -373
  92. package/src/init-setup.ts +0 -165
  93. package/src/model-index.ts +0 -278
  94. package/src/package-update.ts +0 -311
  95. package/src/prewarm-cache.ts +0 -485
  96. package/src/prewarm-scheduler.ts +0 -675
  97. package/src/provider-install.ts +0 -1006
  98. package/src/provider-routing-config.ts +0 -410
  99. package/src/registry-trust.ts +0 -51
  100. package/src/route-failover.ts +0 -304
  101. package/src/seller-catalog.ts +0 -505
  102. package/src/seller-concurrency-limiter.ts +0 -161
  103. package/src/seller-metadata-cache.ts +0 -91
  104. package/src/seller-pool.ts +0 -557
  105. package/src/seller-route-planner.ts +0 -513
  106. package/src/seller-routing-config.ts +0 -211
  107. package/src/seller-routing-strategy.ts +0 -362
  108. package/src/stream-failover.ts +0 -152
  109. package/src/tb-clawtip-proof.ts +0 -28
  110. package/src/tb-proxyd.ts +0 -101
  111. package/src/terminal-detect.ts +0 -333
  112. package/src/terminal-image.ts +0 -228
  113. package/static/ui/assets/index-0MVXD7bH.css +0 -1
  114. package/static/ui/assets/index-BVbeDEwq.js +0 -271
  115. package/static/ui/assets/index-BVbeDEwq.js.map +0 -1
  116. package/tests/cli-routing.test.ts +0 -363
  117. package/tests/control-plane-ui-endpoints.test.ts +0 -1630
  118. package/tests/credit-tracker.test.ts +0 -165
  119. package/tests/daemon-413-fallback.test.ts +0 -92
  120. package/tests/daemon-classify.test.ts +0 -452
  121. package/tests/daemon-roles.test.ts +0 -92
  122. package/tests/daemon-trusted-registry-cache.test.ts +0 -132
  123. package/tests/e2e.test.ts +0 -366
  124. package/tests/image-generation-e2e.test.ts +0 -230
  125. package/tests/model-index.test.ts +0 -198
  126. package/tests/package-update.test.ts +0 -147
  127. package/tests/prewarm-cache.test.ts +0 -296
  128. package/tests/prewarm-scheduler.test.ts +0 -367
  129. package/tests/provider-routing-config.test.ts +0 -150
  130. package/tests/registry-trust.test.ts +0 -28
  131. package/tests/route-failover.test.ts +0 -222
  132. package/tests/seller-catalog-413.test.ts +0 -120
  133. package/tests/seller-catalog-utilities.test.ts +0 -124
  134. package/tests/seller-concurrency-limiter.test.ts +0 -83
  135. package/tests/seller-metadata-cache.test.ts +0 -89
  136. package/tests/seller-pool.test.ts +0 -365
  137. package/tests/seller-route-planner.test.ts +0 -312
  138. package/tests/seller-routing-config.test.ts +0 -124
  139. package/tests/seller-routing-strategy.test.ts +0 -167
  140. package/tests/stream-failover.test.ts +0 -52
  141. package/tests/thousand-seller.test.ts +0 -151
  142. package/tests/tokenbuddy.test.ts +0 -4043
  143. package/tsconfig.json +0 -8
@@ -9,7 +9,7 @@ import { BuyerStore } from "./buyer-store.js";
9
9
  import { DEFAULT_CLAWTIP_BOOTSTRAP_URL, fetchClawtipBootstrap } from "./clawtip-bootstrap.js";
10
10
  import { inspectOpenClawWalletConfig, } from "./init-payment-options.js";
11
11
  import { startClawtipWalletBootstrap, waitForClawtipActivationConfirmation, } from "./init-clawtip-activation.js";
12
- import { applyProviderInstall, detectProviders, previewProviderInstall, rollbackProviderInstall, } from "./provider-install.js";
12
+ import { applyProviderInstall, detectProviders, previewProviderInstall, PROXY_ACCESS_TOKEN_PLACEHOLDER, rollbackProviderInstall, } from "./provider-install.js";
13
13
  import { discoverSellerBackedModels, fetchSellerRegistryWithTrust, isBuyerVisibleRegistrySeller, normalizeSellerUrl, RegistryTooLargeError, } from "./seller-catalog.js";
14
14
  import { shouldVerifyRegistry, verifyTrustedRegistrySignature } from "./registry-trust.js";
15
15
  import { ModelIndex } from "./model-index.js";
@@ -33,6 +33,28 @@ const SELLER_CAPACITY_BLOCK_MS = 2_000;
33
33
  const CLAWTIP_STATIC_ROUTE = "/static/clawtip";
34
34
  const CLAWTIP_RECHARGE_QR_FILE = "recharge.png";
35
35
  const MANUAL_PROVIDER_SECRET_CONFIG_KEY = "manual-provider-secrets";
36
+ const USER_INFERENCE_TEST_TIMEOUT_MS = 60_000;
37
+ class BuyerPaymentSetupError extends Error {
38
+ errorCode;
39
+ statusCode;
40
+ constructor(message, errorCode = ErrorCode.PaymentMethodNotReady, statusCode = 402) {
41
+ super(message);
42
+ this.name = "BuyerPaymentSetupError";
43
+ this.errorCode = errorCode;
44
+ this.statusCode = statusCode;
45
+ }
46
+ }
47
+ const NON_FAILOVER_PURCHASE_ERROR_CODES = new Set([
48
+ ErrorCode.PaymentRequired,
49
+ ErrorCode.PaymentAccountInsufficient,
50
+ ErrorCode.PaymentMethodNotReady,
51
+ ErrorCode.PaymentAuthorizationRequired,
52
+ ErrorCode.PaymentCancelled,
53
+ ErrorCode.PaymentTimeout,
54
+ ErrorCode.PaymentProviderFailed,
55
+ ErrorCode.ClawtipCredentialInvalid,
56
+ ErrorCode.ClawtipOrderMismatch
57
+ ]);
36
58
  function currentModuleDir() {
37
59
  if (typeof __dirname !== "undefined") {
38
60
  return __dirname;
@@ -73,7 +95,7 @@ function buildCustomClientToolStatus(proxyPort) {
73
95
  manualConfig: {
74
96
  openaiBaseUrl,
75
97
  anthropicBaseUrl,
76
- apiKey: "TOKENBUDDY_PROXY",
98
+ apiKey: PROXY_ACCESS_TOKEN_PLACEHOLDER,
77
99
  },
78
100
  };
79
101
  }
@@ -202,6 +224,20 @@ function imageCostMicrosPerImage(totalMicros, imageCount) {
202
224
  }
203
225
  return Math.ceil(totalMicros / imageCount);
204
226
  }
227
+ function averageOutputTokensPerSecond(completionTokens, durationMs, ttftMs) {
228
+ if (!Number.isFinite(completionTokens) || !Number.isFinite(durationMs) || !Number.isFinite(ttftMs)) {
229
+ return undefined;
230
+ }
231
+ const outputTokens = completionTokens;
232
+ const outputWindowMs = durationMs - ttftMs;
233
+ if (outputWindowMs <= 0) {
234
+ return undefined;
235
+ }
236
+ if (outputTokens <= 0) {
237
+ return 0;
238
+ }
239
+ return outputTokens / (outputWindowMs / 1000);
240
+ }
205
241
  function purchasePaymentSummaryFromQuote(value) {
206
242
  const quote = usageRecord(value);
207
243
  if (!quote)
@@ -388,6 +424,16 @@ function reorderDefaultSellerFirst(sellers, defaultSellerId) {
388
424
  ...sellers.filter((seller) => seller.id !== defaultSellerId)
389
425
  ];
390
426
  }
427
+ function purchaseResponsePaymentSetupError(response, data, fallbackMessage) {
428
+ const errorCode = typeof data.error?.code === "string" ? data.error.code : undefined;
429
+ if (errorCode && NON_FAILOVER_PURCHASE_ERROR_CODES.has(errorCode)) {
430
+ return new BuyerPaymentSetupError(data.error?.message || fallbackMessage, errorCode, response.status || 402);
431
+ }
432
+ if (response.status === 402) {
433
+ return new BuyerPaymentSetupError(data.error?.message || fallbackMessage, ErrorCode.PaymentMethodNotReady, 402);
434
+ }
435
+ return undefined;
436
+ }
391
437
  /**
392
438
  * buyer 端守护进程。
393
439
  * 负责启动两个 Express 服务:控制接口(healthz + 控制路由)+ 反向代理(OpenAI / Anthropic 协议入口)。
@@ -719,6 +765,14 @@ export class TokenbuddyDaemon {
719
765
  lastRegistrySnapshot = null;
720
766
  forceRegistrySnapshotForTest = false;
721
767
  async fetchRegistry() {
768
+ if (this.forceRegistrySnapshotForTest && this.lastRegistrySnapshot) {
769
+ this.modelIndex.rebuild(this.lastRegistrySnapshot.sellers, {
770
+ registryVersion: this.lastRegistrySnapshot.version,
771
+ defaultSellerId: this.lastRegistrySnapshot.defaultSeller
772
+ });
773
+ this.sellerPool.sync();
774
+ return this.lastRegistrySnapshot;
775
+ }
722
776
  try {
723
777
  const fetched = await fetchSellerRegistryWithTrust(this.config.sellerRegistryUrl);
724
778
  const registry = fetched.registry;
@@ -1375,6 +1429,19 @@ export class TokenbuddyDaemon {
1375
1429
  }
1376
1430
  return nextBody;
1377
1431
  }
1432
+ withOpenAiStreamUsage(endpoint, body) {
1433
+ if (endpoint !== "/v1/chat/completions" || body.stream !== true) {
1434
+ return body;
1435
+ }
1436
+ const streamOptions = usageRecord(body.stream_options);
1437
+ return {
1438
+ ...body,
1439
+ stream_options: {
1440
+ ...(streamOptions ?? {}),
1441
+ include_usage: true
1442
+ }
1443
+ };
1444
+ }
1378
1445
  defaultPaymentMethod() {
1379
1446
  const payments = this.tokenStore.listPayments().filter((payment) => payment.enabled);
1380
1447
  return payments.find((payment) => payment.isDefault)?.method || payments.find((payment) => payment.method === "mock")?.method;
@@ -1806,6 +1873,7 @@ export class TokenbuddyDaemon {
1806
1873
  name: provider.name,
1807
1874
  url: provider.baseUrl,
1808
1875
  status: observation?.status ?? "active",
1876
+ routeState: this.routeStateFromCatalogStatus(observation?.status ?? "active"),
1809
1877
  ttftMs: observation?.ttftMs,
1810
1878
  avgTokensPerSecond: observation?.avgTokensPerSecond
1811
1879
  };
@@ -1841,10 +1909,21 @@ export class TokenbuddyDaemon {
1841
1909
  }
1842
1910
  sellerCatalogWithRuntimeMetrics(sellers) {
1843
1911
  const runtimeBySellerId = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
1912
+ const concurrencySnapshot = this.sellerConcurrencyLimiter.snapshot();
1913
+ const activeBySellerId = new Map(concurrencySnapshot.active.map((entry) => [entry.sellerId, entry.activeCount]));
1844
1914
  return sellers.map((seller) => {
1845
1915
  const runtime = runtimeBySellerId.get(seller.id);
1916
+ const concurrency = concurrencySnapshot.enabled
1917
+ ? {
1918
+ localConcurrencyActive: activeBySellerId.get(seller.id) ?? 0,
1919
+ localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
1920
+ }
1921
+ : undefined;
1846
1922
  return {
1847
1923
  ...seller,
1924
+ routeState: runtime
1925
+ ? this.routeStateFromPoolEntry(runtime, concurrency)
1926
+ : seller.routeState ?? this.routeStateFromCatalogStatus(seller.status),
1848
1927
  ttftMs: runtime?.ttftMs ?? seller.ttftMs,
1849
1928
  avgTokensPerSecond: runtime?.avgTokensPerSecond ?? seller.avgTokensPerSecond
1850
1929
  };
@@ -1872,9 +1951,67 @@ export class TokenbuddyDaemon {
1872
1951
  discountRatio: metadata?.discountRatio,
1873
1952
  circuit: entry.circuit,
1874
1953
  capacityBlockedUntil: entry.capacityBlockedUntil,
1954
+ routeState: this.routeStateFromPoolEntry(entry, concurrency),
1875
1955
  ...(concurrency ?? {})
1876
1956
  };
1877
1957
  }
1958
+ routeStateFromPoolEntry(entry, concurrency) {
1959
+ if (concurrency &&
1960
+ Number.isFinite(concurrency.localConcurrencyActive) &&
1961
+ Number.isFinite(concurrency.localConcurrencyLimit) &&
1962
+ concurrency.localConcurrencyActive >= concurrency.localConcurrencyLimit) {
1963
+ return "full";
1964
+ }
1965
+ if (Number.isFinite(entry.capacityBlockedUntil) && entry.capacityBlockedUntil > Date.now()) {
1966
+ return "full";
1967
+ }
1968
+ if (entry.circuit === "open" || entry.circuit === "half_open") {
1969
+ return "cooldown";
1970
+ }
1971
+ if (entry.upstreamStatus === "unhealthy") {
1972
+ return "error";
1973
+ }
1974
+ if (entry.upstreamStatus === "degraded") {
1975
+ return "degraded";
1976
+ }
1977
+ if (Number.isFinite(entry.healthScore)) {
1978
+ if (entry.healthScore < 40) {
1979
+ return "error";
1980
+ }
1981
+ if (entry.healthScore < 70) {
1982
+ return "degraded";
1983
+ }
1984
+ }
1985
+ if (entry.lastFailAt > 0 && entry.lastFailAt >= entry.lastSuccessAt) {
1986
+ return "degraded";
1987
+ }
1988
+ if (entry.lastSuccessAt <= 0 && entry.lastProbeAt <= 0) {
1989
+ return "unknown";
1990
+ }
1991
+ return "ok";
1992
+ }
1993
+ routeStateFromCatalogStatus(status) {
1994
+ const normalized = status?.trim().toLowerCase();
1995
+ if (!normalized) {
1996
+ return "unknown";
1997
+ }
1998
+ if (normalized === "ok" || normalized === "active" || normalized === "healthy" || normalized === "online") {
1999
+ return "ok";
2000
+ }
2001
+ if (normalized === "degraded") {
2002
+ return "degraded";
2003
+ }
2004
+ if (normalized === "cooldown" || normalized === "cooling_down" || normalized === "half_open") {
2005
+ return "cooldown";
2006
+ }
2007
+ if (normalized === "full" || normalized === "busy_capacity" || normalized === "capacity_full") {
2008
+ return "full";
2009
+ }
2010
+ if (normalized === "error" || normalized === "failed" || normalized === "manifest_unavailable" || normalized === "unhealthy" || normalized === "offline") {
2011
+ return "error";
2012
+ }
2013
+ return "unknown";
2014
+ }
1878
2015
  readUsage(bodyText, endpoint, requestBody) {
1879
2016
  const fallback = {
1880
2017
  promptTokens: 0,
@@ -1887,30 +2024,54 @@ export class TokenbuddyDaemon {
1887
2024
  }
1888
2025
  try {
1889
2026
  const data = usageRecord(JSON.parse(bodyText));
1890
- const usage = usageRecord(data?.usage) ?? usageRecord(usageRecord(data?.response)?.usage);
1891
- const promptDetails = usageRecord(usage?.prompt_tokens_details);
1892
- const inputDetails = usageRecord(usage?.input_tokens_details);
1893
- const promptTokens = nonNegativeIntegerField(usage?.prompt_tokens) ?? nonNegativeIntegerField(usage?.input_tokens) ?? 0;
1894
- const completionTokens = nonNegativeIntegerField(usage?.completion_tokens) ?? nonNegativeIntegerField(usage?.output_tokens) ?? 0;
1895
- const cacheReadTokens = nonNegativeIntegerField(promptDetails?.cached_tokens)
1896
- ?? nonNegativeIntegerField(inputDetails?.cached_tokens)
1897
- ?? nonNegativeIntegerField(usage?.cache_read_input_tokens)
1898
- ?? nonNegativeIntegerField(usage?.cache_read_tokens)
1899
- ?? 0;
1900
- const imageMetadata = endpoint === "/v1/images/generations"
1901
- ? imageUsageMetadata(data, usageRecord(requestBody))
1902
- : {};
1903
- return {
1904
- promptTokens,
1905
- completionTokens,
1906
- cacheReadTokens,
1907
- billedMicros: (promptTokens + completionTokens) * 4,
1908
- ...imageMetadata
1909
- };
2027
+ return this.usageSummaryFromData(data, endpoint, requestBody);
1910
2028
  }
1911
2029
  catch {
1912
- return fallback;
2030
+ return this.readUsageFromSse(bodyText, endpoint, requestBody) ?? fallback;
2031
+ }
2032
+ }
2033
+ usageSummaryFromData(data, endpoint, requestBody) {
2034
+ const usage = usageRecord(data?.usage) ?? usageRecord(usageRecord(data?.response)?.usage);
2035
+ const promptDetails = usageRecord(usage?.prompt_tokens_details);
2036
+ const inputDetails = usageRecord(usage?.input_tokens_details);
2037
+ const promptTokens = nonNegativeIntegerField(usage?.prompt_tokens) ?? nonNegativeIntegerField(usage?.input_tokens) ?? 0;
2038
+ const completionTokens = nonNegativeIntegerField(usage?.completion_tokens) ?? nonNegativeIntegerField(usage?.output_tokens) ?? 0;
2039
+ const cacheReadTokens = nonNegativeIntegerField(promptDetails?.cached_tokens)
2040
+ ?? nonNegativeIntegerField(inputDetails?.cached_tokens)
2041
+ ?? nonNegativeIntegerField(usage?.cache_read_input_tokens)
2042
+ ?? nonNegativeIntegerField(usage?.cache_read_tokens)
2043
+ ?? 0;
2044
+ const imageMetadata = endpoint === "/v1/images/generations"
2045
+ ? imageUsageMetadata(data, usageRecord(requestBody))
2046
+ : {};
2047
+ return {
2048
+ promptTokens,
2049
+ completionTokens,
2050
+ cacheReadTokens,
2051
+ billedMicros: (promptTokens + completionTokens) * 4,
2052
+ ...imageMetadata
2053
+ };
2054
+ }
2055
+ readUsageFromSse(bodyText, endpoint, requestBody) {
2056
+ let latestUsageData;
2057
+ for (const line of bodyText.split(/\r?\n/)) {
2058
+ const trimmed = line.trim();
2059
+ if (!trimmed.startsWith("data:"))
2060
+ continue;
2061
+ const payload = trimmed.slice("data:".length).trim();
2062
+ if (!payload || payload === "[DONE]")
2063
+ continue;
2064
+ try {
2065
+ const data = usageRecord(JSON.parse(payload));
2066
+ const usage = usageRecord(data?.usage) ?? usageRecord(usageRecord(data?.response)?.usage);
2067
+ if (usage)
2068
+ latestUsageData = data;
2069
+ }
2070
+ catch {
2071
+ continue;
2072
+ }
1913
2073
  }
2074
+ return latestUsageData ? this.usageSummaryFromData(latestUsageData, endpoint, requestBody) : undefined;
1914
2075
  }
1915
2076
  parseSellerSettlementSummary(headers) {
1916
2077
  const raw = headers.get("x-tokenbuddy-settlement");
@@ -1935,6 +2096,7 @@ export class TokenbuddyDaemon {
1935
2096
  ? settlement.requestId
1936
2097
  : undefined;
1937
2098
  const billingBreakdown = settlement?.billingBreakdown;
2099
+ const avgOutputTokensPerSecond = nonNegativeFiniteField(extras?.avgOutputTokensPerSecond);
1938
2100
  this.tokenStore.recordInferenceLedger({
1939
2101
  requestId,
1940
2102
  sellerKey: route.seller.id,
@@ -1976,6 +2138,7 @@ export class TokenbuddyDaemon {
1976
2138
  falloverChain: extras?.falloverChain,
1977
2139
  upstreamStatus: extras?.upstreamStatus,
1978
2140
  durationMs: extras?.durationMs,
2141
+ avgOutputTokensPerSecond,
1979
2142
  paymentMethod: extras?.paymentMethod
1980
2143
  });
1981
2144
  logger.info("inference.ledger.recorded", "safe inference ledger recorded", {
@@ -2002,6 +2165,7 @@ export class TokenbuddyDaemon {
2002
2165
  falloverChain: extras?.falloverChain,
2003
2166
  upstreamStatus: extras?.upstreamStatus,
2004
2167
  durationMs: extras?.durationMs,
2168
+ avgOutputTokensPerSecond,
2005
2169
  paymentMethod: extras?.paymentMethod
2006
2170
  });
2007
2171
  }
@@ -2119,6 +2283,13 @@ export class TokenbuddyDaemon {
2119
2283
  }
2120
2284
  return parsed;
2121
2285
  }
2286
+ userInferenceTestTimeoutMs() {
2287
+ const warmupTimeoutMs = this.config.warmupProbeTimeoutMs;
2288
+ if (!Number.isFinite(warmupTimeoutMs)) {
2289
+ return USER_INFERENCE_TEST_TIMEOUT_MS;
2290
+ }
2291
+ return Math.max(warmupTimeoutMs ?? 0, USER_INFERENCE_TEST_TIMEOUT_MS);
2292
+ }
2122
2293
  /**
2123
2294
  * Safety margin subtracted from the cached token's `expiresAt` before
2124
2295
  * deciding to reuse it. Buying a new token 60s before expiry gives the
@@ -2212,14 +2383,19 @@ export class TokenbuddyDaemon {
2212
2383
  });
2213
2384
  const createData = await createRes.json();
2214
2385
  if (!createRes.ok) {
2386
+ const paymentSetupError = purchaseResponsePaymentSetupError(createRes, createData, "purchase/create failed");
2215
2387
  logger.warn("purchase.create.failed", "seller purchase create failed", {
2216
2388
  sellerKey,
2217
2389
  model: modelId,
2218
2390
  requestId,
2219
2391
  status: createRes.status,
2392
+ errorCode: createData.error?.code,
2220
2393
  errorMessage: createData.error?.message || "purchase/create failed",
2221
2394
  durationMs: Date.now() - startedAt
2222
2395
  });
2396
+ if (paymentSetupError) {
2397
+ throw paymentSetupError;
2398
+ }
2223
2399
  throw new Error(createData.error?.message || "purchase/create failed");
2224
2400
  }
2225
2401
  const purchaseId = createData.purchaseId || createData.purchase_id;
@@ -2273,15 +2449,20 @@ export class TokenbuddyDaemon {
2273
2449
  });
2274
2450
  const completeData = await completeRes.json();
2275
2451
  if (!completeRes.ok) {
2452
+ const paymentSetupError = purchaseResponsePaymentSetupError(completeRes, completeData, "purchase/complete failed");
2276
2453
  logger.warn("purchase.complete.failed", "seller purchase complete failed", {
2277
2454
  sellerKey,
2278
2455
  model: modelId,
2279
2456
  requestId,
2280
2457
  purchaseId,
2281
2458
  status: completeRes.status,
2459
+ errorCode: completeData.error?.code,
2282
2460
  errorMessage: completeData.error?.message || "purchase/complete failed",
2283
2461
  durationMs: Date.now() - startedAt
2284
2462
  });
2463
+ if (paymentSetupError) {
2464
+ throw paymentSetupError;
2465
+ }
2285
2466
  throw new Error(completeData.error?.message || "purchase/complete failed");
2286
2467
  }
2287
2468
  const token = completeData.accessToken || completeData.access_token;
@@ -2360,7 +2541,7 @@ export class TokenbuddyDaemon {
2360
2541
  return "mock-proof-data";
2361
2542
  }
2362
2543
  if (route.paymentMethod !== "clawtip") {
2363
- throw new Error(`unsupported payment method for auto purchase: ${route.paymentMethod}`);
2544
+ throw new BuyerPaymentSetupError(`unsupported payment method for auto purchase: ${route.paymentMethod}`, ErrorCode.PaymentMethodNotReady);
2364
2545
  }
2365
2546
  const proofCommand = process.env.TB_PROXYD_CLAWTIP_PROOF_COMMAND;
2366
2547
  if (proofCommand?.trim()) {
@@ -2379,7 +2560,7 @@ export class TokenbuddyDaemon {
2379
2560
  this.logPaymentProofResolved(route, "env", requestId);
2380
2561
  return proof.trim();
2381
2562
  }
2382
- throw new Error("clawtip auto purchase requires TB_PROXYD_CLAWTIP_PROOF_COMMAND or a ClawTip proof env/file");
2563
+ throw new BuyerPaymentSetupError("clawtip auto purchase requires TB_PROXYD_CLAWTIP_PROOF_COMMAND or a ClawTip proof env/file", ErrorCode.PaymentMethodNotReady);
2383
2564
  }
2384
2565
  runClawtipProofCommand(route, createData, commandPath, requestId) {
2385
2566
  const timeoutMs = this.clawtipProofTimeoutMs();
@@ -2416,7 +2597,7 @@ export class TokenbuddyDaemon {
2416
2597
  }
2417
2598
  settled = true;
2418
2599
  child.kill("SIGTERM");
2419
- reject(new Error("clawtip proof provider timed out"));
2600
+ reject(new BuyerPaymentSetupError("clawtip proof provider timed out", ErrorCode.PaymentTimeout));
2420
2601
  }, timeoutMs);
2421
2602
  child.stdout.on("data", (chunk) => {
2422
2603
  stdout += chunk.toString("utf8");
@@ -2433,7 +2614,7 @@ export class TokenbuddyDaemon {
2433
2614
  }
2434
2615
  settled = true;
2435
2616
  clearTimeout(timer);
2436
- reject(error);
2617
+ reject(new BuyerPaymentSetupError(`clawtip proof provider failed: ${error.message}`, ErrorCode.PaymentProviderFailed));
2437
2618
  });
2438
2619
  child.on("close", (code) => {
2439
2620
  if (settled) {
@@ -2443,7 +2624,7 @@ export class TokenbuddyDaemon {
2443
2624
  clearTimeout(timer);
2444
2625
  const proof = stdout.trim();
2445
2626
  if (code !== 0 || !proof) {
2446
- reject(new Error(`clawtip proof provider failed with exit ${code}: ${stderr.trim() || "empty proof"}`));
2627
+ reject(new BuyerPaymentSetupError(`clawtip proof provider failed with exit ${code}: ${stderr.trim() || "empty proof"}`, ErrorCode.PaymentProviderFailed));
2447
2628
  return;
2448
2629
  }
2449
2630
  logger.info("purchase.clawtip_proof.succeeded", "clawtip proof provider succeeded", {
@@ -2522,6 +2703,7 @@ export class TokenbuddyDaemon {
2522
2703
  const startedAt = Date.now();
2523
2704
  const ac = new AbortController();
2524
2705
  const timer = setTimeout(() => ac.abort(new Error("manual provider model probe timeout")), this.config.warmupProbeTimeoutMs ?? 3000);
2706
+ let modelIds = [];
2525
2707
  try {
2526
2708
  const response = await fetch(this.manualProviderEndpointUrlFromBase(parsed.toString(), "/v1/models"), {
2527
2709
  method: "GET",
@@ -2540,19 +2722,234 @@ export class TokenbuddyDaemon {
2540
2722
  throw new Error(`manual provider model probe returned HTTP ${response.status}`);
2541
2723
  }
2542
2724
  const body = await response.json().catch(() => undefined);
2543
- const modelIds = parseOpenAiModelIds(body);
2725
+ modelIds = parseOpenAiModelIds(body);
2544
2726
  if (modelIds.length === 0) {
2545
2727
  throw new Error("manual provider model list is empty");
2546
2728
  }
2729
+ }
2730
+ finally {
2731
+ clearTimeout(timer);
2732
+ }
2733
+ const protocolProbes = await this.probeManualProviderProtocols({
2734
+ baseUrl: parsed.toString(),
2735
+ apiKey: input.apiKey.trim(),
2736
+ modelIds
2737
+ });
2738
+ const supportedProtocols = protocolProbes
2739
+ .filter((probe) => probe.supported)
2740
+ .map((probe) => probe.protocol);
2741
+ if (supportedProtocols.length === 0) {
2742
+ throw new Error("manual provider has models but no supported inference protocols were detected");
2743
+ }
2744
+ return {
2745
+ modelIds,
2746
+ supportedProtocols,
2747
+ protocolProbes,
2748
+ elapsedMs: Date.now() - startedAt
2749
+ };
2750
+ }
2751
+ async probeManualProviderProtocols(input) {
2752
+ const protocols = [
2753
+ { protocol: "chat_completions", endpoint: "/v1/chat/completions" },
2754
+ { protocol: "responses", endpoint: "/v1/responses" },
2755
+ { protocol: "images_generations", endpoint: "/v1/images/generations" }
2756
+ ];
2757
+ const timeoutMs = this.config.warmupProbeTimeoutMs ?? 3000;
2758
+ return Promise.all(protocols.map((protocol) => this.probeManualProviderProtocol(input, protocol, timeoutMs)));
2759
+ }
2760
+ async probeManualProviderProtocol(input, protocol, timeoutMs) {
2761
+ let lastResult;
2762
+ for (const modelId of manualProviderProbeModelCandidates(input.modelIds, protocol.protocol)) {
2763
+ const result = await this.probeManualProviderProtocolModel(input, protocol, modelId, timeoutMs);
2764
+ if (result.supported) {
2765
+ return result;
2766
+ }
2767
+ lastResult = result;
2768
+ }
2769
+ return lastResult ?? {
2770
+ protocol: protocol.protocol,
2771
+ supported: false,
2772
+ errorMessage: "missing model for capability probe"
2773
+ };
2774
+ }
2775
+ async probeManualProviderProtocolModel(input, protocol, modelId, timeoutMs) {
2776
+ const ac = new AbortController();
2777
+ const timer = setTimeout(() => ac.abort(new Error("manual provider protocol probe timeout")), timeoutMs);
2778
+ try {
2779
+ const response = await fetch(this.manualProviderEndpointUrlFromBase(input.baseUrl, protocol.endpoint), {
2780
+ method: "POST",
2781
+ headers: {
2782
+ "Content-Type": "application/json",
2783
+ "Authorization": `Bearer ${input.apiKey}`
2784
+ },
2785
+ body: JSON.stringify(manualProviderProtocolProbeBody(protocol.protocol, modelId)),
2786
+ signal: ac.signal
2787
+ });
2788
+ const responseText = await readResponseText(response);
2789
+ if (protocol.protocol === "images_generations" && imageValidationProbeSupported(response.status, responseText)) {
2790
+ return {
2791
+ protocol: protocol.protocol,
2792
+ supported: true,
2793
+ modelId,
2794
+ httpStatus: response.status
2795
+ };
2796
+ }
2797
+ return {
2798
+ protocol: protocol.protocol,
2799
+ supported: response.ok,
2800
+ modelId,
2801
+ httpStatus: response.status,
2802
+ ...(response.ok ? {} : { errorMessage: `probe returned HTTP ${response.status}` })
2803
+ };
2804
+ }
2805
+ catch (error) {
2806
+ const errorMessage = error instanceof Error ? error.message : String(error);
2547
2807
  return {
2548
- modelIds,
2549
- elapsedMs: Date.now() - startedAt
2808
+ protocol: protocol.protocol,
2809
+ supported: false,
2810
+ modelId,
2811
+ errorMessage
2550
2812
  };
2551
2813
  }
2552
2814
  finally {
2553
2815
  clearTimeout(timer);
2554
2816
  }
2555
2817
  }
2818
+ async testManualProviderInference(provider) {
2819
+ const modelId = provider.models[0];
2820
+ if (!modelId) {
2821
+ throw new Error(`manual provider has no models: ${provider.id}`);
2822
+ }
2823
+ const startedAt = Date.now();
2824
+ const timeoutMs = this.userInferenceTestTimeoutMs();
2825
+ const ac = new AbortController();
2826
+ const timer = setTimeout(() => ac.abort(new Error("manual provider inference probe timeout")), timeoutMs);
2827
+ try {
2828
+ const response = await fetch(this.manualProviderEndpointUrl(provider, "/v1/chat/completions"), {
2829
+ method: "POST",
2830
+ headers: {
2831
+ "Content-Type": "application/json",
2832
+ "Authorization": `Bearer ${this.manualProviderApiKey(provider)}`
2833
+ },
2834
+ body: JSON.stringify({
2835
+ model: modelId,
2836
+ messages: [{ role: "user", content: "say hi" }],
2837
+ max_tokens: 8,
2838
+ temperature: 0,
2839
+ stream: false
2840
+ }),
2841
+ signal: ac.signal
2842
+ });
2843
+ const elapsedMs = Date.now() - startedAt;
2844
+ if (!response.ok) {
2845
+ const body = await response.text().catch(() => "");
2846
+ throw new Error(`manual provider inference probe returned HTTP ${response.status}${body ? `: ${body.slice(0, 240)}` : ""}`);
2847
+ }
2848
+ const body = await response.json().catch(() => undefined);
2849
+ if (!hasOpenAiChatChoice(body)) {
2850
+ throw new Error("manual provider inference probe returned an invalid chat completion");
2851
+ }
2852
+ return { modelId, elapsedMs };
2853
+ }
2854
+ finally {
2855
+ clearTimeout(timer);
2856
+ }
2857
+ }
2858
+ async testSellerInference(sellerId) {
2859
+ const registry = await this.fetchRegistry();
2860
+ const seller = registry.sellers.find((entry) => entry.id === sellerId);
2861
+ if (!seller) {
2862
+ throw new Error(`seller not found: ${sellerId}`);
2863
+ }
2864
+ if (!isBuyerVisibleRegistrySeller(seller)) {
2865
+ throw new Error(`seller is not active: ${sellerId}`);
2866
+ }
2867
+ if (!(seller.supportedProtocols ?? []).includes("chat_completions")) {
2868
+ throw new Error(`seller does not support chat_completions: ${sellerId}`);
2869
+ }
2870
+ const modelId = seller.models?.[0];
2871
+ if (!modelId) {
2872
+ throw new Error(`seller has no models: ${sellerId}`);
2873
+ }
2874
+ const paymentMethod = this.defaultPaymentMethod();
2875
+ if (!paymentMethod || !(seller.paymentMethods ?? []).includes(paymentMethod)) {
2876
+ throw new Error(`seller does not support the active payment method: ${sellerId}`);
2877
+ }
2878
+ const startedAt = Date.now();
2879
+ const requestId = `seller_test_${crypto.randomBytes(8).toString("hex")}`;
2880
+ const route = {
2881
+ seller,
2882
+ transport: "tokenbuddy_seller",
2883
+ manifest: null,
2884
+ protocol: "chat_completions",
2885
+ modelId,
2886
+ paymentMethod,
2887
+ planSource: "registry_fallback",
2888
+ planReason: "manual_seller_test",
2889
+ planSellerCount: 1
2890
+ };
2891
+ const token = await this.getOrPurchaseToken(route, requestId);
2892
+ const sellerUrl = normalizeSellerUrl(seller);
2893
+ const timeoutMs = this.userInferenceTestTimeoutMs();
2894
+ const ac = new AbortController();
2895
+ const timer = setTimeout(() => ac.abort(new Error("seller inference test timeout")), timeoutMs);
2896
+ try {
2897
+ const response = await fetch(`${sellerUrl}/v1/chat/completions`, {
2898
+ method: "POST",
2899
+ headers: {
2900
+ "Content-Type": "application/json",
2901
+ "Authorization": `Bearer ${token}`,
2902
+ "X-Request-Id": requestId,
2903
+ "Idempotency-Key": `idem_${crypto.randomBytes(12).toString("hex")}`,
2904
+ "X-TokenBuddy-Deadline-Ms": String(timeoutMs)
2905
+ },
2906
+ body: JSON.stringify({
2907
+ model: modelId,
2908
+ messages: [{ role: "user", content: "say hi" }],
2909
+ max_tokens: 8,
2910
+ temperature: 0,
2911
+ stream: false,
2912
+ requestId
2913
+ }),
2914
+ signal: ac.signal
2915
+ });
2916
+ const elapsedMs = Date.now() - startedAt;
2917
+ if (!response.ok) {
2918
+ const body = await response.text().catch(() => "");
2919
+ throw new Error(`seller inference test returned HTTP ${response.status}${body ? `: ${body.slice(0, 240)}` : ""}`);
2920
+ }
2921
+ const body = await response.json().catch(() => undefined);
2922
+ if (!hasOpenAiChatChoice(body)) {
2923
+ throw new Error("seller inference test returned an invalid chat completion");
2924
+ }
2925
+ this.sellerPool.recordRuntimeMetrics(seller.id, { ttftMs: elapsedMs });
2926
+ logger.info("seller.inference_test.succeeded", "seller inference test succeeded", {
2927
+ sellerId,
2928
+ model: modelId,
2929
+ elapsedMs
2930
+ });
2931
+ return {
2932
+ sellerId,
2933
+ sellerName: seller.name,
2934
+ modelId,
2935
+ elapsedMs
2936
+ };
2937
+ }
2938
+ finally {
2939
+ clearTimeout(timer);
2940
+ }
2941
+ }
2942
+ recordManualProviderProbeResult(input) {
2943
+ const existing = this.currentManualProviderObservations().observations.find((entry) => entry.providerId === input.providerId);
2944
+ this.recordManualProviderObservation({
2945
+ providerId: input.providerId,
2946
+ current: existing?.current ?? false,
2947
+ status: input.status,
2948
+ ttftMs: input.ttftMs,
2949
+ errorClass: input.errorClass,
2950
+ errorMessage: input.errorMessage
2951
+ });
2952
+ }
2556
2953
  manualProviderErrorClass(status) {
2557
2954
  if (status === 401 || status === 403) {
2558
2955
  return "auth_failed";
@@ -2609,10 +3006,17 @@ export class TokenbuddyDaemon {
2609
3006
  }
2610
3007
  const sellerKey = route.seller.id;
2611
3008
  const attemptStartedAt = Date.now();
2612
- const upstreamBody = this.applyResolvedModelToBody(endpoint, {
3009
+ let firstByteAt;
3010
+ const markManualFirstByte = () => {
3011
+ if (firstByteAt === undefined) {
3012
+ firstByteAt = Date.now();
3013
+ }
3014
+ markFirstByte();
3015
+ };
3016
+ const upstreamBody = this.withOpenAiStreamUsage(endpoint, this.applyResolvedModelToBody(endpoint, {
2613
3017
  ...reqBody,
2614
3018
  requestId
2615
- }, modelId);
3019
+ }, modelId));
2616
3020
  logger.info("manual_provider.request.started", "manual provider request started", {
2617
3021
  requestId,
2618
3022
  providerId: provider.id,
@@ -2699,6 +3103,7 @@ export class TokenbuddyDaemon {
2699
3103
  return true;
2700
3104
  }
2701
3105
  let bytes = 0;
3106
+ let responseBody = "";
2702
3107
  const decoder = new TextDecoder();
2703
3108
  while (true) {
2704
3109
  const { done, value } = await reader.read();
@@ -2708,24 +3113,32 @@ export class TokenbuddyDaemon {
2708
3113
  bytes += value.byteLength;
2709
3114
  const chunk = decoder.decode(value, { stream: true });
2710
3115
  if (chunk.length > 0) {
2711
- markFirstByte();
3116
+ responseBody += chunk;
3117
+ markManualFirstByte();
2712
3118
  res.write(chunk);
2713
3119
  }
2714
3120
  }
2715
3121
  const decoderTail = decoder.decode();
2716
3122
  if (decoderTail.length > 0) {
2717
- markFirstByte();
3123
+ responseBody += decoderTail;
3124
+ markManualFirstByte();
2718
3125
  res.write(decoderTail);
2719
3126
  }
2720
3127
  res.end();
3128
+ const usage = this.readUsage(responseBody, endpoint, upstreamBody);
2721
3129
  const durationMs = Date.now() - startedAt;
2722
- const ttftMs = durationMs > 0 ? Date.now() - attemptStartedAt : undefined;
3130
+ const ttftMs = firstByteAt !== undefined ? firstByteAt - startedAt : undefined;
3131
+ const billedMicros = usage.billedMicros > 0 ? usage.billedMicros : Math.max(1, bytes);
3132
+ const completionTokens = usage.completionTokens;
3133
+ const avgTokensPerSecond = completionTokens > 0
3134
+ ? averageOutputTokensPerSecond(completionTokens, durationMs, ttftMs)
3135
+ : undefined;
2723
3136
  this.recordManualProviderObservation({
2724
3137
  providerId: provider.id,
2725
3138
  current: true,
2726
3139
  status: "healthy",
2727
3140
  ttftMs,
2728
- avgTokensPerSecond: undefined
3141
+ avgTokensPerSecond
2729
3142
  });
2730
3143
  this.tokenStore.recordInferenceLedger({
2731
3144
  requestId,
@@ -2733,11 +3146,11 @@ export class TokenbuddyDaemon {
2733
3146
  modelId,
2734
3147
  endpoint,
2735
3148
  status: "ok",
2736
- promptTokens: 0,
2737
- completionTokens: 0,
2738
- cacheReadTokens: 0,
2739
- billedMicros: Math.max(1, bytes),
2740
- estimatedMicros: Math.max(1, bytes),
3149
+ promptTokens: usage.promptTokens,
3150
+ completionTokens: usage.completionTokens,
3151
+ cacheReadTokens: usage.cacheReadTokens,
3152
+ billedMicros,
3153
+ estimatedMicros: billedMicros,
2741
3154
  priceVersion: `local-provider:${provider.id}`,
2742
3155
  billingUnit: endpoint === "/v1/images/generations" ? "images" : "tokens",
2743
3156
  balanceSource: "self_funded_provider",
@@ -2748,24 +3161,22 @@ export class TokenbuddyDaemon {
2748
3161
  falloverChain: routes.slice(0, routeIndex + 1).map((entry) => entry.seller.id),
2749
3162
  upstreamStatus: "healthy",
2750
3163
  durationMs,
3164
+ avgOutputTokensPerSecond: avgTokensPerSecond,
2751
3165
  paymentMethod
2752
3166
  });
2753
3167
  return true;
2754
3168
  }
3169
+ markManualFirstByte();
2755
3170
  const responseBody = await response.text();
2756
- markFirstByte();
2757
3171
  res.send(responseBody);
2758
3172
  const usage = this.readUsage(responseBody, endpoint, reqBody);
2759
3173
  const durationMs = Date.now() - startedAt;
2760
- const ttftMs = Date.now() - attemptStartedAt;
2761
- const completionTokens = usage.completionTokens;
2762
- const avgTokensPerSecond = durationMs > 0 && completionTokens > 0 ? completionTokens / (durationMs / 1000) : undefined;
3174
+ const ttftMs = firstByteAt !== undefined ? firstByteAt - startedAt : undefined;
2763
3175
  this.recordManualProviderObservation({
2764
3176
  providerId: provider.id,
2765
3177
  current: true,
2766
3178
  status: "healthy",
2767
- ttftMs,
2768
- avgTokensPerSecond
3179
+ ttftMs
2769
3180
  });
2770
3181
  this.tokenStore.recordInferenceLedger({
2771
3182
  requestId,
@@ -2957,6 +3368,23 @@ export class TokenbuddyDaemon {
2957
3368
  token = await this.getOrPurchaseToken(route, requestId);
2958
3369
  }
2959
3370
  catch (purchaseError) {
3371
+ if (purchaseError instanceof BuyerPaymentSetupError) {
3372
+ logger.warn("purchase.payment_setup_failed", "buyer payment setup failed; refusing seller failover", {
3373
+ requestId,
3374
+ sellerKey,
3375
+ model: modelId,
3376
+ endpoint,
3377
+ errorCode: purchaseError.errorCode,
3378
+ errorMessage: purchaseError.message
3379
+ });
3380
+ res.status(purchaseError.statusCode).json({
3381
+ error: {
3382
+ code: purchaseError.errorCode,
3383
+ message: purchaseError.message
3384
+ }
3385
+ });
3386
+ return;
3387
+ }
2960
3388
  logger.warn("purchase.failed", "seller auto-purchase failed; failing over without retry", {
2961
3389
  requestId,
2962
3390
  sellerKey,
@@ -3119,6 +3547,7 @@ export class TokenbuddyDaemon {
3119
3547
  return;
3120
3548
  }
3121
3549
  let bytes = 0;
3550
+ let responseBody = "";
3122
3551
  const decoder = new TextDecoder();
3123
3552
  const settlementExtractor = new SellerSettlementStreamExtractor();
3124
3553
  while (true) {
@@ -3135,6 +3564,7 @@ export class TokenbuddyDaemon {
3135
3564
  // 缺 event: 行)由卖方修,buyer 不兜底。
3136
3565
  const sellerChunk = settlementExtractor.push(chunk);
3137
3566
  if (sellerChunk.length > 0) {
3567
+ responseBody += sellerChunk;
3138
3568
  markFirstByte();
3139
3569
  res.write(sellerChunk);
3140
3570
  }
@@ -3147,31 +3577,47 @@ export class TokenbuddyDaemon {
3147
3577
  if (decoderTail.length > 0) {
3148
3578
  const sellerTail = settlementExtractor.push(decoderTail);
3149
3579
  if (sellerTail.length > 0) {
3580
+ responseBody += sellerTail;
3150
3581
  markFirstByte();
3151
3582
  res.write(sellerTail);
3152
3583
  }
3153
3584
  }
3154
3585
  const settlementTrailing = settlementExtractor.finish();
3155
3586
  if (settlementTrailing.downstream.length > 0) {
3587
+ responseBody += settlementTrailing.downstream;
3156
3588
  markFirstByte();
3157
3589
  res.write(settlementTrailing.downstream);
3158
3590
  }
3159
3591
  res.end();
3160
3592
  void this.refreshSellerRuntimeMetrics(route, requestId);
3161
- this.recordReconciledInference(route, endpoint, requestId, { promptTokens: 0, completionTokens: 0, cacheReadTokens: 0, billedMicros: Math.max(1, bytes) }, this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(), this.inferPromptForHash(body), undefined, {
3162
- ttftMs: firstByteAt ? firstByteAt - startedAt : undefined,
3593
+ const streamUsage = this.readUsage(responseBody, endpoint, body);
3594
+ const hasStreamUsage = streamUsage.promptTokens > 0 ||
3595
+ streamUsage.completionTokens > 0 ||
3596
+ streamUsage.cacheReadTokens > 0 ||
3597
+ streamUsage.billedMicros > 0;
3598
+ const usage = hasStreamUsage
3599
+ ? streamUsage
3600
+ : { ...streamUsage, billedMicros: Math.max(1, bytes) };
3601
+ const durationMs = Date.now() - startedAt;
3602
+ const ttftMs = firstByteAt ? firstByteAt - startedAt : undefined;
3603
+ const avgOutputTokensPerSecond = usage.completionTokens > 0
3604
+ ? averageOutputTokensPerSecond(usage.completionTokens, durationMs, ttftMs)
3605
+ : undefined;
3606
+ this.recordReconciledInference(route, endpoint, requestId, usage, this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(), this.inferPromptForHash(body), undefined, {
3607
+ ttftMs,
3163
3608
  fallbackCount: routeIndex,
3164
3609
  routeReason: plan.reason,
3165
3610
  falloverChain: routes.slice(0, routeIndex + 1).map((r) => r.seller.id),
3166
3611
  upstreamStatus: upstreamStatusFromHeaders(upstreamResponse.headers),
3167
- durationMs: Date.now() - startedAt,
3612
+ durationMs,
3613
+ avgOutputTokensPerSecond,
3168
3614
  paymentMethod
3169
3615
  });
3170
3616
  return;
3171
3617
  }
3618
+ markFirstByte();
3172
3619
  const responseBody = await upstreamResponse.text();
3173
3620
  lease.refresh();
3174
- markFirstByte();
3175
3621
  res.send(responseBody);
3176
3622
  const usage = this.readUsage(responseBody, endpoint, body);
3177
3623
  void this.refreshSellerRuntimeMetrics(route, requestId);
@@ -3243,9 +3689,10 @@ export class TokenbuddyDaemon {
3243
3689
  durationMs: Date.now() - startedAt
3244
3690
  });
3245
3691
  if (!res.headersSent) {
3246
- res.status(502).json({
3692
+ const buyerPaymentError = error instanceof BuyerPaymentSetupError ? error : undefined;
3693
+ res.status(buyerPaymentError?.statusCode ?? 502).json({
3247
3694
  error: {
3248
- code: "proxy_request_failed",
3695
+ code: buyerPaymentError?.errorCode ?? "proxy_request_failed",
3249
3696
  message: error instanceof Error ? error.message : String(error)
3250
3697
  }
3251
3698
  });
@@ -3568,6 +4015,32 @@ export class TokenbuddyDaemon {
3568
4015
  });
3569
4016
  }
3570
4017
  });
4018
+ controlApp.post("/sellers/:id/test", async (req, res) => {
4019
+ const sellerId = req.params.id;
4020
+ try {
4021
+ const result = await this.testSellerInference(sellerId);
4022
+ res.status(200).json({
4023
+ ok: true,
4024
+ sellerId: result.sellerId,
4025
+ sellerName: result.sellerName,
4026
+ modelId: result.modelId,
4027
+ elapsedMs: result.elapsedMs
4028
+ });
4029
+ }
4030
+ catch (error) {
4031
+ const errorMessage = error instanceof Error ? error.message : String(error);
4032
+ logger.warn("seller.inference_test.failed", "seller inference test failed", {
4033
+ sellerId,
4034
+ errorMessage
4035
+ });
4036
+ res.status(400).json({
4037
+ error: {
4038
+ code: "seller_inference_test_failed",
4039
+ message: errorMessage
4040
+ }
4041
+ });
4042
+ }
4043
+ });
3571
4044
  controlApp.get("/models", async (req, res) => {
3572
4045
  try {
3573
4046
  const { models, sellers } = await this.listSellerBackedModels();
@@ -3598,7 +4071,7 @@ export class TokenbuddyDaemon {
3598
4071
  });
3599
4072
  controlApp.post("/providers/detect", (req, res) => {
3600
4073
  try {
3601
- const providers = detectProviders({ home: typeof req.body?.home === "string" ? req.body.home : undefined });
4074
+ const providers = detectProviders({ home: typeof req.body?.home === "string" ? req.body.home : this.config.providerHomeDir });
3602
4075
  logger.info("provider.detect.succeeded", "provider detection succeeded", {
3603
4076
  providerCount: providers.length,
3604
4077
  detectedCount: providers.filter((provider) => provider.detected).length
@@ -3644,7 +4117,7 @@ export class TokenbuddyDaemon {
3644
4117
  proxyUrl: String(req.body?.proxyUrl || ""),
3645
4118
  model: typeof req.body?.model === "string" ? req.body.model : undefined,
3646
4119
  providerSelections: req.body?.providerSelections,
3647
- home: typeof req.body?.home === "string" ? req.body.home : undefined
4120
+ home: typeof req.body?.home === "string" ? req.body.home : this.config.providerHomeDir
3648
4121
  });
3649
4122
  logger.info("provider.install.previewed", "provider install previewed", {
3650
4123
  providerCount: new Set(changes.map((change) => change.providerId)).size,
@@ -3672,7 +4145,7 @@ export class TokenbuddyDaemon {
3672
4145
  proxyUrl: String(req.body?.proxyUrl || ""),
3673
4146
  model: typeof req.body?.model === "string" ? req.body.model : undefined,
3674
4147
  providerSelections: req.body?.providerSelections,
3675
- home: typeof req.body?.home === "string" ? req.body.home : undefined
4148
+ home: typeof req.body?.home === "string" ? req.body.home : this.config.providerHomeDir
3676
4149
  }, this.tokenStore);
3677
4150
  logger.info("provider.install.applied", "provider install applied", {
3678
4151
  providerCount: new Set(applied.map((entry) => entry.providerId)).size,
@@ -3695,7 +4168,7 @@ export class TokenbuddyDaemon {
3695
4168
  try {
3696
4169
  const rolledBack = rollbackProviderInstall({
3697
4170
  providers: Array.isArray(req.body?.providers) ? req.body.providers : [],
3698
- home: typeof req.body?.home === "string" ? req.body.home : undefined
4171
+ home: typeof req.body?.home === "string" ? req.body.home : this.config.providerHomeDir
3699
4172
  }, this.tokenStore);
3700
4173
  logger.info("provider.install.rolled_back", "provider install rolled back", {
3701
4174
  providerCount: new Set(rolledBack.map((entry) => entry.providerId)).size,
@@ -3808,11 +4281,14 @@ export class TokenbuddyDaemon {
3808
4281
  const result = await this.probeManualProviderModels({ baseUrl, apiKey });
3809
4282
  logger.info("routing.manual_provider.probed", "manual provider model probe succeeded", {
3810
4283
  modelCount: result.modelIds.length,
4284
+ supportedProtocols: result.supportedProtocols,
3811
4285
  elapsedMs: result.elapsedMs
3812
4286
  });
3813
4287
  res.status(200).json({
3814
4288
  ok: true,
3815
4289
  modelIds: result.modelIds,
4290
+ supportedProtocols: result.supportedProtocols,
4291
+ protocolProbes: result.protocolProbes,
3816
4292
  elapsedMs: result.elapsedMs
3817
4293
  });
3818
4294
  }
@@ -3822,6 +4298,116 @@ export class TokenbuddyDaemon {
3822
4298
  res.status(400).json({ error: { code: "manual_provider_probe_failed", message: errorMessage } });
3823
4299
  }
3824
4300
  });
4301
+ controlApp.post("/routing/manual-providers/:id/test", async (req, res) => {
4302
+ const providerId = req.params.id;
4303
+ let providerFound = false;
4304
+ try {
4305
+ const config = this.currentManualProviders();
4306
+ const provider = config.providers.find((entry) => entry.id === providerId);
4307
+ if (!provider) {
4308
+ res.status(404).json({ error: { code: "manual_provider_not_found", message: `manual provider not found: ${providerId}` } });
4309
+ return;
4310
+ }
4311
+ providerFound = true;
4312
+ const result = await this.testManualProviderInference(provider);
4313
+ this.recordManualProviderProbeResult({
4314
+ providerId,
4315
+ status: "healthy",
4316
+ ttftMs: result.elapsedMs
4317
+ });
4318
+ logger.info("routing.manual_provider.inference_test_succeeded", "manual provider inference test succeeded", {
4319
+ providerId,
4320
+ model: result.modelId,
4321
+ elapsedMs: result.elapsedMs
4322
+ });
4323
+ const observations = new Map(this.currentManualProviderObservations().observations.map((entry) => [entry.providerId, entry]));
4324
+ res.status(200).json({
4325
+ ok: true,
4326
+ providerId,
4327
+ modelId: result.modelId,
4328
+ elapsedMs: result.elapsedMs,
4329
+ provider: publicManualProviderConfig(provider, observations.get(provider.id))
4330
+ });
4331
+ }
4332
+ catch (error) {
4333
+ const errorMessage = error instanceof Error ? error.message : String(error);
4334
+ if (providerFound) {
4335
+ this.recordManualProviderProbeResult({
4336
+ providerId,
4337
+ status: "unhealthy",
4338
+ errorClass: "inference_probe_failed",
4339
+ errorMessage
4340
+ });
4341
+ }
4342
+ logger.warn("routing.manual_provider.inference_test_failed", "manual provider inference test failed", { providerId, errorMessage });
4343
+ res.status(400).json({ error: { code: "manual_provider_inference_test_failed", message: errorMessage } });
4344
+ }
4345
+ });
4346
+ controlApp.post("/routing/manual-providers/:id/models/refresh", async (req, res) => {
4347
+ const providerId = req.params.id;
4348
+ let providerFound = false;
4349
+ try {
4350
+ const config = this.currentManualProviders();
4351
+ const provider = config.providers.find((entry) => entry.id === providerId);
4352
+ if (!provider) {
4353
+ res.status(404).json({ error: { code: "manual_provider_not_found", message: `manual provider not found: ${providerId}` } });
4354
+ return;
4355
+ }
4356
+ providerFound = true;
4357
+ const probe = await this.probeManualProviderModels({
4358
+ baseUrl: provider.baseUrl,
4359
+ apiKey: this.manualProviderApiKey(provider)
4360
+ });
4361
+ const updatedProvider = {
4362
+ ...provider,
4363
+ models: probe.modelIds,
4364
+ supportedProtocols: probe.supportedProtocols,
4365
+ updatedAt: new Date().toISOString()
4366
+ };
4367
+ const nextConfig = {
4368
+ version: 1,
4369
+ providers: config.providers.map((entry) => entry.id === providerId ? updatedProvider : entry),
4370
+ routing: config.routing,
4371
+ updatedAt: new Date().toISOString()
4372
+ };
4373
+ this.saveManualProviders(nextConfig);
4374
+ this.recordManualProviderProbeResult({
4375
+ providerId,
4376
+ status: "healthy"
4377
+ });
4378
+ logger.info("routing.manual_provider.models_refresh_succeeded", "manual provider models refreshed", {
4379
+ providerId,
4380
+ modelCount: probe.modelIds.length,
4381
+ supportedProtocols: probe.supportedProtocols,
4382
+ elapsedMs: probe.elapsedMs
4383
+ });
4384
+ const observations = new Map(this.currentManualProviderObservations().observations.map((entry) => [entry.providerId, entry]));
4385
+ res.status(200).json({
4386
+ ok: true,
4387
+ providerId,
4388
+ modelIds: probe.modelIds,
4389
+ supportedProtocols: probe.supportedProtocols,
4390
+ protocolProbes: probe.protocolProbes,
4391
+ elapsedMs: probe.elapsedMs,
4392
+ provider: publicManualProviderConfig(updatedProvider, observations.get(providerId)),
4393
+ routing: nextConfig.routing,
4394
+ providers: nextConfig.providers.map((entry) => publicManualProviderConfig(entry, observations.get(entry.id)))
4395
+ });
4396
+ }
4397
+ catch (error) {
4398
+ const errorMessage = error instanceof Error ? error.message : String(error);
4399
+ if (providerFound) {
4400
+ this.recordManualProviderProbeResult({
4401
+ providerId,
4402
+ status: "unhealthy",
4403
+ errorClass: "models_refresh_failed",
4404
+ errorMessage
4405
+ });
4406
+ }
4407
+ logger.warn("routing.manual_provider.models_refresh_failed", "manual provider models refresh failed", { providerId, errorMessage });
4408
+ res.status(400).json({ error: { code: "manual_provider_models_refresh_failed", message: errorMessage } });
4409
+ }
4410
+ });
3825
4411
  controlApp.post("/routing/manual-providers/local", async (req, res) => {
3826
4412
  try {
3827
4413
  const body = (req.body ?? {});
@@ -3844,7 +4430,7 @@ export class TokenbuddyDaemon {
3844
4430
  apiKeyEnv: undefined,
3845
4431
  secretRef,
3846
4432
  models: probe.modelIds,
3847
- supportedProtocols: Array.isArray(body.supportedProtocols) ? body.supportedProtocols : ["chat_completions"],
4433
+ supportedProtocols: probe.supportedProtocols,
3848
4434
  enabled: body.enabled === undefined ? true : body.enabled
3849
4435
  };
3850
4436
  delete providerInput.apiKey;
@@ -3863,6 +4449,7 @@ export class TokenbuddyDaemon {
3863
4449
  logger.info("routing.manual_provider.local_created", "local manual provider created", {
3864
4450
  providerId: provider.id,
3865
4451
  modelCount: provider.models.length,
4452
+ supportedProtocols: provider.supportedProtocols,
3866
4453
  enabled: provider.enabled,
3867
4454
  keyRefKind: "secret"
3868
4455
  });
@@ -3936,7 +4523,7 @@ export class TokenbuddyDaemon {
3936
4523
  baseUrl,
3937
4524
  secretRef,
3938
4525
  models: probe.modelIds,
3939
- supportedProtocols: provider.supportedProtocols.length > 0 ? provider.supportedProtocols : ["chat_completions"],
4526
+ supportedProtocols: probe.supportedProtocols,
3940
4527
  enabled: body.enabled === undefined ? provider.enabled : body.enabled,
3941
4528
  updatedAt: new Date().toISOString()
3942
4529
  }, {
@@ -3956,6 +4543,7 @@ export class TokenbuddyDaemon {
3956
4543
  logger.info("routing.manual_provider.local_updated", "local manual provider updated", {
3957
4544
  providerId: updatedProvider.id,
3958
4545
  modelCount: updatedProvider.models.length,
4546
+ supportedProtocols: updatedProvider.supportedProtocols,
3959
4547
  enabled: updatedProvider.enabled,
3960
4548
  keyRefKind: "secret"
3961
4549
  });
@@ -4465,6 +5053,8 @@ export class TokenbuddyDaemon {
4465
5053
  await this.refreshSellerRouteMetadata(registrySellers);
4466
5054
  this.sellerPool.ensureRegistrySellers(registrySellers);
4467
5055
  const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
5056
+ const concurrencySnapshot = this.sellerConcurrencyLimiter.snapshot();
5057
+ const localConcurrencyBySellerId = new Map(concurrencySnapshot.active.map((entry) => [entry.sellerId, entry.activeCount]));
4468
5058
  const plan = planSellerRouteSet({
4469
5059
  modelId,
4470
5060
  protocol,
@@ -4472,7 +5062,12 @@ export class TokenbuddyDaemon {
4472
5062
  registrySellers,
4473
5063
  routing: resolvedRouting,
4474
5064
  prewarmCandidates: this.prewarmCache.get(modelId, protocol, paymentMethod)?.candidates,
4475
- sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry)),
5065
+ sellerMetrics: Array.from(poolById.values()).map((entry) => this.routeMetricFromPoolEntry(entry, concurrencySnapshot.enabled
5066
+ ? {
5067
+ localConcurrencyActive: localConcurrencyBySellerId.get(entry.sellerId) ?? 0,
5068
+ localConcurrencyLimit: concurrencySnapshot.maxInFlightPerSeller
5069
+ }
5070
+ : undefined)),
4476
5071
  now: Date.now()
4477
5072
  });
4478
5073
  return { modelId, protocol, paymentMethod, plan };
@@ -4673,6 +5268,141 @@ function parseOpenAiModelIds(value) {
4673
5268
  })
4674
5269
  .filter((id, index, all) => all.indexOf(id) === index);
4675
5270
  }
5271
/**
 * Checks whether a parsed response body looks like a chat completion.
 *
 * @param {*} value Parsed response body (any type).
 * @returns {boolean} `true` when `value` is a non-array object whose `choices`
 *   array has at least one non-array object entry carrying a non-array object
 *   `message`; `false` otherwise.
 */
function hasOpenAiChatChoice(value) {
    const isRecord = (candidate) => typeof candidate === "object" && candidate !== null && !Array.isArray(candidate);
    if (!isRecord(value)) {
        return false;
    }
    const { choices } = value;
    if (!Array.isArray(choices)) {
        return false;
    }
    for (const entry of choices) {
        if (isRecord(entry) && isRecord(entry.message)) {
            return true;
        }
    }
    return false;
}
5287
/**
 * Builds the minimal request payload used to probe one protocol of a manual provider.
 *
 * @param {string} protocol `"responses"`, `"images_generations"`, or anything
 *   else (treated as a chat-completions probe).
 * @param {string} modelId Model id placed in the payload's `model` field.
 * @returns {object} Request body for the probe.
 */
function manualProviderProtocolProbeBody(protocol, modelId) {
    switch (protocol) {
        case "responses":
            return {
                model: modelId,
                input: "ping",
                max_output_tokens: 1,
                stream: false
            };
        case "images_generations":
            // n is 0 here - presumably so the provider rejects the request at
            // validation time instead of generating an image; confirm with the caller.
            return {
                model: modelId,
                prompt: "ping",
                n: 0,
                size: "1024x1024"
            };
        default:
            return {
                model: modelId,
                messages: [{ role: "user", content: "ping" }],
                max_tokens: 1,
                stream: false
            };
    }
}
5311
/**
 * Picks up to six model ids to try when probing a manual provider's protocol.
 *
 * For `"images_generations"` the choice is delegated to
 * `imageProbeModelCandidates`. Otherwise candidates are ordered as:
 *   1. well-known exact ids, in the preference order listed below;
 *   2. the first available id for each preferred vendor prefix;
 *   3. the remaining ids in their original order.
 *
 * Ids are de-duplicated (first occurrence wins). Entries that are not
 * non-empty strings are ignored, so a malformed model list cannot make the
 * prefix matching throw.
 *
 * @param {Array<*>} modelIds Model ids reported by the provider.
 * @param {string} protocol Protocol being probed.
 * @returns {string[]} At most six candidate model ids, most preferred first.
 */
function manualProviderProbeModelCandidates(modelIds, protocol) {
    const maxCandidates = 6;
    // A Set keeps first-occurrence order and avoids the quadratic indexOf scan.
    const uniqueIds = [...new Set(modelIds)].filter((id) => typeof id === "string" && id.length > 0);
    if (protocol === "images_generations") {
        return imageProbeModelCandidates(uniqueIds);
    }
    const preferredExactIds = [
        "openai/gpt-chat-latest",
        "openai/gpt-5.5",
        "openai/gpt-5.4",
        "gpt-5.5",
        "gpt-5.4",
        "gpt-5.2",
        "gpt-4o",
        "gpt-4.1",
        "minimax/minimax-m3",
        "deepseek/deepseek-v4-pro"
    ];
    const preferredPrefixes = ["openai/", "qwen/", "google/", "minimax/", "deepseek/"];
    const available = new Set(uniqueIds);
    const candidates = [];
    const push = (id) => {
        if (id && !candidates.includes(id)) {
            candidates.push(id);
        }
    };
    for (const id of preferredExactIds) {
        if (available.has(id)) {
            push(id);
        }
    }
    for (const prefix of preferredPrefixes) {
        push(uniqueIds.find((candidate) => candidate.startsWith(prefix)));
    }
    for (const id of uniqueIds) {
        if (candidates.length >= maxCandidates) {
            break;
        }
        push(id);
    }
    // The preferred passes alone can yield more than the cap, so always trim.
    return candidates.slice(0, maxCandidates);
}
5349
/**
 * Picks up to six model ids to try when probing image generation.
 *
 * Ordering: well-known image model ids first (in the preference order listed
 * below), then every id matching `gpt-image` at the start or right after a
 * `/`, then the remaining ids in their original order. Falsy ids are skipped
 * and duplicates keep their first position.
 *
 * @param {Array<*>} uniqueIds Model ids to choose from.
 * @returns {Array<*>} At most six candidate ids, most preferred first.
 */
function imageProbeModelCandidates(uniqueIds) {
    const preferredExactIds = [
        "gpt-image-2",
        "gpt-image-1.5",
        "gpt-image-1",
        "openai/gpt-image-2",
        "openai/gpt-image-1.5",
        "openai/gpt-image-1"
    ];
    const exactMatches = preferredExactIds.filter((preferred) => uniqueIds.includes(preferred));
    const imageFamily = uniqueIds.filter((id) => /(^|\/)gpt-image/i.test(id));
    // Insertion order of the Set yields the priority order with duplicates dropped.
    const ordered = new Set();
    for (const id of [...exactMatches, ...imageFamily, ...uniqueIds]) {
        if (id) {
            ordered.add(id);
        }
    }
    return [...ordered].slice(0, 6);
}
5380
/**
 * Reads a response body as text on a best-effort basis.
 *
 * @param {{text: Function}} response Object exposing a `text()` method.
 * @returns {Promise<*>} Whatever `response.text()` resolves to, or `""` when
 *   calling or awaiting it fails.
 */
async function readResponseText(response) {
    let bodyText = "";
    try {
        bodyText = await response.text();
    }
    catch {
        // Deliberate best-effort: an unreadable body is treated as empty.
    }
    return bodyText;
}
5388
/**
 * Decides whether a failed image-generation probe still shows that the
 * endpoint exists.
 *
 * Only an HTTP 400 whose error message (extracted by `imageProbeErrorMessage`
 * and compared case-insensitively) complains about the image count `n` being
 * too small counts as supported.
 *
 * @param {number} httpStatus Status code of the probe response.
 * @param {string} responseText Raw body of the probe response.
 * @returns {boolean} `true` when the rejection matches one of the known
 *   `n` validation messages.
 */
function imageValidationProbeSupported(httpStatus, responseText) {
    if (httpStatus === 400) {
        const normalized = imageProbeErrorMessage(responseText).toLowerCase();
        const validationHints = [
            "n must be greater than 0",
            "n must be at least 1",
            "greater than or equal to 1"
        ];
        return validationHints.some((hint) => normalized.includes(hint));
    }
    return false;
}
5397
/**
 * Extracts `error.message` from a JSON error body.
 *
 * @param {string} responseText Raw response body.
 * @returns {string} The `error.message` string, or `""` when the body is not
 *   valid JSON or holds no string at that path.
 */
function imageProbeErrorMessage(responseText) {
    let payload;
    try {
        payload = JSON.parse(responseText);
    }
    catch {
        return "";
    }
    const detail = payload?.error?.message;
    return typeof detail === "string" ? detail : "";
}
4676
5406
  function normalizeTrustedRegistryCache(value) {
4677
5407
  if (!value || typeof value !== "object") {
4678
5408
  return undefined;