@tokenbuddy/tokenbuddy 1.0.34 → 1.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/daemon.ts CHANGED
@@ -313,6 +313,10 @@ interface UsageSummary {
313
313
  completionTokens: number;
314
314
  cacheReadTokens: number;
315
315
  billedMicros: number;
316
+ imageCount?: number;
317
+ imageSize?: string;
318
+ imageQuality?: string;
319
+ imageOutputFormat?: string;
316
320
  }
317
321
 
318
322
  interface ProxyBodySummary {
@@ -365,6 +369,7 @@ interface SellerSettlementSummary {
365
369
  }
366
370
 
367
371
  interface BillingBreakdownSummary {
372
+ billingUnit?: "tokens" | "images";
368
373
  inputPriceMicrosPer1m: number;
369
374
  outputPriceMicrosPer1m: number;
370
375
  cacheReadPriceMicrosPer1m: number;
@@ -374,6 +379,13 @@ interface BillingBreakdownSummary {
374
379
  originalUsdMicros: number;
375
380
  billingMultiplier: number;
376
381
  serviceTier?: string;
382
+ imageCount?: number;
383
+ imageSize?: string;
384
+ imageQuality?: string;
385
+ imageOutputFormat?: string;
386
+ imageOutputTokens?: number;
387
+ imageOutputCostMicros?: number;
388
+ imageCostMicrosPerImage?: number;
377
389
  }
378
390
 
379
391
  interface SellerAttemptRequestContext {
@@ -424,6 +436,17 @@ function safeBillingServiceTier(value: unknown): string | undefined {
424
436
  return /^[A-Za-z0-9 _.-]+$/.test(trimmed) ? trimmed : undefined;
425
437
  }
426
438
 
439
/**
 * Narrows an untrusted value to one of the two supported billing units.
 *
 * @param value Raw value, typically read from an external payload.
 * @returns `"tokens"` or `"images"` when `value` is exactly that string;
 *          `undefined` for anything else.
 */
function safeBillingUnit(value: unknown): "tokens" | "images" | undefined {
  if (value === "tokens") {
    return "tokens";
  }
  if (value === "images") {
    return "images";
  }
  return undefined;
}
442
+
443
/**
 * Accepts a value only when it is a short string that is safe to display.
 *
 * The value must be a string which, once surrounding whitespace is trimmed,
 * is between 1 and 80 characters long and consists solely of ASCII letters,
 * digits, spaces and the characters `_ . / : -`.
 *
 * @param value Raw value, typically read from an external payload.
 * @returns The trimmed string when every check passes; otherwise `undefined`.
 */
function safeShortDisplayString(value: unknown): string | undefined {
  if (typeof value === "string") {
    const candidate = value.trim();
    const lengthInRange = candidate.length >= 1 && candidate.length <= 80;
    if (lengthInRange && /^[A-Za-z0-9 _./:-]+$/.test(candidate)) {
      return candidate;
    }
  }
  return undefined;
}
449
+
427
450
  function billingBreakdownSummary(value: unknown): BillingBreakdownSummary | undefined {
428
451
  const data = usageRecord(value);
429
452
  if (!data) return undefined;
@@ -448,6 +471,7 @@ function billingBreakdownSummary(value: unknown): BillingBreakdownSummary | unde
448
471
  return undefined;
449
472
  }
450
473
  return {
474
+ billingUnit: safeBillingUnit(data.billingUnit ?? data.billing_unit),
451
475
  inputPriceMicrosPer1m,
452
476
  outputPriceMicrosPer1m,
453
477
  cacheReadPriceMicrosPer1m,
@@ -456,10 +480,36 @@ function billingBreakdownSummary(value: unknown): BillingBreakdownSummary | unde
456
480
  cacheReadCostMicros,
457
481
  originalUsdMicros,
458
482
  billingMultiplier,
459
- serviceTier: safeBillingServiceTier(data.serviceTier ?? data.service_tier)
483
+ serviceTier: safeBillingServiceTier(data.serviceTier ?? data.service_tier),
484
+ imageCount: nonNegativeIntegerField(data.imageCount ?? data.image_count),
485
+ imageSize: safeShortDisplayString(data.imageSize ?? data.image_size),
486
+ imageQuality: safeShortDisplayString(data.imageQuality ?? data.image_quality),
487
+ imageOutputFormat: safeShortDisplayString(data.imageOutputFormat ?? data.image_output_format),
488
+ imageOutputTokens: nonNegativeIntegerField(data.imageOutputTokens ?? data.image_output_tokens),
489
+ imageOutputCostMicros: nonNegativeIntegerField(data.imageOutputCostMicros ?? data.image_output_cost_micros),
490
+ imageCostMicrosPerImage: nonNegativeIntegerField(data.imageCostMicrosPerImage ?? data.image_cost_micros_per_image)
460
491
  };
461
492
  }
462
493
 
494
/**
 * Collects image-generation metadata from a response body, falling back to
 * the request body where the response does not carry a usable value.
 *
 * - `imageCount` is the length of the response's `data` array when `data` is
 *   an array; otherwise it is the request's `n` (as read by
 *   `nonNegativeIntegerField`) when that is greater than zero.
 * - `imageSize`, `imageQuality` and `imageOutputFormat` are each taken from
 *   the first source that yields a value accepted by `safeShortDisplayString`:
 *   the response's top-level field, then the same field on the first
 *   object-like entry of `data`, then the request's field.
 *
 * @param responseBody Parsed response payload, if any.
 * @param requestBody Parsed request payload, if any.
 * @returns The image-related subset of a usage summary; fields that cannot be
 *          resolved are `undefined`.
 */
function imageUsageMetadata(responseBody: Record<string, unknown> | undefined, requestBody: Record<string, unknown> | undefined): Partial<UsageSummary> {
  const responseData = responseBody?.data;
  const images = Array.isArray(responseData) ? responseData : undefined;
  const firstImageRecord = images?.find((entry) => entry && typeof entry === "object") as Record<string, unknown> | undefined;
  const requestedCount = nonNegativeIntegerField(requestBody?.n);

  // Resolve one display field using the response -> first image -> request precedence.
  const pickField = (key: string): string | undefined =>
    safeShortDisplayString(responseBody?.[key])
    ?? safeShortDisplayString(firstImageRecord?.[key])
    ?? safeShortDisplayString(requestBody?.[key]);

  let imageCount: number | undefined;
  if (images) {
    // A present `data` array is authoritative, even when it is empty.
    imageCount = images.length;
  } else if (requestedCount && requestedCount > 0) {
    imageCount = requestedCount;
  } else {
    imageCount = undefined;
  }

  return {
    imageCount,
    imageSize: pickField("size"),
    imageQuality: pickField("quality"),
    imageOutputFormat: pickField("output_format")
  };
}
505
+
506
/**
 * Computes the cost of a single image, in micros, by dividing a total evenly
 * across the number of images and rounding the quotient up.
 *
 * @param totalMicros Total cost in micros, or `undefined` when unknown.
 * @param imageCount Number of images, or `undefined` when unknown.
 * @returns The rounded-up per-image cost, or `undefined` when either input is
 *          missing or not a finite number, or when `imageCount` is not
 *          greater than zero.
 */
function imageCostMicrosPerImage(totalMicros: number | undefined, imageCount: number | undefined): number | undefined {
  if (totalMicros === undefined || imageCount === undefined) {
    return undefined;
  }
  // NaN compares false against `<= 0`, so without an explicit finiteness check
  // a NaN count (or a NaN/Infinity total) would leak NaN/Infinity as a cost.
  if (!Number.isFinite(totalMicros) || !Number.isFinite(imageCount)) {
    return undefined;
  }
  if (imageCount <= 0) {
    return undefined;
  }
  return Math.ceil(totalMicros / imageCount);
}
512
+
463
513
  function purchasePaymentSummaryFromQuote(value: unknown): PurchasePaymentSummary {
464
514
  const quote = usageRecord(value);
465
515
  if (!quote) return {};
@@ -860,7 +910,16 @@ export class TokenbuddyDaemon {
860
910
  const bundledDir = this.bundledClawtipStaticDir();
861
911
  const rechargeSourcePath = bundledDir ? path.join(bundledDir, CLAWTIP_RECHARGE_QR_FILE) : undefined;
862
912
  if (rechargeSourcePath && fs.existsSync(rechargeSourcePath)) {
863
- fs.copyFileSync(rechargeSourcePath, rechargeOutputPath);
913
+ fs.mkdirSync(path.dirname(rechargeOutputPath), { recursive: true });
914
+ try {
915
+ fs.copyFileSync(rechargeSourcePath, rechargeOutputPath);
916
+ } catch (err) {
917
+ if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
918
+ throw err;
919
+ }
920
+ fs.mkdirSync(path.dirname(rechargeOutputPath), { recursive: true });
921
+ fs.copyFileSync(rechargeSourcePath, rechargeOutputPath);
922
+ }
864
923
  }
865
924
  }
866
925
 
@@ -1319,6 +1378,19 @@ export class TokenbuddyDaemon {
1319
1378
  });
1320
1379
  }
1321
1380
 
1381
+ /**
1382
+ * 将 AutoProviderConfig 转换为 BuyerSellerRoutingConfig。
1383
+ *
1384
+ * 映射规则:
1385
+ * - `recommended` 模式 → `fullAuto`:使用全部 registry sellers,由评分器自动排序
1386
+ * - `custom` 模式 → `fixedSet`:使用 `config.sellerIds` 白名单内的 sellers
1387
+ *
1388
+ * 注意:`config.modelIds` 不直接用于路由过滤,而是通过 `applyFocusSet()` 设置焦点集合,
1389
+ * 影响 UI 推荐和初始化向导,但不限制实际路由决策。
1390
+ *
1391
+ * @param config Auto provider 配置
1392
+ * @returns Seller 路由配置
1393
+ */
1322
1394
  private applyAutoProviderRoutingConfig(config: AutoProviderConfig): BuyerSellerRoutingConfig {
1323
1395
  const routing: BuyerSellerRoutingConfig = config.range === "custom"
1324
1396
  ? {
@@ -1340,7 +1412,19 @@ export class TokenbuddyDaemon {
1340
1412
  }
1341
1413
 
1342
1414
  private autoProviderCanRoute(config: AutoProviderConfig): boolean {
1343
- return config.enabled && (config.range !== "custom" || config.sellerIds.length > 0);
1415
+ if (!config.enabled) {
1416
+ logger.warn("route.auto_provider.disabled", "auto provider is disabled in config");
1417
+ return false;
1418
+ }
1419
+ if (config.range === "custom" && config.sellerIds.length === 0) {
1420
+ logger.warn("route.auto_provider.custom_empty", "auto provider custom mode requires sellerIds", {
1421
+ range: config.range,
1422
+ modelIds: config.modelIds,
1423
+ scorer: config.scorer
1424
+ });
1425
+ return false;
1426
+ }
1427
+ return true;
1344
1428
  }
1345
1429
 
1346
1430
  private providerModePayload(): Record<string, unknown> {
@@ -1627,6 +1711,9 @@ export class TokenbuddyDaemon {
1627
1711
  if (endpoint === "/v1/messages" || endpoint === "/messages") {
1628
1712
  return "messages";
1629
1713
  }
1714
+ if (endpoint === "/v1/images/generations") {
1715
+ return "images_generations";
1716
+ }
1630
1717
  return undefined;
1631
1718
  }
1632
1719
 
@@ -1761,6 +1848,20 @@ export class TokenbuddyDaemon {
1761
1848
  planReason: reason,
1762
1849
  planSellerCount: providers.length
1763
1850
  }));
1851
+
1852
+ // Log manual provider routing decision for auditability
1853
+ logger.info("route.manual.decision", "manual provider routing decision", {
1854
+ policy: config.routing.policy,
1855
+ selectedProviderId: providers[0].id,
1856
+ selectedProviderName: providers[0].name,
1857
+ totalCandidates: providers.length,
1858
+ candidateProviderIds: providers.map((p) => p.id),
1859
+ endpoint,
1860
+ modelId,
1861
+ protocol,
1862
+ reason
1863
+ });
1864
+
1764
1865
  return {
1765
1866
  routes,
1766
1867
  paymentMethod: "provider_key",
@@ -1842,11 +1943,15 @@ export class TokenbuddyDaemon {
1842
1943
  selectionMode: this.selectionMode,
1843
1944
  sellerRoutingMode: routing.mode,
1844
1945
  sellerRoutingScorer: routing.scorer,
1946
+ autoProviderRange: this.currentAutoProviderConfig().range,
1947
+ autoProviderCustomSellerIds: this.currentAutoProviderConfig().range === "custom" ? this.currentAutoProviderConfig().sellerIds : undefined,
1845
1948
  routeSource: planned.source,
1846
1949
  routeSourceReason: planned.sourceReason,
1847
1950
  routeReason: planned.reason,
1848
1951
  candidateDiagnostics: planned.diagnostics,
1849
1952
  sellerCount: planned.routes.length,
1953
+ selectedSellerId: planned.routes[0]?.seller.id,
1954
+ selectedSellerName: planned.routes[0]?.seller.name,
1850
1955
  sellers: planned.routes.map((route) => route.seller.id)
1851
1956
  });
1852
1957
 
@@ -2251,7 +2356,7 @@ export class TokenbuddyDaemon {
2251
2356
  };
2252
2357
  }
2253
2358
 
2254
- private readUsage(bodyText: string): UsageSummary {
2359
+ private readUsage(bodyText: string, endpoint?: string, requestBody?: unknown): UsageSummary {
2255
2360
  const fallback: UsageSummary = {
2256
2361
  promptTokens: 0,
2257
2362
  completionTokens: 0,
@@ -2273,11 +2378,15 @@ export class TokenbuddyDaemon {
2273
2378
  ?? nonNegativeIntegerField(usage?.cache_read_input_tokens)
2274
2379
  ?? nonNegativeIntegerField(usage?.cache_read_tokens)
2275
2380
  ?? 0;
2381
+ const imageMetadata = endpoint === "/v1/images/generations"
2382
+ ? imageUsageMetadata(data, usageRecord(requestBody))
2383
+ : {};
2276
2384
  return {
2277
2385
  promptTokens,
2278
2386
  completionTokens,
2279
2387
  cacheReadTokens,
2280
- billedMicros: (promptTokens + completionTokens) * 4
2388
+ billedMicros: (promptTokens + completionTokens) * 4,
2389
+ ...imageMetadata
2281
2390
  };
2282
2391
  } catch {
2283
2392
  return fallback;
@@ -2349,6 +2458,14 @@ export class TokenbuddyDaemon {
2349
2458
  originalUsdMicros: billingBreakdown?.originalUsdMicros,
2350
2459
  billingMultiplier: billingBreakdown?.billingMultiplier,
2351
2460
  serviceTier: billingBreakdown?.serviceTier,
2461
+ billingUnit: billingBreakdown?.billingUnit ?? (endpoint === "/v1/images/generations" ? "images" : "tokens"),
2462
+ imageCount: billingBreakdown?.imageCount ?? usage.imageCount,
2463
+ imageSize: billingBreakdown?.imageSize ?? usage.imageSize,
2464
+ imageQuality: billingBreakdown?.imageQuality ?? usage.imageQuality,
2465
+ imageOutputFormat: billingBreakdown?.imageOutputFormat ?? usage.imageOutputFormat,
2466
+ imageOutputTokens: billingBreakdown?.imageOutputTokens ?? (endpoint === "/v1/images/generations" ? usage.completionTokens : undefined),
2467
+ imageOutputCostMicros: billingBreakdown?.imageOutputCostMicros ?? (endpoint === "/v1/images/generations" ? billingBreakdown?.outputCostMicros : undefined),
2468
+ imageCostMicrosPerImage: billingBreakdown?.imageCostMicrosPerImage ?? imageCostMicrosPerImage(settledMicros ?? usage.billedMicros, billingBreakdown?.imageCount ?? usage.imageCount),
2352
2469
  balanceSnapshotMicros: settlement?.remainingCreditMicros,
2353
2470
  balanceSource: settlement ? "seller_authoritative" : "estimated",
2354
2471
  prompt,
@@ -2371,6 +2488,8 @@ export class TokenbuddyDaemon {
2371
2488
  settledMicros,
2372
2489
  settledUsdMicros: settlement?.settledUsdMicros,
2373
2490
  billedMicros: settledMicros ?? usage.billedMicros,
2491
+ billingUnit: billingBreakdown?.billingUnit ?? (endpoint === "/v1/images/generations" ? "images" : "tokens"),
2492
+ imageCount: billingBreakdown?.imageCount ?? usage.imageCount,
2374
2493
  promptTokens: usage.promptTokens,
2375
2494
  completionTokens: usage.completionTokens,
2376
2495
  cacheReadTokens: usage.cacheReadTokens,
@@ -3192,6 +3311,7 @@ export class TokenbuddyDaemon {
3192
3311
  billedMicros: Math.max(1, bytes),
3193
3312
  estimatedMicros: Math.max(1, bytes),
3194
3313
  priceVersion: `local-provider:${provider.id}`,
3314
+ billingUnit: endpoint === "/v1/images/generations" ? "images" : "tokens",
3195
3315
  balanceSource: "self_funded_provider",
3196
3316
  prompt: this.inferPromptForHash(reqBody),
3197
3317
  ttftMs,
@@ -3208,7 +3328,7 @@ export class TokenbuddyDaemon {
3208
3328
  const responseBody = await response.text();
3209
3329
  markFirstByte();
3210
3330
  res.send(responseBody);
3211
- const usage = this.readUsage(responseBody);
3331
+ const usage = this.readUsage(responseBody, endpoint, reqBody);
3212
3332
  const durationMs = Date.now() - startedAt;
3213
3333
  const ttftMs = Date.now() - attemptStartedAt;
3214
3334
  const completionTokens = usage.completionTokens;
@@ -3232,6 +3352,13 @@ export class TokenbuddyDaemon {
3232
3352
  billedMicros: usage.billedMicros,
3233
3353
  estimatedMicros: usage.billedMicros,
3234
3354
  priceVersion: `local-provider:${provider.id}`,
3355
+ billingUnit: endpoint === "/v1/images/generations" ? "images" : "tokens",
3356
+ imageCount: usage.imageCount,
3357
+ imageSize: usage.imageSize,
3358
+ imageQuality: usage.imageQuality,
3359
+ imageOutputFormat: usage.imageOutputFormat,
3360
+ imageOutputTokens: endpoint === "/v1/images/generations" ? usage.completionTokens : undefined,
3361
+ imageCostMicrosPerImage: imageCostMicrosPerImage(usage.billedMicros, usage.imageCount),
3235
3362
  balanceSource: "self_funded_provider",
3236
3363
  prompt: this.inferPromptForHash(reqBody),
3237
3364
  response: responseBody,
@@ -3644,7 +3771,7 @@ export class TokenbuddyDaemon {
3644
3771
  lease.refresh();
3645
3772
  markFirstByte();
3646
3773
  res.send(responseBody);
3647
- const usage = this.readUsage(responseBody);
3774
+ const usage = this.readUsage(responseBody, endpoint, body);
3648
3775
  void this.refreshSellerRuntimeMetrics(route, requestId);
3649
3776
  this.recordReconciledInference(
3650
3777
  route,
@@ -4857,7 +4984,7 @@ export class TokenbuddyDaemon {
4857
4984
  }
4858
4985
  });
4859
4986
 
4860
- for (const endpoint of ["/v1/chat/completions", "/v1/responses", "/v1/messages", "/messages"]) {
4987
+ for (const endpoint of ["/v1/chat/completions", "/v1/responses", "/v1/messages", "/messages", "/v1/images/generations"]) {
4861
4988
  proxyApp.post(endpoint, async (req: Request, res: Response) => {
4862
4989
  await this.forwardProxyRequest(endpoint, req, res);
4863
4990
  });
@@ -5014,7 +5141,7 @@ export class TokenbuddyDaemon {
5014
5141
  }
5015
5142
 
5016
5143
  private resolvePrewarmProtocol(modelId: string, paymentMethod = "clawtip"): string | undefined {
5017
- for (const protocol of ["chat_completions", "messages", "responses"]) {
5144
+ for (const protocol of ["chat_completions", "messages", "responses", "images_generations"]) {
5018
5145
  if (this.modelIndex.sellersFor(modelId, { protocol, paymentMethod }).length > 0) {
5019
5146
  return protocol;
5020
5147
  }
@@ -7,7 +7,7 @@ export const MANUAL_PROVIDER_OBSERVATIONS_CONFIG_KEY = "manual-provider-observat
7
7
 
8
8
  export type ProviderMode = "manual" | "auto";
9
9
  export type ManualProviderKind = "openai-compatible";
10
- export type ProviderProtocol = "chat_completions" | "responses" | "messages";
10
+ export type ProviderProtocol = "chat_completions" | "responses" | "messages" | "images_generations";
11
11
  export type AutoProviderRange = "recommended" | "custom";
12
12
  export type ManualProviderRoutingPolicy = "fallback" | "locked";
13
13
 
@@ -85,7 +85,7 @@ export interface ManualProviderObservationsConfig {
85
85
  updatedAt: string;
86
86
  }
87
87
 
88
- const VALID_PROTOCOLS = new Set<ProviderProtocol>(["chat_completions", "responses", "messages"]);
88
+ const VALID_PROTOCOLS = new Set<ProviderProtocol>(["chat_completions", "responses", "messages", "images_generations"]);
89
89
  const VALID_SCORERS = new Set<SellerRoutingScorer>(["balanced", "speed", "discount"]);
90
90
 
91
91
  export function defaultProviderModeConfig(now = new Date().toISOString()): ProviderModeConfig {
@@ -11,7 +11,7 @@ const logger = createModuleLogger("tb-proxyd");
11
11
  /**
12
12
  * buyer 端协议偏好(用于按协议过滤 catalog)。`messages` 是 anthropic 协议的简称。
13
13
  */
14
- export type ProtocolPreference = "chat_completions" | "responses" | "messages";
14
+ export type ProtocolPreference = "chat_completions" | "responses" | "messages" | "images_generations";
15
15
 
16
16
  /**
17
17
  * wallet-bootstrap `/registry/sellers` 里的 seller 描述。
@@ -221,6 +221,21 @@ function compareCandidates(a: SortableCandidate, b: SortableCandidate, scorer: S
221
221
  * 计算单个 candidate 在指定 scorer 下的完整打分拆解(含各维度分量和缺失项)。
222
222
  * 不会修改输入 candidate,常用于 doctor 面板和调试日志。
223
223
  *
224
+ * 权重设计理由:
225
+ * - **Speed 评分器**:TTFT 65% + Tok/s 25% + Health 10%
226
+ * - TTFT 占主导是因为首 token 延迟直接影响用户感知的响应速度
227
+ * - Tok/s 次要,因为大多数场景下吞吐差异不如延迟显著
228
+ * - Health 最低,只作为平局时的兜底
229
+ *
230
+ * - **Discount 评分器**:折扣 100%
231
+ * - 纯成本优先,忽略性能指标
232
+ * - 平局时按健康分和注册顺序兜底
233
+ *
234
+ * - **Balanced 评分器**:Health 35% + TTFT 20% + Tok/s 20% + Discount 25%
235
+ * - Health 占比最高是为了避免选择不稳定的 seller
236
+ * - 速度(TTFT + Tok/s)合计 40%,与折扣 25% 形成平衡
237
+ * - 这是默认评分器,适合大多数生产场景
238
+ *
224
239
  * @param candidate 待打分的候选
225
240
  * @param scorer 评分器:`speed` / `discount` / `balanced`
226
241
  * @returns 打分拆解
@@ -266,6 +281,19 @@ export function scoreCandidateBreakdown(candidate: RoutingCandidate, scorer: Sel
266
281
  };
267
282
  }
268
283
 
284
+ /**
285
+ * 将延迟(毫秒)转换为 0-100 分数。
286
+ * 公式:`100 - latency_ms / 10`
287
+ *
288
+ * 设计理由:
289
+ * - 0ms → 100分,1000ms → 0分(线性递减)
290
+ * - 除以 10 的比例来自经验:100ms 是"优秀"延迟,1000ms 是"不可接受"延迟
291
+ * - 线性公式的局限:100ms vs 200ms 的差异被等同于 900ms vs 1000ms
292
+ * 实际用户体验可能不是线性的,未来可考虑对数或分段函数
293
+ *
294
+ * @param latencyMs 延迟(毫秒),undefined 或非有限值返回 0 分
295
+ * @returns 0-100 分数
296
+ */
269
297
  function latencyScore(latencyMs: number | undefined): number {
270
298
  if (!Number.isFinite(latencyMs)) {
271
299
  return 0;
@@ -273,6 +301,19 @@ function latencyScore(latencyMs: number | undefined): number {
273
301
  return Math.max(0, 100 - Math.max(0, latencyMs as number) / 10);
274
302
  }
275
303
 
304
+ /**
305
+ * 将输出吞吐(tokens/s)转换为 0-100 分数。
306
+ * 公式:直接取值并限制在 0-100 范围内
307
+ *
308
+ * 设计理由:
309
+ * - 假设大多数 seller 的吞吐在 0-100 tok/s 范围内
310
+ * - 100 tok/s 及以上都得满分(上界问题)
311
+ * - 这种线性映射的局限:无法区分 100 tok/s 和 200 tok/s 的差异
312
+ * 未来可考虑对数缩放,例如 `100 * log(1 + value) / log(101)`
313
+ *
314
+ * @param value 吞吐(tokens/s),undefined 或非有限值返回 0 分
315
+ * @returns 0-100 分数
316
+ */
276
317
  function tokensPerSecondScore(value: number | undefined): number {
277
318
  if (!Number.isFinite(value)) {
278
319
  return 0;