@tokenbuddy/tokenbuddy 1.0.8 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/dist/src/buyer-store.d.ts +13 -0
  2. package/dist/src/buyer-store.d.ts.map +1 -1
  3. package/dist/src/buyer-store.js +21 -2
  4. package/dist/src/buyer-store.js.map +1 -1
  5. package/dist/src/cli.d.ts.map +1 -1
  6. package/dist/src/cli.js +54 -0
  7. package/dist/src/cli.js.map +1 -1
  8. package/dist/src/credit-tracker.d.ts +118 -0
  9. package/dist/src/credit-tracker.d.ts.map +1 -0
  10. package/dist/src/credit-tracker.js +220 -0
  11. package/dist/src/credit-tracker.js.map +1 -0
  12. package/dist/src/daemon.d.ts +49 -4
  13. package/dist/src/daemon.d.ts.map +1 -1
  14. package/dist/src/daemon.js +541 -405
  15. package/dist/src/daemon.js.map +1 -1
  16. package/dist/src/model-index.d.ts +86 -0
  17. package/dist/src/model-index.d.ts.map +1 -0
  18. package/dist/src/model-index.js +214 -0
  19. package/dist/src/model-index.js.map +1 -0
  20. package/dist/src/prewarm-cache.d.ts +149 -0
  21. package/dist/src/prewarm-cache.d.ts.map +1 -0
  22. package/dist/src/prewarm-cache.js +288 -0
  23. package/dist/src/prewarm-cache.js.map +1 -0
  24. package/dist/src/prewarm-scheduler.d.ts +150 -0
  25. package/dist/src/prewarm-scheduler.d.ts.map +1 -0
  26. package/dist/src/prewarm-scheduler.js +484 -0
  27. package/dist/src/prewarm-scheduler.js.map +1 -0
  28. package/dist/src/provider-install.d.ts.map +1 -1
  29. package/dist/src/provider-install.js +9 -1
  30. package/dist/src/provider-install.js.map +1 -1
  31. package/dist/src/route-failover.d.ts +96 -0
  32. package/dist/src/route-failover.d.ts.map +1 -0
  33. package/dist/src/route-failover.js +177 -0
  34. package/dist/src/route-failover.js.map +1 -0
  35. package/dist/src/seller-catalog.d.ts +26 -0
  36. package/dist/src/seller-catalog.d.ts.map +1 -1
  37. package/dist/src/seller-catalog.js +40 -0
  38. package/dist/src/seller-catalog.js.map +1 -1
  39. package/dist/src/seller-pool.d.ts +127 -0
  40. package/dist/src/seller-pool.d.ts.map +1 -0
  41. package/dist/src/seller-pool.js +243 -0
  42. package/dist/src/seller-pool.js.map +1 -0
  43. package/dist/src/stream-failover.d.ts +78 -0
  44. package/dist/src/stream-failover.d.ts.map +1 -0
  45. package/dist/src/stream-failover.js +93 -0
  46. package/dist/src/stream-failover.js.map +1 -0
  47. package/package.json +1 -1
  48. package/src/buyer-store.ts +32 -2
  49. package/src/cli.ts +61 -0
  50. package/src/credit-tracker.test.ts +165 -0
  51. package/src/credit-tracker.ts +269 -0
  52. package/src/daemon.ts +569 -445
  53. package/src/model-index.test.ts +184 -0
  54. package/src/model-index.ts +266 -0
  55. package/src/prewarm-cache.test.ts +281 -0
  56. package/src/prewarm-cache.ts +373 -0
  57. package/src/prewarm-scheduler.test.ts +367 -0
  58. package/src/prewarm-scheduler.ts +581 -0
  59. package/src/provider-install.ts +9 -1
  60. package/src/route-failover.test.ts +193 -0
  61. package/src/route-failover.ts +233 -0
  62. package/src/seller-catalog-413.test.ts +61 -0
  63. package/src/seller-catalog.ts +47 -0
  64. package/src/seller-pool.test.ts +231 -0
  65. package/src/seller-pool.ts +333 -0
  66. package/src/stream-failover.test.ts +52 -0
  67. package/src/stream-failover.ts +129 -0
  68. package/src/thousand-seller.test.ts +151 -0
  69. package/tests/daemon-413-fallback.test.ts +92 -0
  70. package/tests/e2e.test.ts +3 -2
  71. package/tests/tokenbuddy.test.ts +68 -11
package/src/daemon.ts CHANGED
@@ -14,17 +14,24 @@ import {
14
14
  } from "./provider-install.js";
15
15
  import {
16
16
  discoverSellerBackedModels,
17
- fetchSellerManifest,
18
17
  fetchSellerRegistry,
19
18
  manifestModelIds,
20
19
  manifestPaymentMethods,
21
20
  manifestProtocols,
22
21
  normalizeSellerUrl,
22
+ RegistryTooLargeError,
23
23
  type RegistrySeller,
24
24
  type SellerManifest,
25
25
  type SellerRegistryDocument,
26
26
  type SellerRoutingPreference,
27
27
  } from "./seller-catalog.js";
28
+ import { ModelIndex } from "./model-index.js";
29
+ import { PrewarmCache } from "./prewarm-cache.js";
30
+ import { CreditTracker } from "./credit-tracker.js";
31
+ import { SellerPool, type FailureKind } from "./seller-pool.js";
32
+ import { RouteFailover, type FailoverDecision, type RouteCandidate } from "./route-failover.js";
33
+ import { PrewarmScheduler, type SellerProber } from "./prewarm-scheduler.js";
34
+ import type { PoolEntry } from "./seller-pool.js";
28
35
 
29
36
  const logger = createModuleLogger("tb-proxyd");
30
37
  const PROXY_JSON_BODY_LIMIT = "10mb";
@@ -36,14 +43,21 @@ export interface DaemonConfig {
36
43
  sellerRegistryUrl: string;
37
44
  selectionMode?: "auto" | "manual";
38
45
  selectedSellerId?: string;
46
+ // v1.2 §18.4: focus-set override. When omitted, the daemon derives the
47
+ // focus set from the BuyerStore's historical model usage and the
48
+ // `TB_BUYER_WARMUP_MODELS` env var (comma-separated).
49
+ warmupModels?: string[];
50
+ warmupRefreshIntervalSecs?: number;
51
+ warmupProbeTimeoutMs?: number;
39
52
  }
40
53
 
41
54
  interface SellerRoute {
42
55
  seller: RegistrySeller;
43
- manifest: SellerManifest;
56
+ manifest: SellerManifest | null;
44
57
  protocol: string;
45
58
  modelId: string;
46
59
  paymentMethod: string;
60
+ poolEntry?: PoolEntry;
47
61
  }
48
62
 
49
63
  interface UsageSummary {
@@ -80,225 +94,6 @@ function numericHeaderField(value: unknown): number | undefined {
80
94
  return undefined;
81
95
  }
82
96
 
83
- interface ResponsesStreamState {
84
- itemId: string;
85
- text: string;
86
- contentPartStarted: boolean;
87
- }
88
-
89
- class ResponsesStreamNormalizer {
90
- private pending = "";
91
- private readonly state = new Map<string, ResponsesStreamState>();
92
-
93
- public push(chunk: string): string {
94
- this.pending += chunk;
95
- const blocks = this.pending.split("\n\n");
96
- this.pending = blocks.pop() || "";
97
- return blocks
98
- .map((block) => this.normalizeBlock(block))
99
- .filter((block) => block.length > 0)
100
- .join("\n\n");
101
- }
102
-
103
- public finish(): string {
104
- if (!this.pending.trim()) {
105
- return "";
106
- }
107
- const block = this.normalizeBlock(this.pending);
108
- this.pending = "";
109
- return block;
110
- }
111
-
112
- private normalizeBlock(block: string): string {
113
- if (!block.trim()) {
114
- return "";
115
- }
116
- // Each \n\n separates an event in SSE format
117
- const subBlocks = block.split("\n\n");
118
- const output: string[] = [];
119
-
120
- for (const sub of subBlocks) {
121
- if (!sub.trim() || sub.trim() === "data: [DONE]") {
122
- if (sub.trim()) output.push(sub);
123
- continue;
124
- }
125
-
126
- const lines = sub.split("\n");
127
- const eventLine = lines.find((l) => l.startsWith("event:"));
128
- const dataLine = lines.find((l) => l.startsWith("data:"));
129
- if (!dataLine) {
130
- output.push(sub);
131
- continue;
132
- }
133
- const rawData = dataLine.replace(/^data:\s?/, "");
134
- if (rawData === "[DONE]") {
135
- output.push(sub);
136
- continue;
137
- }
138
-
139
- let payload: any;
140
- try {
141
- payload = JSON.parse(rawData);
142
- } catch {
143
- output.push(sub);
144
- continue;
145
- }
146
-
147
- const eventName =
148
- (eventLine?.replace(/^event:\s?/, "") || payload?.type) as string;
149
- if (!eventName || !eventName.startsWith("response.")) {
150
- output.push(sub);
151
- continue;
152
- }
153
-
154
- // When upstream already sends content_part.added, record it in state
155
- if (
156
- eventName === "response.content_part.added" &&
157
- payload?.item_id
158
- ) {
159
- const current = this.state.get(payload.item_id as string);
160
- if (current) current.contentPartStarted = true;
161
- output.push(sub);
162
- continue;
163
- }
164
-
165
- // response.output_item.added: inject content_part.added only if upstream hasn't
166
- if (
167
- eventName === "response.output_item.added" &&
168
- payload?.item?.type === "message" &&
169
- payload?.item?.id
170
- ) {
171
- const itemId = payload.item.id as string;
172
- const current = this.getState(itemId);
173
- const item = { ...payload.item };
174
- item.content = [{ type: "output_text", text: "", annotations: [] }];
175
- output.push(this.serializeEvent(eventName, {
176
- ...payload,
177
- output_index: payload.output_index ?? 0,
178
- item
179
- }));
180
- if (!current.contentPartStarted) {
181
- current.contentPartStarted = true;
182
- output.push(this.serializeEvent("response.content_part.added", {
183
- type: "response.content_part.added",
184
- item_id: itemId,
185
- output_index: payload.output_index ?? 0,
186
- content_index: 0,
187
- part: { type: "output_text", text: "", annotations: [] }
188
- }));
189
- }
190
- continue;
191
- }
192
-
193
- // response.output_text.delta: inject content_part.added if missing
194
- if (eventName === "response.output_text.delta" && payload?.item_id) {
195
- const itemId = payload.item_id as string;
196
- const current = this.getState(itemId);
197
- if (!current.contentPartStarted) {
198
- current.contentPartStarted = true;
199
- output.push(this.serializeEvent("response.content_part.added", {
200
- type: "response.content_part.added",
201
- item_id: itemId,
202
- output_index: payload.output_index ?? 0,
203
- content_index: payload.content_index ?? 0,
204
- part: { type: "output_text", text: "", annotations: [] }
205
- }));
206
- }
207
- const deltaText =
208
- typeof payload.delta === "string"
209
- ? payload.delta
210
- : typeof payload.delta?.text === "string"
211
- ? payload.delta.text
212
- : "";
213
- current.text += deltaText;
214
- output.push(this.serializeEvent(eventName, {
215
- ...payload,
216
- output_index: payload.output_index ?? 0,
217
- content_index: payload.content_index ?? 0
218
- }));
219
- continue;
220
- }
221
-
222
- // response.output_text.done: also emit content_part.done
223
- if (eventName === "response.output_text.done" && payload?.item_id) {
224
- const itemId = payload.item_id as string;
225
- const current = this.getState(itemId);
226
- output.push(this.serializeEvent(eventName, {
227
- ...payload,
228
- output_index: payload.output_index ?? 0,
229
- content_index: payload.content_index ?? 0
230
- }));
231
- output.push(this.serializeEvent("response.content_part.done", {
232
- type: "response.content_part.done",
233
- item_id: itemId,
234
- output_index: payload.output_index ?? 0,
235
- content_index: payload.content_index ?? 0,
236
- part: { type: "output_text", text: current.text, annotations: [] }
237
- }));
238
- continue;
239
- }
240
-
241
- // response.output_item.done: normalize content to output_text type
242
- if (
243
- eventName === "response.output_item.done" &&
244
- payload?.item?.type === "message" &&
245
- payload?.item?.id
246
- ) {
247
- const itemId = payload.item.id as string;
248
- const current = this.getState(itemId);
249
- const item = {
250
- ...payload.item,
251
- content: [{ type: "output_text", text: current.text, annotations: [] }]
252
- };
253
- output.push(this.serializeEvent(eventName, {
254
- ...payload,
255
- output_index: payload.output_index ?? 0,
256
- item
257
- }));
258
- continue;
259
- }
260
-
261
- // response.completed: patch output if empty
262
- if (eventName === "response.completed" && payload?.response) {
263
- const response = { ...payload.response };
264
- if (!Array.isArray(response.output) || response.output.length === 0) {
265
- const first = this.state.values().next()
266
- .value as ResponsesStreamState | undefined;
267
- if (first) {
268
- response.output = [{
269
- id: first.itemId,
270
- type: "message",
271
- status: "completed",
272
- role: "assistant",
273
- content: [{ type: "output_text", text: first.text, annotations: [] }]
274
- }];
275
- response.output_text = first.text;
276
- }
277
- }
278
- output.push(this.serializeEvent(eventName, { ...payload, response }));
279
- continue;
280
- }
281
-
282
- // All other events: pass through unchanged
283
- output.push(sub);
284
- }
285
-
286
- return output.join("\n\n");
287
- }
288
-
289
- private getState(itemId: string): ResponsesStreamState {
290
- const current = this.state.get(itemId);
291
- if (current) return current;
292
- const created = { itemId, text: "", contentPartStarted: false };
293
- this.state.set(itemId, created);
294
- return created;
295
- }
296
-
297
- private serializeEvent(name: string, data: any): string {
298
- return `event: ${name}\ndata: ${JSON.stringify(data)}`;
299
- }
300
- }
301
-
302
97
  class SellerSettlementStreamExtractor {
303
98
  private pending = "";
304
99
  private settlement: SellerSettlementSummary | undefined;
@@ -410,6 +205,26 @@ export class TokenbuddyDaemon {
410
205
 
411
206
  private activePurchases = new Map<string, Promise<string>>();
412
207
 
208
+ // v1.2 fallback pipeline: model-index, prewarm-cache, credit-tracker,
209
+ // pool, and route-failover together replace the v1
210
+ // "fetchRegistry + manifest per request" path.
211
+ private readonly modelIndex = new ModelIndex();
212
+ private readonly prewarmCache = new PrewarmCache();
213
+ private readonly creditTracker = new CreditTracker();
214
+ private readonly sellerPool = new SellerPool({
215
+ modelIndex: this.modelIndex,
216
+ cache: this.prewarmCache,
217
+ creditTracker: this.creditTracker
218
+ });
219
+ private readonly routeFailover = new RouteFailover({
220
+ pool: this.sellerPool,
221
+ creditTracker: this.creditTracker
222
+ });
223
+ // v1.2 §18.5: assigned in the constructor because the scheduler needs
224
+ // config-derived knobs. The `!` opts out of strict-initialization so the
225
+ // rest of the class can treat it as non-nullable.
226
+ private readonly prewarmScheduler!: PrewarmScheduler;
227
+
413
228
  constructor(config: DaemonConfig) {
414
229
  this.tokenStore = new BuyerStore({ dbPath: config.dbPath });
415
230
  const routingPreference =
@@ -422,6 +237,42 @@ export class TokenbuddyDaemon {
422
237
  "auto";
423
238
  this.selectedSellerId =
424
239
  config.selectedSellerId || routingPreference?.sellerId;
240
+ // v1.2 §18.5: scheduler is created here (not in the field initializer)
241
+ // because it needs the config-derived prober + idle interval.
242
+ Object.assign(this, {
243
+ prewarmScheduler: new PrewarmScheduler({
244
+ modelIndex: this.modelIndex,
245
+ cache: this.prewarmCache,
246
+ prober: this.buildHealthProber(config.warmupProbeTimeoutMs ?? 3000),
247
+ idleIntervalMs: (config.warmupRefreshIntervalSecs ?? 60) * 1000
248
+ })
249
+ });
250
+ }
251
+
252
+ private buildHealthProber(timeoutMs: number): SellerProber {
253
+ return async (seller, signal) => {
254
+ try {
255
+ const ac = new AbortController();
256
+ const timer = setTimeout(() => ac.abort(new Error("healthz timeout")), timeoutMs);
257
+ if (signal) {
258
+ if (signal.aborted) {
259
+ ac.abort(signal.reason);
260
+ } else {
261
+ signal.addEventListener("abort", () => ac.abort(signal.reason), { once: true });
262
+ }
263
+ }
264
+ const startedAt = Date.now();
265
+ const res = await fetch(`${seller.url.replace(/\/+$/, "")}/healthz`, { signal: ac.signal });
266
+ clearTimeout(timer);
267
+ if (!res.ok) {
268
+ return { ok: false, latencyMs: Date.now() - startedAt, httpStatus: res.status, errorMessage: `healthz returned ${res.status}` };
269
+ }
270
+ return { ok: true, latencyMs: Date.now() - startedAt, httpStatus: res.status };
271
+ } catch (err) {
272
+ const message = err instanceof Error ? err.message : String(err);
273
+ return { ok: false, latencyMs: 0, errorMessage: message };
274
+ }
275
+ };
425
276
  }
426
277
 
427
278
  private activeControlPort(): number {
@@ -434,8 +285,43 @@ export class TokenbuddyDaemon {
434
285
  return typeof address === "object" && address ? address.port : this.config.proxyPort;
435
286
  }
436
287
 
288
+ // v1.2 §18.9: stale-cache fallback. The buyer remembers the last
289
+ // successfully fetched registry document and reuses it when the
290
+ // bootstrap returns 413 (`X-TokenBuddy-Registry-Too-Large: 1`). This
291
+ // trades freshness for availability: requests still route, but the
292
+ // model set is whatever was cached before the registry outgrew 1MB.
293
+ private lastRegistrySnapshot: SellerRegistryDocument | null = null;
294
+
437
295
  private async fetchRegistry(): Promise<SellerRegistryDocument> {
438
- return await fetchSellerRegistry(this.config.sellerRegistryUrl);
296
+ try {
297
+ const registry = await fetchSellerRegistry(this.config.sellerRegistryUrl);
298
+ this.modelIndex.rebuild(registry.sellers, {
299
+ registryVersion: registry.version,
300
+ defaultSellerId: registry.defaultSeller
301
+ });
302
+ this.sellerPool.sync();
303
+ this.lastRegistrySnapshot = registry;
304
+ return registry;
305
+ } catch (err) {
306
+ // v1.2 §18.9: if the bootstrap returns 413, fall back to the
307
+ // last-known registry document. This keeps the buyer routing even
308
+ // when the registry temporarily outgrows the 1MB cap.
309
+ if (err instanceof RegistryTooLargeError && this.lastRegistrySnapshot) {
310
+ logger.warn("registry.stale_fallback", "registry returned 413; using last-known snapshot for routing", {
311
+ sellerRegistryUrl: this.config.sellerRegistryUrl,
312
+ cachedVersion: this.lastRegistrySnapshot.version,
313
+ cachedSellers: this.lastRegistrySnapshot.sellers.length
314
+ });
315
+ const stale = this.lastRegistrySnapshot;
316
+ this.modelIndex.rebuild(stale.sellers, {
317
+ registryVersion: stale.version,
318
+ defaultSellerId: stale.defaultSeller
319
+ });
320
+ this.sellerPool.sync();
321
+ return stale;
322
+ }
323
+ throw err;
324
+ }
439
325
  }
440
326
 
441
327
  private runtimeSummary() {
@@ -553,49 +439,41 @@ export class TokenbuddyDaemon {
553
439
  throw new Error("mock or clawtip payment method is not configured as an enabled buyer payment method");
554
440
  }
555
441
 
442
+ // v1.2: registry is the source of truth for routing. We rebuild the
443
+ // model-index once per request (cheap; index lookup is in-memory) so
444
+ // the response always reflects the latest seller list. The previous
445
+ // "fetchSellerManifest per candidate" path is removed in favor of
446
+ // pulling `models` directly off the registry entries.
556
447
  const registry = await this.fetchRegistry();
557
- const defaultSellers = registry.sellers.filter((seller) => seller.id === registry.defaultSeller);
558
- const backupSellers = registry.sellers.filter((seller) => seller.id !== registry.defaultSeller);
559
- const manualSellers = this.selectedSellerId
560
- ? registry.sellers.filter((seller) => seller.id === this.selectedSellerId)
561
- : defaultSellers;
562
- const sellers = this.selectionMode === "manual" ? manualSellers : [...defaultSellers, ...backupSellers];
563
-
564
- const routes: SellerRoute[] = [];
565
- for (const seller of sellers) {
566
- let manifest: SellerManifest;
567
- try {
568
- manifest = await fetchSellerManifest(seller);
569
- } catch (error: unknown) {
570
- logger.warn("route.manifest.failed", "seller manifest unavailable during route selection", {
571
- sellerKey: seller.id,
572
- model: modelId,
573
- endpoint,
574
- errorMessage: error instanceof Error ? error.message : String(error)
575
- });
576
- continue;
577
- }
578
-
579
- const protocols = manifestProtocols(manifest, seller);
580
- const paymentMethods = manifestPaymentMethods(manifest, seller);
581
- const modelIds = manifestModelIds(manifest);
582
- if (!protocols.includes(protocol) || !paymentMethods.includes(paymentMethod) || !modelIds.includes(modelId)) {
583
- continue;
584
- }
585
448
 
586
- routes.push({
587
- seller,
588
- manifest,
589
- protocol,
590
- modelId,
591
- paymentMethod
592
- });
449
+ const indexCandidates = this.modelIndex.sellersFor(modelId, { protocol, paymentMethod });
450
+ let ordered = indexCandidates;
451
+ if (this.selectionMode === "manual" && this.selectedSellerId) {
452
+ ordered = indexCandidates.filter((seller) => seller.id === this.selectedSellerId);
453
+ } else if (this.selectionMode === "manual" && registry.defaultSeller) {
454
+ ordered = indexCandidates.filter((seller) => seller.id === registry.defaultSeller);
455
+ } else if (registry.defaultSeller) {
456
+ // auto mode: default first, then backups in registry order
457
+ ordered = [
458
+ ...indexCandidates.filter((seller) => seller.id === registry.defaultSeller),
459
+ ...indexCandidates.filter((seller) => seller.id !== registry.defaultSeller)
460
+ ];
593
461
  }
594
462
 
595
- if (routes.length === 0) {
463
+ if (ordered.length === 0) {
596
464
  throw new Error(`no compatible seller for ${endpoint} model ${modelId}`);
597
465
  }
598
466
 
467
+ const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
468
+ const routes: SellerRoute[] = ordered.map((seller) => ({
469
+ seller,
470
+ manifest: null,
471
+ protocol,
472
+ modelId,
473
+ paymentMethod,
474
+ poolEntry: poolById.get(seller.id)
475
+ }));
476
+
599
477
  logger.info("route.candidates.prewarmed", "seller route candidates prewarmed", {
600
478
  model: modelId,
601
479
  endpoint,
@@ -608,48 +486,56 @@ export class TokenbuddyDaemon {
608
486
  return routes;
609
487
  }
610
488
 
611
- private logRouteSelected(route: SellerRoute, endpoint: string, routeIndex: number): void {
612
- logger.info("route.selected", "seller route selected", {
613
- sellerKey: route.seller.id,
614
- model: route.modelId,
615
- endpoint,
616
- protocol: route.protocol,
617
- paymentMethod: route.paymentMethod,
618
- routeIndex,
619
- backup: routeIndex > 0
620
- });
489
+ private failoverErrorMessage(error: unknown): string {
490
+ return error instanceof Error ? error.message : String(error);
621
491
  }
622
492
 
623
- private shouldFailoverStatus(status: number): boolean {
624
- return status === 429 || status >= 500;
493
+ /**
494
+ * Map an HTTP status from a failed seller call to a `FailureKind` that
495
+ * the route-failover controller understands. Hard 4xx (other than
496
+ * auth/insufficient) means the seller is wrong for the request; 5xx
497
+ * and 429 are treated as transient and eligible for the soft-failure
498
+ * retry budget. The v1.1 "insufficient funds" check stays on the
499
+ * caller side because it short-circuits the failure path with a
500
+ * re-purchase.
501
+ */
502
+ private classifyFailureStatus(status: number): FailureKind {
503
+ if (status === 401 || status === 403) {
504
+ return "auth_invalid";
505
+ }
506
+ if (status === 402) {
507
+ return "insufficient_funds";
508
+ }
509
+ if (status === 400 || status === 404 || status === 422) {
510
+ return "hard_4xx";
511
+ }
512
+ return "soft_5xx";
625
513
  }
626
514
 
627
- private logFailover(
628
- route: SellerRoute,
629
- endpoint: string,
630
- routeIndex: number,
631
- reason: string,
632
- status?: number
515
+ /**
516
+ * Emit the structured failover log line. The decision itself is
517
+ * produced by `RouteFailover.decide`; this helper exists only to keep
518
+ * the controller loop readable.
519
+ */
520
+ private handleFailoverDecision(
521
+ decision: FailoverDecision,
522
+ context: { sellerKey: string; endpoint: string; routeIndex: number; status?: number; reason?: string }
633
523
  ): void {
634
- logger.warn("route.failover.triggered", "seller route failed over to backup candidate", {
635
- sellerKey: route.seller.id,
636
- model: route.modelId,
637
- endpoint,
638
- routeIndex,
639
- reason,
640
- status
641
- });
642
- }
643
-
644
- private failoverErrorMessage(error: unknown): string {
645
- return error instanceof Error ? error.message : String(error);
646
- }
647
-
648
- private async selectSeller(endpoint: string, modelId: string): Promise<SellerRoute> {
649
- const routes = await this.selectSellerRoutes(endpoint, modelId);
650
- const route = routes[0];
651
- this.logRouteSelected(route, endpoint, 0);
652
- return route;
524
+ if (decision.action === "retry_same_seller") {
525
+ return;
526
+ }
527
+ if (decision.action === "failover_next") {
528
+ logger.warn("route.failover.triggered", "seller route failed over to backup candidate", {
529
+ sellerKey: context.sellerKey,
530
+ endpoint: context.endpoint,
531
+ routeIndex: context.routeIndex,
532
+ reason: decision.reason,
533
+ status: context.status,
534
+ wastedCreditMicros: decision.wastedCreditMicros,
535
+ freshPurchase: decision.freshPurchase,
536
+ retryAttemptsBeforeFailover: decision.retryAttemptsBeforeFailover
537
+ });
538
+ }
653
539
  }
654
540
 
655
541
  private async listSellerBackedModels(): Promise<{
@@ -851,13 +737,60 @@ export class TokenbuddyDaemon {
851
737
  return parsed;
852
738
  }
853
739
 
740
+ /**
741
+ * v1.2 §8: hard per-request deadline. The buyer refuses to wait longer
742
+ * than this for a single seller; on expiry the request is aborted and
743
+ * the route-failover controller can either retry the same seller with
744
+ * a smaller body or fail over. Configurable via
745
+ * `TB_PROXYD_REQUEST_DEADLINE_MS` (default 30s).
746
+ */
747
+ private requestDeadlineMs(): number {
748
+ const raw = process.env.TB_PROXYD_REQUEST_DEADLINE_MS;
749
+ if (!raw) {
750
+ return 30_000;
751
+ }
752
+ const parsed = Number(raw);
753
+ if (!Number.isInteger(parsed) || parsed < 1000) {
754
+ return 30_000;
755
+ }
756
+ return parsed;
757
+ }
758
+
759
+ /**
760
+ * Safety margin subtracted from the cached token's `expiresAt` before
761
+ * deciding to reuse it. Buying a new token 60s before expiry gives the
762
+ * upstream enough headroom to reject any in-flight calls under the old
763
+ * token before the buyer assumes the new one is valid.
764
+ */
765
+ private tokenExpirySafetyMarginMs(): number {
766
+ const raw = process.env.TB_PROXYD_TOKEN_EXPIRY_SAFETY_MARGIN_MS;
767
+ if (!raw) {
768
+ return 60_000;
769
+ }
770
+ const parsed = Number(raw);
771
+ if (!Number.isInteger(parsed) || parsed < 0) {
772
+ return 60_000;
773
+ }
774
+ return parsed;
775
+ }
776
+
854
777
  private async getOrPurchaseToken(route: SellerRoute): Promise<string> {
855
778
  const sellerKey = route.seller.id;
856
779
  const sellerUrl = normalizeSellerUrl(route.seller);
857
780
  const { modelId, paymentMethod } = route;
858
781
  const cached = this.tokenStore.getToken(sellerKey);
859
782
  const rebuyMinBalanceMicros = this.tokenRebuyMinBalanceMicros();
860
- if (cached && cached.balanceMicros > rebuyMinBalanceMicros) {
783
+ // v1.2 PR-fix (2026-06-02): reject cached tokens that are inside the
784
+ // safety margin of their seller-assigned expiry. The previous
785
+ // implementation only checked `balanceMicros`, which let the buyer
786
+ // keep serving 24h-expired access tokens to the upstream and
787
+ // produced 401 "Bearer token is invalid or expired" errors. The
788
+ // `expiresAt` field is sourced from the seller's
789
+ // `/purchase/complete` response and is part of the `saveToken`
790
+ // contract.
791
+ const expiresAtMs = cached?.expiresAt ? Date.parse(cached.expiresAt) : NaN;
792
+ const tokenStillFresh = Number.isFinite(expiresAtMs) && Date.now() + this.tokenExpirySafetyMarginMs() < expiresAtMs;
793
+ if (cached && tokenStillFresh && cached.balanceMicros > rebuyMinBalanceMicros) {
861
794
  logger.info("token.cache.hit", "seller token cache hit", {
862
795
  sellerKey,
863
796
  model: modelId,
@@ -870,7 +803,8 @@ export class TokenbuddyDaemon {
870
803
  sellerKey,
871
804
  model: modelId,
872
805
  balanceMicros: cached?.balanceMicros || 0,
873
- rebuyMinBalanceMicros
806
+ rebuyMinBalanceMicros,
807
+ expired: Boolean(cached) && !tokenStillFresh
874
808
  });
875
809
 
876
810
  const purchaseKey = `${sellerKey}:${modelId}:${paymentMethod}`;
@@ -982,6 +916,9 @@ export class TokenbuddyDaemon {
982
916
  paymentReference: completeData.paymentReference || completeData.payment_reference,
983
917
  completedAt: new Date().toISOString()
984
918
  });
919
+ // v1.1: feed the credit tracker so the route-failover controller
920
+ // knows the seller is inside the fresh-purchase window.
921
+ this.creditTracker.recordPurchase(sellerKey, creditMicros, creditMicros);
985
922
  logger.info("purchase.token.succeeded", "seller token purchased", {
986
923
  sellerKey,
987
924
  model: modelId,
@@ -1157,58 +1094,141 @@ export class TokenbuddyDaemon {
1157
1094
  for (let routeIndex = 0; routeIndex < routes.length; routeIndex += 1) {
1158
1095
  const route = routes[routeIndex];
1159
1096
  const sellerKey = route.seller.id;
1160
- this.logRouteSelected(route, endpoint, routeIndex);
1161
- try {
1162
- logger.info("proxy.request.started", "proxy request started", {
1163
- requestId,
1164
- sellerKey,
1165
- model: modelId,
1166
- requestedModel: requestedModelId,
1167
- endpoint,
1168
- stream: Boolean((body as { stream?: unknown }).stream)
1169
- });
1170
- const sellerUrl = normalizeSellerUrl(route.seller);
1171
- const upstreamBody = this.applyResolvedModelToBody(endpoint, {
1172
- ...(body as Record<string, unknown>),
1173
- requestId
1174
- }, modelId);
1175
- const sendSellerRequest = async (token: string) => fetch(`${sellerUrl}${endpoint}`, {
1176
- method: "POST",
1177
- headers: {
1097
+ logger.info("route.selected", "seller route selected", {
1098
+ sellerKey,
1099
+ model: modelId,
1100
+ endpoint,
1101
+ protocol: route.protocol,
1102
+ paymentMethod: route.paymentMethod,
1103
+ routeIndex,
1104
+ backup: routeIndex > 0
1105
+ });
1106
+ let attempt = 0;
1107
+ // Soft-failure retry budget; the route-failover controller decides
1108
+ // whether the same seller should be retried or we move on. The
1109
+ // v1 "1 retry for 4xx fallback" loop is replaced with a
1110
+ // stateful decision per attempt.
1111
+ // eslint-disable-next-line no-constant-condition
1112
+ while (true) {
1113
+ try {
1114
+ logger.info("proxy.request.started", "proxy request started", {
1115
+ requestId,
1116
+ sellerKey,
1117
+ model: modelId,
1118
+ requestedModel: requestedModelId,
1119
+ endpoint,
1120
+ stream: Boolean((body as { stream?: unknown }).stream),
1121
+ attempt
1122
+ });
1123
+ const sellerUrl = normalizeSellerUrl(route.seller);
1124
+ const upstreamBody = this.applyResolvedModelToBody(endpoint, {
1125
+ ...(body as Record<string, unknown>),
1126
+ requestId
1127
+ }, modelId);
1128
+
1129
+ logger.info("proxy.upstream_fetch.started", "proxy upstream fetch started", {
1130
+ requestId,
1131
+ sellerKey,
1132
+ model: modelId,
1133
+ endpoint,
1134
+ stream: Boolean((body as { stream?: unknown }).stream),
1135
+ upstreamBody
1136
+ });
1137
+ // v1.1 §17.5: refuse to auto-purchase once the session budget is
1138
+ // exhausted. The seller is treated as "no auto-purchase available"
1139
+ // and the request fails over to the next candidate.
1140
+ if (!this.routeFailover.canAutoPurchase()) {
1141
+ logger.warn("purchase.budget.exceeded", "session auto-purchase budget exhausted; failing over without buying", {
1142
+ requestId,
1143
+ sellerKey,
1144
+ model: modelId,
1145
+ endpoint,
1146
+ routeIndex
1147
+ });
1148
+ lastError = new Error("auto-purchase budget exceeded for this session");
1149
+ break;
1150
+ }
1151
+ // v1.1: a purchase failure means the seller is unreachable for
1152
+ // payment, not "transiently flapping". Do not retry the same
1153
+ // seller; transfer leftover to wasted and fail over immediately.
1154
+ let token: string;
1155
+ try {
1156
+ token = await this.getOrPurchaseToken(route);
1157
+ } catch (purchaseError) {
1158
+ logger.warn("purchase.failed", "seller auto-purchase failed; failing over without retry", {
1159
+ requestId,
1160
+ sellerKey,
1161
+ model: modelId,
1162
+ endpoint,
1163
+ errorMessage: this.failoverErrorMessage(purchaseError)
1164
+ });
1165
+ this.routeFailover.decide(
1166
+ {
1167
+ sellerId: sellerKey,
1168
+ errorKind: "deadline",
1169
+ errorMessage: this.failoverErrorMessage(purchaseError),
1170
+ attempt
1171
+ },
1172
+ routes.length - routeIndex
1173
+ );
1174
+ lastError = purchaseError;
1175
+ break;
1176
+ }
1177
+ // v1.2 §8: enforce a hard per-request deadline so a slow
1178
+ // upstream cannot hang the buyer. The deadline is honored by
1179
+ // the AbortController passed to `fetch`; sellers that observe
1180
+ // the `X-TokenBuddy-Deadline-Ms` header (PR-6) can propagate
1181
+ // it to their own upstream fetch via the same signal.
1182
+ const deadlineMs = this.requestDeadlineMs();
1183
+ const requestAc = new AbortController();
1184
+ const requestTimer = setTimeout(() => requestAc.abort(new Error("buyer deadline exceeded")), deadlineMs);
1185
+ const sendSellerRequest = async (token: string) => {
1186
+ const headers: Record<string, string> = {
1178
1187
  "Content-Type": "application/json",
1179
1188
  "Authorization": `Bearer ${token}`,
1180
1189
  "X-Request-Id": requestId,
1181
1190
  "Idempotency-Key": idempotencyKey
1182
- },
1183
- body: JSON.stringify(upstreamBody)
1184
- });
1185
-
1186
- logger.info("proxy.upstream_fetch.started", "proxy upstream fetch started", {
1187
- requestId,
1188
- sellerKey,
1189
- model: modelId,
1190
- endpoint,
1191
- stream: Boolean((body as { stream?: unknown }).stream)
1192
- });
1193
- let token = await this.getOrPurchaseToken(route);
1191
+ };
1192
+ headers["X-TokenBuddy-Deadline-Ms"] = String(deadlineMs);
1193
+ return fetch(`${sellerUrl}${endpoint}`, {
1194
+ method: "POST",
1195
+ headers,
1196
+ body: JSON.stringify(upstreamBody),
1197
+ signal: requestAc.signal
1198
+ });
1199
+ };
1194
1200
  let upstreamResponse = await sendSellerRequest(token);
1195
1201
 
1196
- if (!upstreamResponse.ok) {
1197
- const errorBody = await upstreamResponse.text();
1198
- if (this.isInsufficientFundsResponse(upstreamResponse.status, errorBody)) {
1199
- token = await this.recoverFromInsufficientFunds(route, token);
1200
- upstreamResponse = await sendSellerRequest(token);
1201
- if (upstreamResponse.ok) {
1202
- logger.info("proxy.retry_after_402.succeeded", "seller request succeeded after one-shot auto purchase retry", {
1203
- requestId,
1204
- sellerKey,
1205
- model: modelId,
1206
- endpoint,
1207
- durationMs: Date.now() - startedAt
1208
- });
1202
+ if (!upstreamResponse.ok) {
1203
+ const errorBody = await upstreamResponse.text();
1204
+ if (this.isInsufficientFundsResponse(upstreamResponse.status, errorBody)) {
1205
+ token = await this.recoverFromInsufficientFunds(route, token);
1206
+ upstreamResponse = await sendSellerRequest(token);
1207
+ if (upstreamResponse.ok) {
1208
+ logger.info("proxy.retry_after_402.succeeded", "seller request succeeded after one-shot auto purchase retry", {
1209
+ requestId,
1210
+ sellerKey,
1211
+ model: modelId,
1212
+ endpoint,
1213
+ durationMs: Date.now() - startedAt
1214
+ });
1215
+ } else {
1216
+ const retryErrorBody = await upstreamResponse.text();
1217
+ logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
1218
+ requestId,
1219
+ sellerKey,
1220
+ model: modelId,
1221
+ endpoint,
1222
+ status: upstreamResponse.status,
1223
+ durationMs: Date.now() - startedAt
1224
+ });
1225
+ this.copyUpstreamHeaders(upstreamResponse, res);
1226
+ res.status(upstreamResponse.status);
1227
+ res.send(retryErrorBody);
1228
+ return;
1229
+ }
1209
1230
  } else {
1210
- const retryErrorBody = await upstreamResponse.text();
1211
- logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
1231
+ logger.warn("proxy.upstream_fetch.failed", "proxy upstream fetch returned non-ok status", {
1212
1232
  requestId,
1213
1233
  sellerKey,
1214
1234
  model: modelId,
@@ -1216,131 +1236,149 @@ export class TokenbuddyDaemon {
1216
1236
  status: upstreamResponse.status,
1217
1237
  durationMs: Date.now() - startedAt
1218
1238
  });
1219
- this.copyUpstreamHeaders(upstreamResponse, res);
1220
- res.status(upstreamResponse.status);
1221
- res.send(retryErrorBody);
1222
- return;
1239
+ const kind: FailureKind = this.classifyFailureStatus(upstreamResponse.status);
1240
+ const decision = this.routeFailover.decide(
1241
+ {
1242
+ sellerId: sellerKey,
1243
+ status: upstreamResponse.status,
1244
+ errorKind: kind,
1245
+ errorMessage: errorBody,
1246
+ attempt
1247
+ },
1248
+ routes.length - routeIndex
1249
+ );
1250
+ this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex });
1251
+ if (decision.action === "fail_fast" || decision.action === "abort") {
1252
+ this.copyUpstreamHeaders(upstreamResponse, res);
1253
+ res.status(upstreamResponse.status);
1254
+ res.send(errorBody);
1255
+ return;
1256
+ }
1257
+ if (decision.action === "retry_same_seller") {
1258
+ attempt += 1;
1259
+ if (decision.retryDelayMs) {
1260
+ await new Promise<void>((resolve) => setTimeout(resolve, decision.retryDelayMs));
1261
+ }
1262
+ continue;
1263
+ }
1264
+ // failover_next
1265
+ lastError = new Error(`seller ${sellerKey} returned ${upstreamResponse.status}`);
1266
+ break;
1223
1267
  }
1224
- } else {
1225
- logger.warn("proxy.upstream_fetch.failed", "proxy upstream fetch returned non-ok status", {
1268
+ }
1269
+
1270
+ // Successful response: stream or buffer.
1271
+ this.copyUpstreamHeaders(upstreamResponse, res);
1272
+ res.status(upstreamResponse.status);
1273
+ logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
1226
1274
  requestId,
1227
1275
  sellerKey,
1228
1276
  model: modelId,
1229
1277
  endpoint,
1230
1278
  status: upstreamResponse.status,
1231
- durationMs: Date.now() - startedAt
1279
+ stream: Boolean((body as { stream?: unknown }).stream)
1232
1280
  });
1233
- if (this.shouldFailoverStatus(upstreamResponse.status) && routeIndex < routes.length - 1) {
1234
- lastError = new Error(`seller ${sellerKey} returned ${upstreamResponse.status}`);
1235
- this.logFailover(route, endpoint, routeIndex, "upstream_status", upstreamResponse.status);
1236
- continue;
1237
- }
1238
- this.copyUpstreamHeaders(upstreamResponse, res);
1239
- res.status(upstreamResponse.status);
1240
- res.send(errorBody);
1241
- return;
1242
- }
1243
- }
1244
-
1245
- this.copyUpstreamHeaders(upstreamResponse, res);
1246
- res.status(upstreamResponse.status);
1247
- logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
1248
- requestId,
1249
- sellerKey,
1250
- model: modelId,
1251
- endpoint,
1252
- status: upstreamResponse.status,
1253
- stream: Boolean((body as { stream?: unknown }).stream)
1254
- });
1255
1281
 
1256
- const contentType = upstreamResponse.headers.get("content-type") || "";
1257
- if (contentType.includes("text/event-stream") || Boolean((body as { stream?: unknown }).stream)) {
1258
- const reader = upstreamResponse.body?.getReader();
1259
- if (!reader) {
1260
- res.end();
1261
- return;
1262
- }
1263
- let bytes = 0;
1264
- const decoder = new TextDecoder();
1265
- const responsesStreamNormalizer = new ResponsesStreamNormalizer();
1266
- const settlementExtractor = new SellerSettlementStreamExtractor();
1267
- while (true) {
1268
- const { done, value } = await reader.read();
1269
- if (done) {
1270
- break;
1271
- }
1272
- bytes += value.byteLength;
1273
- const chunk = decoder.decode(value, { stream: true });
1274
- const sellerChunk = settlementExtractor.push(chunk);
1275
- if (sellerChunk.length === 0) {
1276
- continue;
1282
+ const contentType = upstreamResponse.headers.get("content-type") || "";
1283
+ if (contentType.includes("text/event-stream") || Boolean((body as { stream?: unknown }).stream)) {
1284
+ const reader = upstreamResponse.body?.getReader();
1285
+ if (!reader) {
1286
+ res.end();
1287
+ return;
1277
1288
  }
1278
- if (endpoint === "/v1/responses") {
1279
- const normalized = responsesStreamNormalizer.push(sellerChunk);
1280
- if (normalized.length > 0) {
1281
- res.write(`${normalized}\n\n`);
1289
+ let bytes = 0;
1290
+ const decoder = new TextDecoder();
1291
+ const settlementExtractor = new SellerSettlementStreamExtractor();
1292
+ while (true) {
1293
+ const { done, value } = await reader.read();
1294
+ if (done) {
1295
+ break;
1296
+ }
1297
+ bytes += value.byteLength;
1298
+ const chunk = decoder.decode(value, { stream: true });
1299
+ // 透明代理:把 seller 的 SSE 字节原样转给客户端,只剥离我们注入的
1300
+ // tokenbuddy.settlement 事件(不让客户端看到内部记账字段)。除此之外
1301
+ // 不做任何协议转换——卖方格式 bug(如 chat.completion.chunk prefix、
1302
+ // 缺 event: 行)由卖方修,buyer 不兜底。
1303
+ const sellerChunk = settlementExtractor.push(chunk);
1304
+ if (sellerChunk.length > 0) {
1305
+ res.write(sellerChunk);
1282
1306
  }
1283
- } else {
1284
- res.write(sellerChunk);
1285
1307
  }
1286
- }
1287
- const settlementTrailing = settlementExtractor.finish();
1288
- if (settlementTrailing.downstream.length > 0) {
1289
- if (endpoint === "/v1/responses") {
1290
- const normalized = responsesStreamNormalizer.push(settlementTrailing.downstream);
1291
- if (normalized.length > 0) {
1292
- res.write(`${normalized}\n\n`);
1308
+ // flush TextDecoder 内部 buffer:stream:true 模式下最后可能留有几个字节的
1309
+ // 不完整 UTF-8 序列(多字节字符被切到下一 chunk 的场景),不调 stream:false
1310
+ // flush break 会丢这批字节。上面的 stream 末尾事件(done / completed)
1311
+ // 之前被吞掉就是这个原因。
1312
+ const decoderTail = decoder.decode();
1313
+ if (decoderTail.length > 0) {
1314
+ const sellerTail = settlementExtractor.push(decoderTail);
1315
+ if (sellerTail.length > 0) {
1316
+ res.write(sellerTail);
1293
1317
  }
1294
- } else {
1295
- res.write(settlementTrailing.downstream);
1296
1318
  }
1297
- }
1298
- if (endpoint === "/v1/responses") {
1299
- const trailing = responsesStreamNormalizer.finish();
1300
- if (trailing.length > 0) {
1301
- res.write(`${trailing}\n\n`);
1319
+ const settlementTrailing = settlementExtractor.finish();
1320
+ if (settlementTrailing.downstream.length > 0) {
1321
+ res.write(settlementTrailing.downstream);
1302
1322
  }
1323
+ res.end();
1324
+ this.recordReconciledInference(
1325
+ route,
1326
+ endpoint,
1327
+ requestId,
1328
+ { promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) },
1329
+ this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(),
1330
+ this.inferPromptForHash(body)
1331
+ );
1332
+ return;
1303
1333
  }
1304
- res.end();
1334
+
1335
+ const responseBody = await upstreamResponse.text();
1336
+ res.send(responseBody);
1337
+ const usage = this.readUsage(responseBody);
1305
1338
  this.recordReconciledInference(
1306
1339
  route,
1307
1340
  endpoint,
1308
1341
  requestId,
1309
- { promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) },
1310
- this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(),
1311
- this.inferPromptForHash(body)
1342
+ usage,
1343
+ this.parseSellerSettlementSummary(upstreamResponse.headers),
1344
+ this.inferPromptForHash(body),
1345
+ responseBody
1312
1346
  );
1313
1347
  return;
1348
+ } catch (routeError: unknown) {
1349
+ lastError = routeError;
1350
+ const kind: FailureKind = "deadline";
1351
+ const decision = this.routeFailover.decide(
1352
+ {
1353
+ sellerId: sellerKey,
1354
+ errorKind: kind,
1355
+ errorMessage: this.failoverErrorMessage(routeError),
1356
+ attempt
1357
+ },
1358
+ routes.length - routeIndex
1359
+ );
1360
+ this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex, reason: "exception" });
1361
+ logger.warn("proxy.route.failed", "seller route failed before response", {
1362
+ requestId,
1363
+ sellerKey,
1364
+ model: modelId,
1365
+ endpoint,
1366
+ errorMessage: this.failoverErrorMessage(routeError),
1367
+ durationMs: Date.now() - startedAt
1368
+ });
1369
+ if (decision.action === "retry_same_seller") {
1370
+ attempt += 1;
1371
+ if (decision.retryDelayMs) {
1372
+ await new Promise<void>((resolve) => setTimeout(resolve, decision.retryDelayMs));
1373
+ }
1374
+ continue;
1375
+ }
1376
+ if (decision.action === "fail_fast" || decision.action === "abort") {
1377
+ throw routeError;
1378
+ }
1379
+ // failover_next
1380
+ break;
1314
1381
  }
1315
-
1316
- const responseBody = await upstreamResponse.text();
1317
- res.send(responseBody);
1318
- const usage = this.readUsage(responseBody);
1319
- this.recordReconciledInference(
1320
- route,
1321
- endpoint,
1322
- requestId,
1323
- usage,
1324
- this.parseSellerSettlementSummary(upstreamResponse.headers),
1325
- this.inferPromptForHash(body),
1326
- responseBody
1327
- );
1328
- return;
1329
- } catch (routeError: unknown) {
1330
- lastError = routeError;
1331
- logger.warn("proxy.route.failed", "seller route failed before response", {
1332
- requestId,
1333
- sellerKey,
1334
- model: modelId,
1335
- endpoint,
1336
- errorMessage: this.failoverErrorMessage(routeError),
1337
- durationMs: Date.now() - startedAt
1338
- });
1339
- if (!res.headersSent && routeIndex < routes.length - 1) {
1340
- this.logFailover(route, endpoint, routeIndex, "exception");
1341
- continue;
1342
- }
1343
- throw routeError;
1344
1382
  }
1345
1383
  }
1346
1384
 
@@ -1419,6 +1457,47 @@ export class TokenbuddyDaemon {
1419
1457
  });
1420
1458
  });
1421
1459
 
1460
+ // v1.2 §18.11: control plane snapshot of the prewarm cache + seller
1461
+ // pool + credit tracker. `tb doctor` reads this to render the
1462
+ // recovery / prewarm / credit summary block.
1463
+ controlApp.get("/v1.2/prewarm", (req, res) => {
1464
+ const prewarmEntries = this.prewarmCache.snapshot().map((entry) => ({
1465
+ modelId: entry.modelId,
1466
+ protocol: entry.protocol,
1467
+ paymentMethod: entry.paymentMethod,
1468
+ state: entry.state,
1469
+ candidateCount: entry.candidates.length,
1470
+ warmedAt: entry.warmedAt,
1471
+ ttlMs: entry.ttlMs,
1472
+ consecutiveWarmingFailures: entry.consecutiveWarmingFailures
1473
+ }));
1474
+ const poolSnapshot = this.sellerPool.snapshot().map((entry) => ({
1475
+ sellerId: entry.sellerId,
1476
+ url: entry.url,
1477
+ circuit: entry.circuit,
1478
+ consecutiveFailures: entry.consecutiveFailures,
1479
+ lastSuccessAt: entry.lastSuccessAt,
1480
+ lastFailAt: entry.lastFailAt,
1481
+ healthScore: entry.healthScore
1482
+ }));
1483
+ const creditSummary = this.creditTracker.summary();
1484
+ const focusSet = this.resolveFocusSet();
1485
+ const schedulerStats = this.prewarmScheduler.stats();
1486
+ res.status(200).json({
1487
+ prewarm: {
1488
+ entries: prewarmEntries,
1489
+ size: prewarmEntries.length
1490
+ },
1491
+ pool: {
1492
+ size: poolSnapshot.length,
1493
+ entries: poolSnapshot
1494
+ },
1495
+ credit: creditSummary,
1496
+ focusSet,
1497
+ scheduler: schedulerStats
1498
+ });
1499
+ });
1500
+
1422
1501
  controlApp.get("/sellers", async (req, res) => {
1423
1502
  try {
1424
1503
  const registry = await this.fetchRegistry();
@@ -1632,11 +1711,56 @@ export class TokenbuddyDaemon {
1632
1711
  sellerRegistryUrl: this.config.sellerRegistryUrl,
1633
1712
  selectionMode: this.selectionMode
1634
1713
  });
1714
+
1715
+ // v1.2 §18.5: kick off the on-demand prewarm pipeline. The startup
1716
+ // sweep runs after the configured jitter window (5-10s by default);
1717
+ // subsequent refreshes run on the `idleIntervalMs` cadence and the
1718
+ // `forwardProxyRequest` hot path can dispatch lazy prewarms on miss.
1719
+ this.prewarmScheduler.start();
1720
+ void this.runStartupPrewarmSweep();
1721
+ }
1722
+
1723
+ /**
1724
+ * v1.2 §18.4: build the focus set from the explicit config, the env
1725
+ * override, and the historical usage in the buyer store. The order of
1726
+ * precedence: explicit config > env > historical > empty.
1727
+ */
1728
+ private resolveFocusSet(): string[] {
1729
+ const explicit = this.config.warmupModels ?? [];
1730
+ if (explicit.length > 0) {
1731
+ return explicit;
1732
+ }
1733
+ const envRaw = process.env.TB_BUYER_WARMUP_MODELS || "";
1734
+ const envModels = envRaw.split(",").map((s) => s.trim()).filter(Boolean);
1735
+ if (envModels.length > 0) {
1736
+ return envModels;
1737
+ }
1738
+ return this.tokenStore.recentModels(7, 5);
1739
+ }
1740
+
1741
+ private async runStartupPrewarmSweep(): Promise<void> {
1742
+ const focusSet = this.resolveFocusSet();
1743
+ if (focusSet.length === 0) {
1744
+ logger.info("prewarm.startup.skipped", "no focus set configured; relying on lazy prewarms", {});
1745
+ return;
1746
+ }
1747
+ logger.info("prewarm.startup.scheduled", "startup prewarm sweep scheduled", {
1748
+ focusSetSize: focusSet.length,
1749
+ focusSet: focusSet.slice(0, 20)
1750
+ });
1751
+ try {
1752
+ await this.prewarmScheduler.runStartupPrewarm(focusSet);
1753
+ } catch (err) {
1754
+ logger.warn("prewarm.startup.failed", "startup prewarm sweep failed", {
1755
+ errorMessage: err instanceof Error ? err.message : String(err)
1756
+ });
1757
+ }
1635
1758
  }
1636
1759
 
1637
1760
  public stop() {
1638
1761
  if (this.controlServer) this.controlServer.close();
1639
1762
  if (this.proxyServer) this.proxyServer.close();
1763
+ void this.prewarmScheduler.stop();
1640
1764
  this.tokenStore.close();
1641
1765
  }
1642
1766
  }