@tokenbuddy/tokenbuddy 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/src/buyer-store.d.ts +13 -0
  2. package/dist/src/buyer-store.d.ts.map +1 -1
  3. package/dist/src/buyer-store.js +21 -2
  4. package/dist/src/buyer-store.js.map +1 -1
  5. package/dist/src/cli.d.ts.map +1 -1
  6. package/dist/src/cli.js +54 -0
  7. package/dist/src/cli.js.map +1 -1
  8. package/dist/src/credit-tracker.d.ts +118 -0
  9. package/dist/src/credit-tracker.d.ts.map +1 -0
  10. package/dist/src/credit-tracker.js +220 -0
  11. package/dist/src/credit-tracker.js.map +1 -0
  12. package/dist/src/daemon.d.ts +49 -4
  13. package/dist/src/daemon.d.ts.map +1 -1
  14. package/dist/src/daemon.js +541 -405
  15. package/dist/src/daemon.js.map +1 -1
  16. package/dist/src/model-index.d.ts +86 -0
  17. package/dist/src/model-index.d.ts.map +1 -0
  18. package/dist/src/model-index.js +214 -0
  19. package/dist/src/model-index.js.map +1 -0
  20. package/dist/src/prewarm-cache.d.ts +149 -0
  21. package/dist/src/prewarm-cache.d.ts.map +1 -0
  22. package/dist/src/prewarm-cache.js +288 -0
  23. package/dist/src/prewarm-cache.js.map +1 -0
  24. package/dist/src/prewarm-scheduler.d.ts +150 -0
  25. package/dist/src/prewarm-scheduler.d.ts.map +1 -0
  26. package/dist/src/prewarm-scheduler.js +484 -0
  27. package/dist/src/prewarm-scheduler.js.map +1 -0
  28. package/dist/src/provider-install.d.ts.map +1 -1
  29. package/dist/src/provider-install.js +9 -1
  30. package/dist/src/provider-install.js.map +1 -1
  31. package/dist/src/route-failover.d.ts +96 -0
  32. package/dist/src/route-failover.d.ts.map +1 -0
  33. package/dist/src/route-failover.js +177 -0
  34. package/dist/src/route-failover.js.map +1 -0
  35. package/dist/src/seller-catalog.d.ts +26 -0
  36. package/dist/src/seller-catalog.d.ts.map +1 -1
  37. package/dist/src/seller-catalog.js +40 -0
  38. package/dist/src/seller-catalog.js.map +1 -1
  39. package/dist/src/seller-pool.d.ts +127 -0
  40. package/dist/src/seller-pool.d.ts.map +1 -0
  41. package/dist/src/seller-pool.js +243 -0
  42. package/dist/src/seller-pool.js.map +1 -0
  43. package/dist/src/stream-failover.d.ts +78 -0
  44. package/dist/src/stream-failover.d.ts.map +1 -0
  45. package/dist/src/stream-failover.js +93 -0
  46. package/dist/src/stream-failover.js.map +1 -0
  47. package/package.json +1 -1
  48. package/src/buyer-store.ts +32 -2
  49. package/src/cli.ts +61 -0
  50. package/src/credit-tracker.test.ts +165 -0
  51. package/src/credit-tracker.ts +269 -0
  52. package/src/daemon.ts +569 -445
  53. package/src/model-index.test.ts +184 -0
  54. package/src/model-index.ts +266 -0
  55. package/src/prewarm-cache.test.ts +281 -0
  56. package/src/prewarm-cache.ts +373 -0
  57. package/src/prewarm-scheduler.test.ts +367 -0
  58. package/src/prewarm-scheduler.ts +581 -0
  59. package/src/provider-install.ts +9 -1
  60. package/src/route-failover.test.ts +193 -0
  61. package/src/route-failover.ts +233 -0
  62. package/src/seller-catalog-413.test.ts +61 -0
  63. package/src/seller-catalog.ts +47 -0
  64. package/src/seller-pool.test.ts +231 -0
  65. package/src/seller-pool.ts +333 -0
  66. package/src/stream-failover.test.ts +52 -0
  67. package/src/stream-failover.ts +129 -0
  68. package/src/thousand-seller.test.ts +151 -0
  69. package/tests/daemon-413-fallback.test.ts +92 -0
  70. package/tests/e2e.test.ts +3 -2
  71. package/tests/tokenbuddy.test.ts +68 -11
@@ -5,7 +5,13 @@ import * as fs from "fs";
5
5
  import { createModuleLogger } from "@tokenbuddy/logging";
6
6
  import { BuyerStore } from "./buyer-store.js";
7
7
  import { applyProviderInstall, detectProviders, previewProviderInstall, rollbackProviderInstall, } from "./provider-install.js";
8
- import { discoverSellerBackedModels, fetchSellerManifest, fetchSellerRegistry, manifestModelIds, manifestPaymentMethods, manifestProtocols, normalizeSellerUrl, } from "./seller-catalog.js";
8
+ import { discoverSellerBackedModels, fetchSellerRegistry, normalizeSellerUrl, RegistryTooLargeError, } from "./seller-catalog.js";
9
+ import { ModelIndex } from "./model-index.js";
10
+ import { PrewarmCache } from "./prewarm-cache.js";
11
+ import { CreditTracker } from "./credit-tracker.js";
12
+ import { SellerPool } from "./seller-pool.js";
13
+ import { RouteFailover } from "./route-failover.js";
14
+ import { PrewarmScheduler } from "./prewarm-scheduler.js";
9
15
  const logger = createModuleLogger("tb-proxyd");
10
16
  const PROXY_JSON_BODY_LIMIT = "10mb";
11
17
  function numericHeaderField(value) {
@@ -18,197 +24,6 @@ function numericHeaderField(value) {
18
24
  }
19
25
  return undefined;
20
26
  }
21
- class ResponsesStreamNormalizer {
22
- pending = "";
23
- state = new Map();
24
- push(chunk) {
25
- this.pending += chunk;
26
- const blocks = this.pending.split("\n\n");
27
- this.pending = blocks.pop() || "";
28
- return blocks
29
- .map((block) => this.normalizeBlock(block))
30
- .filter((block) => block.length > 0)
31
- .join("\n\n");
32
- }
33
- finish() {
34
- if (!this.pending.trim()) {
35
- return "";
36
- }
37
- const block = this.normalizeBlock(this.pending);
38
- this.pending = "";
39
- return block;
40
- }
41
- normalizeBlock(block) {
42
- if (!block.trim()) {
43
- return "";
44
- }
45
- // Each \n\n separates an event in SSE format
46
- const subBlocks = block.split("\n\n");
47
- const output = [];
48
- for (const sub of subBlocks) {
49
- if (!sub.trim() || sub.trim() === "data: [DONE]") {
50
- if (sub.trim())
51
- output.push(sub);
52
- continue;
53
- }
54
- const lines = sub.split("\n");
55
- const eventLine = lines.find((l) => l.startsWith("event:"));
56
- const dataLine = lines.find((l) => l.startsWith("data:"));
57
- if (!dataLine) {
58
- output.push(sub);
59
- continue;
60
- }
61
- const rawData = dataLine.replace(/^data:\s?/, "");
62
- if (rawData === "[DONE]") {
63
- output.push(sub);
64
- continue;
65
- }
66
- let payload;
67
- try {
68
- payload = JSON.parse(rawData);
69
- }
70
- catch {
71
- output.push(sub);
72
- continue;
73
- }
74
- const eventName = (eventLine?.replace(/^event:\s?/, "") || payload?.type);
75
- if (!eventName || !eventName.startsWith("response.")) {
76
- output.push(sub);
77
- continue;
78
- }
79
- // When upstream already sends content_part.added, record it in state
80
- if (eventName === "response.content_part.added" &&
81
- payload?.item_id) {
82
- const current = this.state.get(payload.item_id);
83
- if (current)
84
- current.contentPartStarted = true;
85
- output.push(sub);
86
- continue;
87
- }
88
- // response.output_item.added: inject content_part.added only if upstream hasn't
89
- if (eventName === "response.output_item.added" &&
90
- payload?.item?.type === "message" &&
91
- payload?.item?.id) {
92
- const itemId = payload.item.id;
93
- const current = this.getState(itemId);
94
- const item = { ...payload.item };
95
- item.content = [{ type: "output_text", text: "", annotations: [] }];
96
- output.push(this.serializeEvent(eventName, {
97
- ...payload,
98
- output_index: payload.output_index ?? 0,
99
- item
100
- }));
101
- if (!current.contentPartStarted) {
102
- current.contentPartStarted = true;
103
- output.push(this.serializeEvent("response.content_part.added", {
104
- type: "response.content_part.added",
105
- item_id: itemId,
106
- output_index: payload.output_index ?? 0,
107
- content_index: 0,
108
- part: { type: "output_text", text: "", annotations: [] }
109
- }));
110
- }
111
- continue;
112
- }
113
- // response.output_text.delta: inject content_part.added if missing
114
- if (eventName === "response.output_text.delta" && payload?.item_id) {
115
- const itemId = payload.item_id;
116
- const current = this.getState(itemId);
117
- if (!current.contentPartStarted) {
118
- current.contentPartStarted = true;
119
- output.push(this.serializeEvent("response.content_part.added", {
120
- type: "response.content_part.added",
121
- item_id: itemId,
122
- output_index: payload.output_index ?? 0,
123
- content_index: payload.content_index ?? 0,
124
- part: { type: "output_text", text: "", annotations: [] }
125
- }));
126
- }
127
- const deltaText = typeof payload.delta === "string"
128
- ? payload.delta
129
- : typeof payload.delta?.text === "string"
130
- ? payload.delta.text
131
- : "";
132
- current.text += deltaText;
133
- output.push(this.serializeEvent(eventName, {
134
- ...payload,
135
- output_index: payload.output_index ?? 0,
136
- content_index: payload.content_index ?? 0
137
- }));
138
- continue;
139
- }
140
- // response.output_text.done: also emit content_part.done
141
- if (eventName === "response.output_text.done" && payload?.item_id) {
142
- const itemId = payload.item_id;
143
- const current = this.getState(itemId);
144
- output.push(this.serializeEvent(eventName, {
145
- ...payload,
146
- output_index: payload.output_index ?? 0,
147
- content_index: payload.content_index ?? 0
148
- }));
149
- output.push(this.serializeEvent("response.content_part.done", {
150
- type: "response.content_part.done",
151
- item_id: itemId,
152
- output_index: payload.output_index ?? 0,
153
- content_index: payload.content_index ?? 0,
154
- part: { type: "output_text", text: current.text, annotations: [] }
155
- }));
156
- continue;
157
- }
158
- // response.output_item.done: normalize content to output_text type
159
- if (eventName === "response.output_item.done" &&
160
- payload?.item?.type === "message" &&
161
- payload?.item?.id) {
162
- const itemId = payload.item.id;
163
- const current = this.getState(itemId);
164
- const item = {
165
- ...payload.item,
166
- content: [{ type: "output_text", text: current.text, annotations: [] }]
167
- };
168
- output.push(this.serializeEvent(eventName, {
169
- ...payload,
170
- output_index: payload.output_index ?? 0,
171
- item
172
- }));
173
- continue;
174
- }
175
- // response.completed: patch output if empty
176
- if (eventName === "response.completed" && payload?.response) {
177
- const response = { ...payload.response };
178
- if (!Array.isArray(response.output) || response.output.length === 0) {
179
- const first = this.state.values().next()
180
- .value;
181
- if (first) {
182
- response.output = [{
183
- id: first.itemId,
184
- type: "message",
185
- status: "completed",
186
- role: "assistant",
187
- content: [{ type: "output_text", text: first.text, annotations: [] }]
188
- }];
189
- response.output_text = first.text;
190
- }
191
- }
192
- output.push(this.serializeEvent(eventName, { ...payload, response }));
193
- continue;
194
- }
195
- // All other events: pass through unchanged
196
- output.push(sub);
197
- }
198
- return output.join("\n\n");
199
- }
200
- getState(itemId) {
201
- const current = this.state.get(itemId);
202
- if (current)
203
- return current;
204
- const created = { itemId, text: "", contentPartStarted: false };
205
- this.state.set(itemId, created);
206
- return created;
207
- }
208
- serializeEvent(name, data) {
209
- return `event: ${name}\ndata: ${JSON.stringify(data)}`;
210
- }
211
- }
212
27
  class SellerSettlementStreamExtractor {
213
28
  pending = "";
214
29
  settlement;
@@ -289,6 +104,25 @@ export class TokenbuddyDaemon {
289
104
  selectionMode;
290
105
  selectedSellerId;
291
106
  activePurchases = new Map();
107
+ // v1.2 fallback pipeline: model-index, prewarm-cache, credit-tracker,
108
+ // pool, and route-failover together replace the v1
109
+ // "fetchRegistry + manifest per request" path.
110
+ modelIndex = new ModelIndex();
111
+ prewarmCache = new PrewarmCache();
112
+ creditTracker = new CreditTracker();
113
+ sellerPool = new SellerPool({
114
+ modelIndex: this.modelIndex,
115
+ cache: this.prewarmCache,
116
+ creditTracker: this.creditTracker
117
+ });
118
+ routeFailover = new RouteFailover({
119
+ pool: this.sellerPool,
120
+ creditTracker: this.creditTracker
121
+ });
122
+ // v1.2 §18.5: assigned in the constructor because the scheduler needs
123
+ // config-derived knobs. The `!` opts out of strict-initialization so the
124
+ // rest of the class can treat it as non-nullable.
125
+ prewarmScheduler;
292
126
  constructor(config) {
293
127
  this.tokenStore = new BuyerStore({ dbPath: config.dbPath });
294
128
  const routingPreference = this.tokenStore.getDaemonRuntimeConfig("routing")
@@ -300,6 +134,43 @@ export class TokenbuddyDaemon {
300
134
  "auto";
301
135
  this.selectedSellerId =
302
136
  config.selectedSellerId || routingPreference?.sellerId;
137
+ // v1.2 §18.5: scheduler is created here (not in the field initializer)
138
+ // because it needs the config-derived prober + idle interval.
139
+ Object.assign(this, {
140
+ prewarmScheduler: new PrewarmScheduler({
141
+ modelIndex: this.modelIndex,
142
+ cache: this.prewarmCache,
143
+ prober: this.buildHealthProber(config.warmupProbeTimeoutMs ?? 3000),
144
+ idleIntervalMs: (config.warmupRefreshIntervalSecs ?? 60) * 1000
145
+ })
146
+ });
147
+ }
148
+ buildHealthProber(timeoutMs) {
149
+ return async (seller, signal) => {
150
+ try {
151
+ const ac = new AbortController();
152
+ const timer = setTimeout(() => ac.abort(new Error("healthz timeout")), timeoutMs);
153
+ if (signal) {
154
+ if (signal.aborted) {
155
+ ac.abort(signal.reason);
156
+ }
157
+ else {
158
+ signal.addEventListener("abort", () => ac.abort(signal.reason), { once: true });
159
+ }
160
+ }
161
+ const startedAt = Date.now();
162
+ const res = await fetch(`${seller.url.replace(/\/+$/, "")}/healthz`, { signal: ac.signal });
163
+ clearTimeout(timer);
164
+ if (!res.ok) {
165
+ return { ok: false, latencyMs: Date.now() - startedAt, httpStatus: res.status, errorMessage: `healthz returned ${res.status}` };
166
+ }
167
+ return { ok: true, latencyMs: Date.now() - startedAt, httpStatus: res.status };
168
+ }
169
+ catch (err) {
170
+ const message = err instanceof Error ? err.message : String(err);
171
+ return { ok: false, latencyMs: 0, errorMessage: message };
172
+ }
173
+ };
303
174
  }
304
175
  activeControlPort() {
305
176
  const address = this.controlServer?.address?.();
@@ -309,8 +180,43 @@ export class TokenbuddyDaemon {
309
180
  const address = this.proxyServer?.address?.();
310
181
  return typeof address === "object" && address ? address.port : this.config.proxyPort;
311
182
  }
183
+ // v1.2 §18.9: stale-cache fallback. The buyer remembers the last
184
+ // successfully fetched registry document and reuses it when the
185
+ // bootstrap returns 413 (`X-TokenBuddy-Registry-Too-Large: 1`). This
186
+ // trades freshness for availability: requests still route, but the
187
+ // model set is whatever was cached before the registry outgrew 1MB.
188
+ lastRegistrySnapshot = null;
312
189
  async fetchRegistry() {
313
- return await fetchSellerRegistry(this.config.sellerRegistryUrl);
190
+ try {
191
+ const registry = await fetchSellerRegistry(this.config.sellerRegistryUrl);
192
+ this.modelIndex.rebuild(registry.sellers, {
193
+ registryVersion: registry.version,
194
+ defaultSellerId: registry.defaultSeller
195
+ });
196
+ this.sellerPool.sync();
197
+ this.lastRegistrySnapshot = registry;
198
+ return registry;
199
+ }
200
+ catch (err) {
201
+ // v1.2 §18.9: if the bootstrap returns 413, fall back to the
202
+ // last-known registry document. This keeps the buyer routing even
203
+ // when the registry temporarily outgrows the 1MB cap.
204
+ if (err instanceof RegistryTooLargeError && this.lastRegistrySnapshot) {
205
+ logger.warn("registry.stale_fallback", "registry returned 413; using last-known snapshot for routing", {
206
+ sellerRegistryUrl: this.config.sellerRegistryUrl,
207
+ cachedVersion: this.lastRegistrySnapshot.version,
208
+ cachedSellers: this.lastRegistrySnapshot.sellers.length
209
+ });
210
+ const stale = this.lastRegistrySnapshot;
211
+ this.modelIndex.rebuild(stale.sellers, {
212
+ registryVersion: stale.version,
213
+ defaultSellerId: stale.defaultSeller
214
+ });
215
+ this.sellerPool.sync();
216
+ return stale;
217
+ }
218
+ throw err;
219
+ }
314
220
  }
315
221
  runtimeSummary() {
316
222
  const sellerRoutingMode = this.selectedSellerId ? "fixed" : this.selectionMode;
@@ -413,45 +319,39 @@ export class TokenbuddyDaemon {
413
319
  if (!paymentMethod || !["mock", "clawtip"].includes(paymentMethod)) {
414
320
  throw new Error("mock or clawtip payment method is not configured as an enabled buyer payment method");
415
321
  }
322
+ // v1.2: registry is the source of truth for routing. We rebuild the
323
+ // model-index once per request (cheap; index lookup is in-memory) so
324
+ // the response always reflects the latest seller list. The previous
325
+ // "fetchSellerManifest per candidate" path is removed in favor of
326
+ // pulling `models` directly off the registry entries.
416
327
  const registry = await this.fetchRegistry();
417
- const defaultSellers = registry.sellers.filter((seller) => seller.id === registry.defaultSeller);
418
- const backupSellers = registry.sellers.filter((seller) => seller.id !== registry.defaultSeller);
419
- const manualSellers = this.selectedSellerId
420
- ? registry.sellers.filter((seller) => seller.id === this.selectedSellerId)
421
- : defaultSellers;
422
- const sellers = this.selectionMode === "manual" ? manualSellers : [...defaultSellers, ...backupSellers];
423
- const routes = [];
424
- for (const seller of sellers) {
425
- let manifest;
426
- try {
427
- manifest = await fetchSellerManifest(seller);
428
- }
429
- catch (error) {
430
- logger.warn("route.manifest.failed", "seller manifest unavailable during route selection", {
431
- sellerKey: seller.id,
432
- model: modelId,
433
- endpoint,
434
- errorMessage: error instanceof Error ? error.message : String(error)
435
- });
436
- continue;
437
- }
438
- const protocols = manifestProtocols(manifest, seller);
439
- const paymentMethods = manifestPaymentMethods(manifest, seller);
440
- const modelIds = manifestModelIds(manifest);
441
- if (!protocols.includes(protocol) || !paymentMethods.includes(paymentMethod) || !modelIds.includes(modelId)) {
442
- continue;
443
- }
444
- routes.push({
445
- seller,
446
- manifest,
447
- protocol,
448
- modelId,
449
- paymentMethod
450
- });
328
+ const indexCandidates = this.modelIndex.sellersFor(modelId, { protocol, paymentMethod });
329
+ let ordered = indexCandidates;
330
+ if (this.selectionMode === "manual" && this.selectedSellerId) {
331
+ ordered = indexCandidates.filter((seller) => seller.id === this.selectedSellerId);
332
+ }
333
+ else if (this.selectionMode === "manual" && registry.defaultSeller) {
334
+ ordered = indexCandidates.filter((seller) => seller.id === registry.defaultSeller);
335
+ }
336
+ else if (registry.defaultSeller) {
337
+ // auto mode: default first, then backups in registry order
338
+ ordered = [
339
+ ...indexCandidates.filter((seller) => seller.id === registry.defaultSeller),
340
+ ...indexCandidates.filter((seller) => seller.id !== registry.defaultSeller)
341
+ ];
451
342
  }
452
- if (routes.length === 0) {
343
+ if (ordered.length === 0) {
453
344
  throw new Error(`no compatible seller for ${endpoint} model ${modelId}`);
454
345
  }
346
+ const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
347
+ const routes = ordered.map((seller) => ({
348
+ seller,
349
+ manifest: null,
350
+ protocol,
351
+ modelId,
352
+ paymentMethod,
353
+ poolEntry: poolById.get(seller.id)
354
+ }));
455
355
  logger.info("route.candidates.prewarmed", "seller route candidates prewarmed", {
456
356
  model: modelId,
457
357
  endpoint,
@@ -463,38 +363,51 @@ export class TokenbuddyDaemon {
463
363
  });
464
364
  return routes;
465
365
  }
466
- logRouteSelected(route, endpoint, routeIndex) {
467
- logger.info("route.selected", "seller route selected", {
468
- sellerKey: route.seller.id,
469
- model: route.modelId,
470
- endpoint,
471
- protocol: route.protocol,
472
- paymentMethod: route.paymentMethod,
473
- routeIndex,
474
- backup: routeIndex > 0
475
- });
476
- }
477
- shouldFailoverStatus(status) {
478
- return status === 429 || status >= 500;
479
- }
480
- logFailover(route, endpoint, routeIndex, reason, status) {
481
- logger.warn("route.failover.triggered", "seller route failed over to backup candidate", {
482
- sellerKey: route.seller.id,
483
- model: route.modelId,
484
- endpoint,
485
- routeIndex,
486
- reason,
487
- status
488
- });
489
- }
490
366
  failoverErrorMessage(error) {
491
367
  return error instanceof Error ? error.message : String(error);
492
368
  }
493
- async selectSeller(endpoint, modelId) {
494
- const routes = await this.selectSellerRoutes(endpoint, modelId);
495
- const route = routes[0];
496
- this.logRouteSelected(route, endpoint, 0);
497
- return route;
369
+ /**
370
+ * Map an HTTP status from a failed seller call to a `FailureKind` that
371
+ * the route-failover controller understands. Hard 4xx (other than
372
+ * auth/insufficient) means the seller is wrong for the request; 5xx
373
+ * and 429 are treated as transient and eligible for the soft-failure
374
+ * retry budget. The v1.1 "insufficient funds" check stays on the
375
+ * caller side because it short-circuits the failure path with a
376
+ * re-purchase.
377
+ */
378
+ classifyFailureStatus(status) {
379
+ if (status === 401 || status === 403) {
380
+ return "auth_invalid";
381
+ }
382
+ if (status === 402) {
383
+ return "insufficient_funds";
384
+ }
385
+ if (status === 400 || status === 404 || status === 422) {
386
+ return "hard_4xx";
387
+ }
388
+ return "soft_5xx";
389
+ }
390
+ /**
391
+ * Emit the structured failover log line. The decision itself is
392
+ * produced by `RouteFailover.decide`; this helper exists only to keep
393
+ * the controller loop readable.
394
+ */
395
+ handleFailoverDecision(decision, context) {
396
+ if (decision.action === "retry_same_seller") {
397
+ return;
398
+ }
399
+ if (decision.action === "failover_next") {
400
+ logger.warn("route.failover.triggered", "seller route failed over to backup candidate", {
401
+ sellerKey: context.sellerKey,
402
+ endpoint: context.endpoint,
403
+ routeIndex: context.routeIndex,
404
+ reason: decision.reason,
405
+ status: context.status,
406
+ wastedCreditMicros: decision.wastedCreditMicros,
407
+ freshPurchase: decision.freshPurchase,
408
+ retryAttemptsBeforeFailover: decision.retryAttemptsBeforeFailover
409
+ });
410
+ }
498
411
  }
499
412
  async listSellerBackedModels() {
500
413
  const catalog = await discoverSellerBackedModels(this.config.sellerRegistryUrl);
@@ -668,13 +581,58 @@ export class TokenbuddyDaemon {
668
581
  }
669
582
  return parsed;
670
583
  }
584
+ /**
585
+ * v1.2 §8: hard per-request deadline. The buyer refuses to wait longer
586
+ * than this for a single seller; on expiry the request is aborted and
587
+ * the route-failover controller can either retry the same seller with
588
+ * a smaller body or fail over. Configurable via
589
+ * `TB_PROXYD_REQUEST_DEADLINE_MS` (default 30s).
590
+ */
591
+ requestDeadlineMs() {
592
+ const raw = process.env.TB_PROXYD_REQUEST_DEADLINE_MS;
593
+ if (!raw) {
594
+ return 30_000;
595
+ }
596
+ const parsed = Number(raw);
597
+ if (!Number.isInteger(parsed) || parsed < 1000) {
598
+ return 30_000;
599
+ }
600
+ return parsed;
601
+ }
602
+ /**
603
+ * Safety margin subtracted from the cached token's `expiresAt` before
604
+ * deciding to reuse it. Buying a new token 60s before expiry gives the
605
+ * upstream enough headroom to reject any in-flight calls under the old
606
+ * token before the buyer assumes the new one is valid.
607
+ */
608
+ tokenExpirySafetyMarginMs() {
609
+ const raw = process.env.TB_PROXYD_TOKEN_EXPIRY_SAFETY_MARGIN_MS;
610
+ if (!raw) {
611
+ return 60_000;
612
+ }
613
+ const parsed = Number(raw);
614
+ if (!Number.isInteger(parsed) || parsed < 0) {
615
+ return 60_000;
616
+ }
617
+ return parsed;
618
+ }
671
619
  async getOrPurchaseToken(route) {
672
620
  const sellerKey = route.seller.id;
673
621
  const sellerUrl = normalizeSellerUrl(route.seller);
674
622
  const { modelId, paymentMethod } = route;
675
623
  const cached = this.tokenStore.getToken(sellerKey);
676
624
  const rebuyMinBalanceMicros = this.tokenRebuyMinBalanceMicros();
677
- if (cached && cached.balanceMicros > rebuyMinBalanceMicros) {
625
+ // v1.2 PR-fix (2026-06-02): reject cached tokens that are inside the
626
+ // safety margin of their seller-assigned expiry. The previous
627
+ // implementation only checked `balanceMicros`, which let the buyer
628
+ // keep serving 24h-expired access tokens to the upstream and
629
+ // produced 401 "Bearer token is invalid or expired" errors. The
630
+ // `expiresAt` field is sourced from the seller's
631
+ // `/purchase/complete` response and is part of the `saveToken`
632
+ // contract.
633
+ const expiresAtMs = cached?.expiresAt ? Date.parse(cached.expiresAt) : NaN;
634
+ const tokenStillFresh = Number.isFinite(expiresAtMs) && Date.now() + this.tokenExpirySafetyMarginMs() < expiresAtMs;
635
+ if (cached && tokenStillFresh && cached.balanceMicros > rebuyMinBalanceMicros) {
678
636
  logger.info("token.cache.hit", "seller token cache hit", {
679
637
  sellerKey,
680
638
  model: modelId,
@@ -687,7 +645,8 @@ export class TokenbuddyDaemon {
687
645
  sellerKey,
688
646
  model: modelId,
689
647
  balanceMicros: cached?.balanceMicros || 0,
690
- rebuyMinBalanceMicros
648
+ rebuyMinBalanceMicros,
649
+ expired: Boolean(cached) && !tokenStillFresh
691
650
  });
692
651
  const purchaseKey = `${sellerKey}:${modelId}:${paymentMethod}`;
693
652
  const purchasePromise = this.activePurchases.get(purchaseKey);
@@ -794,6 +753,9 @@ export class TokenbuddyDaemon {
794
753
  paymentReference: completeData.paymentReference || completeData.payment_reference,
795
754
  completedAt: new Date().toISOString()
796
755
  });
756
+ // v1.1: feed the credit tracker so the route-failover controller
757
+ // knows the seller is inside the fresh-purchase window.
758
+ this.creditTracker.recordPurchase(sellerKey, creditMicros, creditMicros);
797
759
  logger.info("purchase.token.succeeded", "seller token purchased", {
798
760
  sellerKey,
799
761
  model: modelId,
@@ -949,57 +911,139 @@ export class TokenbuddyDaemon {
949
911
  for (let routeIndex = 0; routeIndex < routes.length; routeIndex += 1) {
950
912
  const route = routes[routeIndex];
951
913
  const sellerKey = route.seller.id;
952
- this.logRouteSelected(route, endpoint, routeIndex);
953
- try {
954
- logger.info("proxy.request.started", "proxy request started", {
955
- requestId,
956
- sellerKey,
957
- model: modelId,
958
- requestedModel: requestedModelId,
959
- endpoint,
960
- stream: Boolean(body.stream)
961
- });
962
- const sellerUrl = normalizeSellerUrl(route.seller);
963
- const upstreamBody = this.applyResolvedModelToBody(endpoint, {
964
- ...body,
965
- requestId
966
- }, modelId);
967
- const sendSellerRequest = async (token) => fetch(`${sellerUrl}${endpoint}`, {
968
- method: "POST",
969
- headers: {
970
- "Content-Type": "application/json",
971
- "Authorization": `Bearer ${token}`,
972
- "X-Request-Id": requestId,
973
- "Idempotency-Key": idempotencyKey
974
- },
975
- body: JSON.stringify(upstreamBody)
976
- });
977
- logger.info("proxy.upstream_fetch.started", "proxy upstream fetch started", {
978
- requestId,
979
- sellerKey,
980
- model: modelId,
981
- endpoint,
982
- stream: Boolean(body.stream)
983
- });
984
- let token = await this.getOrPurchaseToken(route);
985
- let upstreamResponse = await sendSellerRequest(token);
986
- if (!upstreamResponse.ok) {
987
- const errorBody = await upstreamResponse.text();
988
- if (this.isInsufficientFundsResponse(upstreamResponse.status, errorBody)) {
989
- token = await this.recoverFromInsufficientFunds(route, token);
990
- upstreamResponse = await sendSellerRequest(token);
991
- if (upstreamResponse.ok) {
992
- logger.info("proxy.retry_after_402.succeeded", "seller request succeeded after one-shot auto purchase retry", {
993
- requestId,
994
- sellerKey,
995
- model: modelId,
996
- endpoint,
997
- durationMs: Date.now() - startedAt
998
- });
914
+ logger.info("route.selected", "seller route selected", {
915
+ sellerKey,
916
+ model: modelId,
917
+ endpoint,
918
+ protocol: route.protocol,
919
+ paymentMethod: route.paymentMethod,
920
+ routeIndex,
921
+ backup: routeIndex > 0
922
+ });
923
+ let attempt = 0;
924
+ // Soft-failure retry budget; the route-failover controller decides
925
+ // whether the same seller should be retried or we move on. The
926
+ // v1 "1 retry for 4xx fallback" loop is replaced with a
927
+ // stateful decision per attempt.
928
+ // eslint-disable-next-line no-constant-condition
929
+ while (true) {
930
+ try {
931
+ logger.info("proxy.request.started", "proxy request started", {
932
+ requestId,
933
+ sellerKey,
934
+ model: modelId,
935
+ requestedModel: requestedModelId,
936
+ endpoint,
937
+ stream: Boolean(body.stream),
938
+ attempt
939
+ });
940
+ const sellerUrl = normalizeSellerUrl(route.seller);
941
+ const upstreamBody = this.applyResolvedModelToBody(endpoint, {
942
+ ...body,
943
+ requestId
944
+ }, modelId);
945
+ logger.info("proxy.upstream_fetch.started", "proxy upstream fetch started", {
946
+ requestId,
947
+ sellerKey,
948
+ model: modelId,
949
+ endpoint,
950
+ stream: Boolean(body.stream),
951
+ upstreamBody
952
+ });
953
+ // v1.1 §17.5: refuse to auto-purchase once the session budget is
954
+ // exhausted. The seller is treated as "no auto-purchase available"
955
+ // and the request fails over to the next candidate.
956
+ if (!this.routeFailover.canAutoPurchase()) {
957
+ logger.warn("purchase.budget.exceeded", "session auto-purchase budget exhausted; failing over without buying", {
958
+ requestId,
959
+ sellerKey,
960
+ model: modelId,
961
+ endpoint,
962
+ routeIndex
963
+ });
964
+ lastError = new Error("auto-purchase budget exceeded for this session");
965
+ break;
966
+ }
967
+ // v1.1: a purchase failure means the seller is unreachable for
968
+ // payment, not "transiently flapping". Do not retry the same
969
+ // seller; transfer leftover to wasted and fail over immediately.
970
+ let token;
971
+ try {
972
+ token = await this.getOrPurchaseToken(route);
973
+ }
974
+ catch (purchaseError) {
975
+ logger.warn("purchase.failed", "seller auto-purchase failed; failing over without retry", {
976
+ requestId,
977
+ sellerKey,
978
+ model: modelId,
979
+ endpoint,
980
+ errorMessage: this.failoverErrorMessage(purchaseError)
981
+ });
982
+ this.routeFailover.decide({
983
+ sellerId: sellerKey,
984
+ errorKind: "deadline",
985
+ errorMessage: this.failoverErrorMessage(purchaseError),
986
+ attempt
987
+ }, routes.length - routeIndex);
988
+ lastError = purchaseError;
989
+ break;
990
+ }
991
+ // v1.2 §8: enforce a hard per-request deadline so a slow
992
+ // upstream cannot hang the buyer. The deadline is honored by
993
+ // the AbortController passed to `fetch`; sellers that observe
994
+ // the `X-TokenBuddy-Deadline-Ms` header (PR-6) can propagate
995
+ // it to their own upstream fetch via the same signal.
996
+ const deadlineMs = this.requestDeadlineMs();
997
+ const requestAc = new AbortController();
998
+ const requestTimer = setTimeout(() => requestAc.abort(new Error("buyer deadline exceeded")), deadlineMs);
999
+ const sendSellerRequest = async (token) => {
1000
+ const headers = {
1001
+ "Content-Type": "application/json",
1002
+ "Authorization": `Bearer ${token}`,
1003
+ "X-Request-Id": requestId,
1004
+ "Idempotency-Key": idempotencyKey
1005
+ };
1006
+ headers["X-TokenBuddy-Deadline-Ms"] = String(deadlineMs);
1007
+ return fetch(`${sellerUrl}${endpoint}`, {
1008
+ method: "POST",
1009
+ headers,
1010
+ body: JSON.stringify(upstreamBody),
1011
+ signal: requestAc.signal
1012
+ });
1013
+ };
1014
+ let upstreamResponse = await sendSellerRequest(token);
1015
+ if (!upstreamResponse.ok) {
1016
+ const errorBody = await upstreamResponse.text();
1017
+ if (this.isInsufficientFundsResponse(upstreamResponse.status, errorBody)) {
1018
+ token = await this.recoverFromInsufficientFunds(route, token);
1019
+ upstreamResponse = await sendSellerRequest(token);
1020
+ if (upstreamResponse.ok) {
1021
+ logger.info("proxy.retry_after_402.succeeded", "seller request succeeded after one-shot auto purchase retry", {
1022
+ requestId,
1023
+ sellerKey,
1024
+ model: modelId,
1025
+ endpoint,
1026
+ durationMs: Date.now() - startedAt
1027
+ });
1028
+ }
1029
+ else {
1030
+ const retryErrorBody = await upstreamResponse.text();
1031
+ logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
1032
+ requestId,
1033
+ sellerKey,
1034
+ model: modelId,
1035
+ endpoint,
1036
+ status: upstreamResponse.status,
1037
+ durationMs: Date.now() - startedAt
1038
+ });
1039
+ this.copyUpstreamHeaders(upstreamResponse, res);
1040
+ res.status(upstreamResponse.status);
1041
+ res.send(retryErrorBody);
1042
+ return;
1043
+ }
999
1044
  }
1000
1045
  else {
1001
- const retryErrorBody = await upstreamResponse.text();
1002
- logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
1046
+ logger.warn("proxy.upstream_fetch.failed", "proxy upstream fetch returned non-ok status", {
1003
1047
  requestId,
1004
1048
  sellerKey,
1005
1049
  model: modelId,
@@ -1007,117 +1051,126 @@ export class TokenbuddyDaemon {
1007
1051
  status: upstreamResponse.status,
1008
1052
  durationMs: Date.now() - startedAt
1009
1053
  });
1010
- this.copyUpstreamHeaders(upstreamResponse, res);
1011
- res.status(upstreamResponse.status);
1012
- res.send(retryErrorBody);
1013
- return;
1014
- }
1015
- }
1016
- else {
1017
- logger.warn("proxy.upstream_fetch.failed", "proxy upstream fetch returned non-ok status", {
1018
- requestId,
1019
- sellerKey,
1020
- model: modelId,
1021
- endpoint,
1022
- status: upstreamResponse.status,
1023
- durationMs: Date.now() - startedAt
1024
- });
1025
- if (this.shouldFailoverStatus(upstreamResponse.status) && routeIndex < routes.length - 1) {
1054
+ const kind = this.classifyFailureStatus(upstreamResponse.status);
1055
+ const decision = this.routeFailover.decide({
1056
+ sellerId: sellerKey,
1057
+ status: upstreamResponse.status,
1058
+ errorKind: kind,
1059
+ errorMessage: errorBody,
1060
+ attempt
1061
+ }, routes.length - routeIndex);
1062
+ this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex });
1063
+ if (decision.action === "fail_fast" || decision.action === "abort") {
1064
+ this.copyUpstreamHeaders(upstreamResponse, res);
1065
+ res.status(upstreamResponse.status);
1066
+ res.send(errorBody);
1067
+ return;
1068
+ }
1069
+ if (decision.action === "retry_same_seller") {
1070
+ attempt += 1;
1071
+ if (decision.retryDelayMs) {
1072
+ await new Promise((resolve) => setTimeout(resolve, decision.retryDelayMs));
1073
+ }
1074
+ continue;
1075
+ }
1076
+ // failover_next
1026
1077
  lastError = new Error(`seller ${sellerKey} returned ${upstreamResponse.status}`);
1027
- this.logFailover(route, endpoint, routeIndex, "upstream_status", upstreamResponse.status);
1028
- continue;
1029
- }
1030
- this.copyUpstreamHeaders(upstreamResponse, res);
1031
- res.status(upstreamResponse.status);
1032
- res.send(errorBody);
1033
- return;
1034
- }
1035
- }
1036
- this.copyUpstreamHeaders(upstreamResponse, res);
1037
- res.status(upstreamResponse.status);
1038
- logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
1039
- requestId,
1040
- sellerKey,
1041
- model: modelId,
1042
- endpoint,
1043
- status: upstreamResponse.status,
1044
- stream: Boolean(body.stream)
1045
- });
1046
- const contentType = upstreamResponse.headers.get("content-type") || "";
1047
- if (contentType.includes("text/event-stream") || Boolean(body.stream)) {
1048
- const reader = upstreamResponse.body?.getReader();
1049
- if (!reader) {
1050
- res.end();
1051
- return;
1052
- }
1053
- let bytes = 0;
1054
- const decoder = new TextDecoder();
1055
- const responsesStreamNormalizer = new ResponsesStreamNormalizer();
1056
- const settlementExtractor = new SellerSettlementStreamExtractor();
1057
- while (true) {
1058
- const { done, value } = await reader.read();
1059
- if (done) {
1060
1078
  break;
1061
1079
  }
1062
- bytes += value.byteLength;
1063
- const chunk = decoder.decode(value, { stream: true });
1064
- const sellerChunk = settlementExtractor.push(chunk);
1065
- if (sellerChunk.length === 0) {
1066
- continue;
1080
+ }
1081
+ // Successful response: stream or buffer.
1082
+ this.copyUpstreamHeaders(upstreamResponse, res);
1083
+ res.status(upstreamResponse.status);
1084
+ logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
1085
+ requestId,
1086
+ sellerKey,
1087
+ model: modelId,
1088
+ endpoint,
1089
+ status: upstreamResponse.status,
1090
+ stream: Boolean(body.stream)
1091
+ });
1092
+ const contentType = upstreamResponse.headers.get("content-type") || "";
1093
+ if (contentType.includes("text/event-stream") || Boolean(body.stream)) {
1094
+ const reader = upstreamResponse.body?.getReader();
1095
+ if (!reader) {
1096
+ res.end();
1097
+ return;
1067
1098
  }
1068
- if (endpoint === "/v1/responses") {
1069
- const normalized = responsesStreamNormalizer.push(sellerChunk);
1070
- if (normalized.length > 0) {
1071
- res.write(`${normalized}\n\n`);
1099
+ let bytes = 0;
1100
+ const decoder = new TextDecoder();
1101
+ const settlementExtractor = new SellerSettlementStreamExtractor();
1102
+ while (true) {
1103
+ const { done, value } = await reader.read();
1104
+ if (done) {
1105
+ break;
1106
+ }
1107
+ bytes += value.byteLength;
1108
+ const chunk = decoder.decode(value, { stream: true });
1109
+ // 透明代理:把 seller 的 SSE 字节原样转给客户端,只剥离我们注入的
1110
+ // tokenbuddy.settlement 事件(不让客户端看到内部记账字段)。除此之外
1111
+ // 不做任何协议转换——卖方格式 bug(如 chat.completion.chunk prefix、
1112
+ // 缺 event: 行)由卖方修,buyer 不兜底。
1113
+ const sellerChunk = settlementExtractor.push(chunk);
1114
+ if (sellerChunk.length > 0) {
1115
+ res.write(sellerChunk);
1072
1116
  }
1073
1117
  }
1074
- else {
1075
- res.write(sellerChunk);
1076
- }
1077
- }
1078
- const settlementTrailing = settlementExtractor.finish();
1079
- if (settlementTrailing.downstream.length > 0) {
1080
- if (endpoint === "/v1/responses") {
1081
- const normalized = responsesStreamNormalizer.push(settlementTrailing.downstream);
1082
- if (normalized.length > 0) {
1083
- res.write(`${normalized}\n\n`);
1118
+ // flush TextDecoder 内部 buffer:stream:true 模式下最后可能留有几个字节的
1119
+ // 不完整 UTF-8 序列(多字节字符被切到下一 chunk 的场景),不调 stream:false
1120
+ // flush 就 break 会丢这批字节。上面的 stream 末尾事件(done / completed)
1121
+ // 之前被吞掉就是这个原因。
1122
+ const decoderTail = decoder.decode();
1123
+ if (decoderTail.length > 0) {
1124
+ const sellerTail = settlementExtractor.push(decoderTail);
1125
+ if (sellerTail.length > 0) {
1126
+ res.write(sellerTail);
1084
1127
  }
1085
1128
  }
1086
- else {
1129
+ const settlementTrailing = settlementExtractor.finish();
1130
+ if (settlementTrailing.downstream.length > 0) {
1087
1131
  res.write(settlementTrailing.downstream);
1088
1132
  }
1133
+ res.end();
1134
+ this.recordReconciledInference(route, endpoint, requestId, { promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) }, this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(), this.inferPromptForHash(body));
1135
+ return;
1089
1136
  }
1090
- if (endpoint === "/v1/responses") {
1091
- const trailing = responsesStreamNormalizer.finish();
1092
- if (trailing.length > 0) {
1093
- res.write(`${trailing}\n\n`);
1094
- }
1095
- }
1096
- res.end();
1097
- this.recordReconciledInference(route, endpoint, requestId, { promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) }, this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(), this.inferPromptForHash(body));
1137
+ const responseBody = await upstreamResponse.text();
1138
+ res.send(responseBody);
1139
+ const usage = this.readUsage(responseBody);
1140
+ this.recordReconciledInference(route, endpoint, requestId, usage, this.parseSellerSettlementSummary(upstreamResponse.headers), this.inferPromptForHash(body), responseBody);
1098
1141
  return;
1099
1142
  }
1100
- const responseBody = await upstreamResponse.text();
1101
- res.send(responseBody);
1102
- const usage = this.readUsage(responseBody);
1103
- this.recordReconciledInference(route, endpoint, requestId, usage, this.parseSellerSettlementSummary(upstreamResponse.headers), this.inferPromptForHash(body), responseBody);
1104
- return;
1105
- }
1106
- catch (routeError) {
1107
- lastError = routeError;
1108
- logger.warn("proxy.route.failed", "seller route failed before response", {
1109
- requestId,
1110
- sellerKey,
1111
- model: modelId,
1112
- endpoint,
1113
- errorMessage: this.failoverErrorMessage(routeError),
1114
- durationMs: Date.now() - startedAt
1115
- });
1116
- if (!res.headersSent && routeIndex < routes.length - 1) {
1117
- this.logFailover(route, endpoint, routeIndex, "exception");
1118
- continue;
1143
+ catch (routeError) {
1144
+ lastError = routeError;
1145
+ const kind = "deadline";
1146
+ const decision = this.routeFailover.decide({
1147
+ sellerId: sellerKey,
1148
+ errorKind: kind,
1149
+ errorMessage: this.failoverErrorMessage(routeError),
1150
+ attempt
1151
+ }, routes.length - routeIndex);
1152
+ this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex, reason: "exception" });
1153
+ logger.warn("proxy.route.failed", "seller route failed before response", {
1154
+ requestId,
1155
+ sellerKey,
1156
+ model: modelId,
1157
+ endpoint,
1158
+ errorMessage: this.failoverErrorMessage(routeError),
1159
+ durationMs: Date.now() - startedAt
1160
+ });
1161
+ if (decision.action === "retry_same_seller") {
1162
+ attempt += 1;
1163
+ if (decision.retryDelayMs) {
1164
+ await new Promise((resolve) => setTimeout(resolve, decision.retryDelayMs));
1165
+ }
1166
+ continue;
1167
+ }
1168
+ if (decision.action === "fail_fast" || decision.action === "abort") {
1169
+ throw routeError;
1170
+ }
1171
+ // failover_next
1172
+ break;
1119
1173
  }
1120
- throw routeError;
1121
1174
  }
1122
1175
  }
1123
1176
  throw lastError instanceof Error ? lastError : new Error("all seller routes failed");
@@ -1189,6 +1242,46 @@ export class TokenbuddyDaemon {
1189
1242
  inferences: this.tokenStore.listInferenceLedger()
1190
1243
  });
1191
1244
  });
1245
+ // v1.2 §18.11: control plane snapshot of the prewarm cache + seller
1246
+ // pool + credit tracker. `tb doctor` reads this to render the
1247
+ // recovery / prewarm / credit summary block.
1248
+ controlApp.get("/v1.2/prewarm", (req, res) => {
1249
+ const prewarmEntries = this.prewarmCache.snapshot().map((entry) => ({
1250
+ modelId: entry.modelId,
1251
+ protocol: entry.protocol,
1252
+ paymentMethod: entry.paymentMethod,
1253
+ state: entry.state,
1254
+ candidateCount: entry.candidates.length,
1255
+ warmedAt: entry.warmedAt,
1256
+ ttlMs: entry.ttlMs,
1257
+ consecutiveWarmingFailures: entry.consecutiveWarmingFailures
1258
+ }));
1259
+ const poolSnapshot = this.sellerPool.snapshot().map((entry) => ({
1260
+ sellerId: entry.sellerId,
1261
+ url: entry.url,
1262
+ circuit: entry.circuit,
1263
+ consecutiveFailures: entry.consecutiveFailures,
1264
+ lastSuccessAt: entry.lastSuccessAt,
1265
+ lastFailAt: entry.lastFailAt,
1266
+ healthScore: entry.healthScore
1267
+ }));
1268
+ const creditSummary = this.creditTracker.summary();
1269
+ const focusSet = this.resolveFocusSet();
1270
+ const schedulerStats = this.prewarmScheduler.stats();
1271
+ res.status(200).json({
1272
+ prewarm: {
1273
+ entries: prewarmEntries,
1274
+ size: prewarmEntries.length
1275
+ },
1276
+ pool: {
1277
+ size: poolSnapshot.length,
1278
+ entries: poolSnapshot
1279
+ },
1280
+ credit: creditSummary,
1281
+ focusSet,
1282
+ scheduler: schedulerStats
1283
+ });
1284
+ });
1192
1285
  controlApp.get("/sellers", async (req, res) => {
1193
1286
  try {
1194
1287
  const registry = await this.fetchRegistry();
@@ -1399,12 +1492,55 @@ export class TokenbuddyDaemon {
1399
1492
  sellerRegistryUrl: this.config.sellerRegistryUrl,
1400
1493
  selectionMode: this.selectionMode
1401
1494
  });
1495
+ // v1.2 §18.5: kick off the on-demand prewarm pipeline. The startup
1496
+ // sweep runs after the configured jitter window (5-10s by default);
1497
+ // subsequent refreshes run on the `idleIntervalMs` cadence and the
1498
+ // `forwardProxyRequest` hot path can dispatch lazy prewarms on miss.
1499
+ this.prewarmScheduler.start();
1500
+ void this.runStartupPrewarmSweep();
1501
+ }
1502
+ /**
1503
+ * v1.2 §18.4: build the focus set from the explicit config, the env
1504
+ * override, and the historical usage in the buyer store. The order of
1505
+ * precedence: explicit config > env > historical > empty.
1506
+ */
1507
+ resolveFocusSet() {
1508
+ const explicit = this.config.warmupModels ?? [];
1509
+ if (explicit.length > 0) {
1510
+ return explicit;
1511
+ }
1512
+ const envRaw = process.env.TB_BUYER_WARMUP_MODELS || "";
1513
+ const envModels = envRaw.split(",").map((s) => s.trim()).filter(Boolean);
1514
+ if (envModels.length > 0) {
1515
+ return envModels;
1516
+ }
1517
+ return this.tokenStore.recentModels(7, 5);
1518
+ }
1519
+ async runStartupPrewarmSweep() {
1520
+ const focusSet = this.resolveFocusSet();
1521
+ if (focusSet.length === 0) {
1522
+ logger.info("prewarm.startup.skipped", "no focus set configured; relying on lazy prewarms", {});
1523
+ return;
1524
+ }
1525
+ logger.info("prewarm.startup.scheduled", "startup prewarm sweep scheduled", {
1526
+ focusSetSize: focusSet.length,
1527
+ focusSet: focusSet.slice(0, 20)
1528
+ });
1529
+ try {
1530
+ await this.prewarmScheduler.runStartupPrewarm(focusSet);
1531
+ }
1532
+ catch (err) {
1533
+ logger.warn("prewarm.startup.failed", "startup prewarm sweep failed", {
1534
+ errorMessage: err instanceof Error ? err.message : String(err)
1535
+ });
1536
+ }
1402
1537
  }
1403
1538
  stop() {
1404
1539
  if (this.controlServer)
1405
1540
  this.controlServer.close();
1406
1541
  if (this.proxyServer)
1407
1542
  this.proxyServer.close();
1543
+ void this.prewarmScheduler.stop();
1408
1544
  this.tokenStore.close();
1409
1545
  }
1410
1546
  }