@tokenbuddy/tokenbuddy 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +13 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +21 -2
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +54 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/credit-tracker.d.ts +118 -0
- package/dist/src/credit-tracker.d.ts.map +1 -0
- package/dist/src/credit-tracker.js +220 -0
- package/dist/src/credit-tracker.js.map +1 -0
- package/dist/src/daemon.d.ts +49 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +541 -405
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/model-index.d.ts +86 -0
- package/dist/src/model-index.d.ts.map +1 -0
- package/dist/src/model-index.js +214 -0
- package/dist/src/model-index.js.map +1 -0
- package/dist/src/prewarm-cache.d.ts +149 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -0
- package/dist/src/prewarm-cache.js +288 -0
- package/dist/src/prewarm-cache.js.map +1 -0
- package/dist/src/prewarm-scheduler.d.ts +150 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -0
- package/dist/src/prewarm-scheduler.js +484 -0
- package/dist/src/prewarm-scheduler.js.map +1 -0
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +9 -1
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/route-failover.d.ts +96 -0
- package/dist/src/route-failover.d.ts.map +1 -0
- package/dist/src/route-failover.js +177 -0
- package/dist/src/route-failover.js.map +1 -0
- package/dist/src/seller-catalog.d.ts +26 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js +40 -0
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +127 -0
- package/dist/src/seller-pool.d.ts.map +1 -0
- package/dist/src/seller-pool.js +243 -0
- package/dist/src/seller-pool.js.map +1 -0
- package/dist/src/stream-failover.d.ts +78 -0
- package/dist/src/stream-failover.d.ts.map +1 -0
- package/dist/src/stream-failover.js +93 -0
- package/dist/src/stream-failover.js.map +1 -0
- package/package.json +1 -1
- package/src/buyer-store.ts +32 -2
- package/src/cli.ts +61 -0
- package/src/credit-tracker.test.ts +165 -0
- package/src/credit-tracker.ts +269 -0
- package/src/daemon.ts +569 -445
- package/src/model-index.test.ts +184 -0
- package/src/model-index.ts +266 -0
- package/src/prewarm-cache.test.ts +281 -0
- package/src/prewarm-cache.ts +373 -0
- package/src/prewarm-scheduler.test.ts +367 -0
- package/src/prewarm-scheduler.ts +581 -0
- package/src/provider-install.ts +9 -1
- package/src/route-failover.test.ts +193 -0
- package/src/route-failover.ts +233 -0
- package/src/seller-catalog-413.test.ts +61 -0
- package/src/seller-catalog.ts +47 -0
- package/src/seller-pool.test.ts +231 -0
- package/src/seller-pool.ts +333 -0
- package/src/stream-failover.test.ts +52 -0
- package/src/stream-failover.ts +129 -0
- package/src/thousand-seller.test.ts +151 -0
- package/tests/daemon-413-fallback.test.ts +92 -0
- package/tests/e2e.test.ts +3 -2
- package/tests/tokenbuddy.test.ts +68 -11
package/src/daemon.ts
CHANGED
|
@@ -14,17 +14,24 @@ import {
|
|
|
14
14
|
} from "./provider-install.js";
|
|
15
15
|
import {
|
|
16
16
|
discoverSellerBackedModels,
|
|
17
|
-
fetchSellerManifest,
|
|
18
17
|
fetchSellerRegistry,
|
|
19
18
|
manifestModelIds,
|
|
20
19
|
manifestPaymentMethods,
|
|
21
20
|
manifestProtocols,
|
|
22
21
|
normalizeSellerUrl,
|
|
22
|
+
RegistryTooLargeError,
|
|
23
23
|
type RegistrySeller,
|
|
24
24
|
type SellerManifest,
|
|
25
25
|
type SellerRegistryDocument,
|
|
26
26
|
type SellerRoutingPreference,
|
|
27
27
|
} from "./seller-catalog.js";
|
|
28
|
+
import { ModelIndex } from "./model-index.js";
|
|
29
|
+
import { PrewarmCache } from "./prewarm-cache.js";
|
|
30
|
+
import { CreditTracker } from "./credit-tracker.js";
|
|
31
|
+
import { SellerPool, type FailureKind } from "./seller-pool.js";
|
|
32
|
+
import { RouteFailover, type FailoverDecision, type RouteCandidate } from "./route-failover.js";
|
|
33
|
+
import { PrewarmScheduler, type SellerProber } from "./prewarm-scheduler.js";
|
|
34
|
+
import type { PoolEntry } from "./seller-pool.js";
|
|
28
35
|
|
|
29
36
|
const logger = createModuleLogger("tb-proxyd");
|
|
30
37
|
const PROXY_JSON_BODY_LIMIT = "10mb";
|
|
@@ -36,14 +43,21 @@ export interface DaemonConfig {
|
|
|
36
43
|
sellerRegistryUrl: string;
|
|
37
44
|
selectionMode?: "auto" | "manual";
|
|
38
45
|
selectedSellerId?: string;
|
|
46
|
+
// v1.2 §18.4: focus-set override. When omitted, the daemon derives the
|
|
47
|
+
// focus set from the BuyerStore's historical model usage and the
|
|
48
|
+
// `TB_BUYER_WARMUP_MODELS` env var (comma-separated).
|
|
49
|
+
warmupModels?: string[];
|
|
50
|
+
warmupRefreshIntervalSecs?: number;
|
|
51
|
+
warmupProbeTimeoutMs?: number;
|
|
39
52
|
}
|
|
40
53
|
|
|
41
54
|
interface SellerRoute {
|
|
42
55
|
seller: RegistrySeller;
|
|
43
|
-
manifest: SellerManifest;
|
|
56
|
+
manifest: SellerManifest | null;
|
|
44
57
|
protocol: string;
|
|
45
58
|
modelId: string;
|
|
46
59
|
paymentMethod: string;
|
|
60
|
+
poolEntry?: PoolEntry;
|
|
47
61
|
}
|
|
48
62
|
|
|
49
63
|
interface UsageSummary {
|
|
@@ -80,225 +94,6 @@ function numericHeaderField(value: unknown): number | undefined {
|
|
|
80
94
|
return undefined;
|
|
81
95
|
}
|
|
82
96
|
|
|
83
|
-
interface ResponsesStreamState {
|
|
84
|
-
itemId: string;
|
|
85
|
-
text: string;
|
|
86
|
-
contentPartStarted: boolean;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
class ResponsesStreamNormalizer {
|
|
90
|
-
private pending = "";
|
|
91
|
-
private readonly state = new Map<string, ResponsesStreamState>();
|
|
92
|
-
|
|
93
|
-
public push(chunk: string): string {
|
|
94
|
-
this.pending += chunk;
|
|
95
|
-
const blocks = this.pending.split("\n\n");
|
|
96
|
-
this.pending = blocks.pop() || "";
|
|
97
|
-
return blocks
|
|
98
|
-
.map((block) => this.normalizeBlock(block))
|
|
99
|
-
.filter((block) => block.length > 0)
|
|
100
|
-
.join("\n\n");
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
public finish(): string {
|
|
104
|
-
if (!this.pending.trim()) {
|
|
105
|
-
return "";
|
|
106
|
-
}
|
|
107
|
-
const block = this.normalizeBlock(this.pending);
|
|
108
|
-
this.pending = "";
|
|
109
|
-
return block;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
private normalizeBlock(block: string): string {
|
|
113
|
-
if (!block.trim()) {
|
|
114
|
-
return "";
|
|
115
|
-
}
|
|
116
|
-
// Each \n\n separates an event in SSE format
|
|
117
|
-
const subBlocks = block.split("\n\n");
|
|
118
|
-
const output: string[] = [];
|
|
119
|
-
|
|
120
|
-
for (const sub of subBlocks) {
|
|
121
|
-
if (!sub.trim() || sub.trim() === "data: [DONE]") {
|
|
122
|
-
if (sub.trim()) output.push(sub);
|
|
123
|
-
continue;
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
const lines = sub.split("\n");
|
|
127
|
-
const eventLine = lines.find((l) => l.startsWith("event:"));
|
|
128
|
-
const dataLine = lines.find((l) => l.startsWith("data:"));
|
|
129
|
-
if (!dataLine) {
|
|
130
|
-
output.push(sub);
|
|
131
|
-
continue;
|
|
132
|
-
}
|
|
133
|
-
const rawData = dataLine.replace(/^data:\s?/, "");
|
|
134
|
-
if (rawData === "[DONE]") {
|
|
135
|
-
output.push(sub);
|
|
136
|
-
continue;
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
let payload: any;
|
|
140
|
-
try {
|
|
141
|
-
payload = JSON.parse(rawData);
|
|
142
|
-
} catch {
|
|
143
|
-
output.push(sub);
|
|
144
|
-
continue;
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
const eventName =
|
|
148
|
-
(eventLine?.replace(/^event:\s?/, "") || payload?.type) as string;
|
|
149
|
-
if (!eventName || !eventName.startsWith("response.")) {
|
|
150
|
-
output.push(sub);
|
|
151
|
-
continue;
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
// When upstream already sends content_part.added, record it in state
|
|
155
|
-
if (
|
|
156
|
-
eventName === "response.content_part.added" &&
|
|
157
|
-
payload?.item_id
|
|
158
|
-
) {
|
|
159
|
-
const current = this.state.get(payload.item_id as string);
|
|
160
|
-
if (current) current.contentPartStarted = true;
|
|
161
|
-
output.push(sub);
|
|
162
|
-
continue;
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
// response.output_item.added: inject content_part.added only if upstream hasn't
|
|
166
|
-
if (
|
|
167
|
-
eventName === "response.output_item.added" &&
|
|
168
|
-
payload?.item?.type === "message" &&
|
|
169
|
-
payload?.item?.id
|
|
170
|
-
) {
|
|
171
|
-
const itemId = payload.item.id as string;
|
|
172
|
-
const current = this.getState(itemId);
|
|
173
|
-
const item = { ...payload.item };
|
|
174
|
-
item.content = [{ type: "output_text", text: "", annotations: [] }];
|
|
175
|
-
output.push(this.serializeEvent(eventName, {
|
|
176
|
-
...payload,
|
|
177
|
-
output_index: payload.output_index ?? 0,
|
|
178
|
-
item
|
|
179
|
-
}));
|
|
180
|
-
if (!current.contentPartStarted) {
|
|
181
|
-
current.contentPartStarted = true;
|
|
182
|
-
output.push(this.serializeEvent("response.content_part.added", {
|
|
183
|
-
type: "response.content_part.added",
|
|
184
|
-
item_id: itemId,
|
|
185
|
-
output_index: payload.output_index ?? 0,
|
|
186
|
-
content_index: 0,
|
|
187
|
-
part: { type: "output_text", text: "", annotations: [] }
|
|
188
|
-
}));
|
|
189
|
-
}
|
|
190
|
-
continue;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// response.output_text.delta: inject content_part.added if missing
|
|
194
|
-
if (eventName === "response.output_text.delta" && payload?.item_id) {
|
|
195
|
-
const itemId = payload.item_id as string;
|
|
196
|
-
const current = this.getState(itemId);
|
|
197
|
-
if (!current.contentPartStarted) {
|
|
198
|
-
current.contentPartStarted = true;
|
|
199
|
-
output.push(this.serializeEvent("response.content_part.added", {
|
|
200
|
-
type: "response.content_part.added",
|
|
201
|
-
item_id: itemId,
|
|
202
|
-
output_index: payload.output_index ?? 0,
|
|
203
|
-
content_index: payload.content_index ?? 0,
|
|
204
|
-
part: { type: "output_text", text: "", annotations: [] }
|
|
205
|
-
}));
|
|
206
|
-
}
|
|
207
|
-
const deltaText =
|
|
208
|
-
typeof payload.delta === "string"
|
|
209
|
-
? payload.delta
|
|
210
|
-
: typeof payload.delta?.text === "string"
|
|
211
|
-
? payload.delta.text
|
|
212
|
-
: "";
|
|
213
|
-
current.text += deltaText;
|
|
214
|
-
output.push(this.serializeEvent(eventName, {
|
|
215
|
-
...payload,
|
|
216
|
-
output_index: payload.output_index ?? 0,
|
|
217
|
-
content_index: payload.content_index ?? 0
|
|
218
|
-
}));
|
|
219
|
-
continue;
|
|
220
|
-
}
|
|
221
|
-
|
|
222
|
-
// response.output_text.done: also emit content_part.done
|
|
223
|
-
if (eventName === "response.output_text.done" && payload?.item_id) {
|
|
224
|
-
const itemId = payload.item_id as string;
|
|
225
|
-
const current = this.getState(itemId);
|
|
226
|
-
output.push(this.serializeEvent(eventName, {
|
|
227
|
-
...payload,
|
|
228
|
-
output_index: payload.output_index ?? 0,
|
|
229
|
-
content_index: payload.content_index ?? 0
|
|
230
|
-
}));
|
|
231
|
-
output.push(this.serializeEvent("response.content_part.done", {
|
|
232
|
-
type: "response.content_part.done",
|
|
233
|
-
item_id: itemId,
|
|
234
|
-
output_index: payload.output_index ?? 0,
|
|
235
|
-
content_index: payload.content_index ?? 0,
|
|
236
|
-
part: { type: "output_text", text: current.text, annotations: [] }
|
|
237
|
-
}));
|
|
238
|
-
continue;
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
// response.output_item.done: normalize content to output_text type
|
|
242
|
-
if (
|
|
243
|
-
eventName === "response.output_item.done" &&
|
|
244
|
-
payload?.item?.type === "message" &&
|
|
245
|
-
payload?.item?.id
|
|
246
|
-
) {
|
|
247
|
-
const itemId = payload.item.id as string;
|
|
248
|
-
const current = this.getState(itemId);
|
|
249
|
-
const item = {
|
|
250
|
-
...payload.item,
|
|
251
|
-
content: [{ type: "output_text", text: current.text, annotations: [] }]
|
|
252
|
-
};
|
|
253
|
-
output.push(this.serializeEvent(eventName, {
|
|
254
|
-
...payload,
|
|
255
|
-
output_index: payload.output_index ?? 0,
|
|
256
|
-
item
|
|
257
|
-
}));
|
|
258
|
-
continue;
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
// response.completed: patch output if empty
|
|
262
|
-
if (eventName === "response.completed" && payload?.response) {
|
|
263
|
-
const response = { ...payload.response };
|
|
264
|
-
if (!Array.isArray(response.output) || response.output.length === 0) {
|
|
265
|
-
const first = this.state.values().next()
|
|
266
|
-
.value as ResponsesStreamState | undefined;
|
|
267
|
-
if (first) {
|
|
268
|
-
response.output = [{
|
|
269
|
-
id: first.itemId,
|
|
270
|
-
type: "message",
|
|
271
|
-
status: "completed",
|
|
272
|
-
role: "assistant",
|
|
273
|
-
content: [{ type: "output_text", text: first.text, annotations: [] }]
|
|
274
|
-
}];
|
|
275
|
-
response.output_text = first.text;
|
|
276
|
-
}
|
|
277
|
-
}
|
|
278
|
-
output.push(this.serializeEvent(eventName, { ...payload, response }));
|
|
279
|
-
continue;
|
|
280
|
-
}
|
|
281
|
-
|
|
282
|
-
// All other events: pass through unchanged
|
|
283
|
-
output.push(sub);
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
return output.join("\n\n");
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
private getState(itemId: string): ResponsesStreamState {
|
|
290
|
-
const current = this.state.get(itemId);
|
|
291
|
-
if (current) return current;
|
|
292
|
-
const created = { itemId, text: "", contentPartStarted: false };
|
|
293
|
-
this.state.set(itemId, created);
|
|
294
|
-
return created;
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
private serializeEvent(name: string, data: any): string {
|
|
298
|
-
return `event: ${name}\ndata: ${JSON.stringify(data)}`;
|
|
299
|
-
}
|
|
300
|
-
}
|
|
301
|
-
|
|
302
97
|
class SellerSettlementStreamExtractor {
|
|
303
98
|
private pending = "";
|
|
304
99
|
private settlement: SellerSettlementSummary | undefined;
|
|
@@ -410,6 +205,26 @@ export class TokenbuddyDaemon {
|
|
|
410
205
|
|
|
411
206
|
private activePurchases = new Map<string, Promise<string>>();
|
|
412
207
|
|
|
208
|
+
// v1.2 fallback pipeline: model-index, prewarm-cache, credit-tracker,
|
|
209
|
+
// pool, and route-failover together replace the v1
|
|
210
|
+
// "fetchRegistry + manifest per request" path.
|
|
211
|
+
private readonly modelIndex = new ModelIndex();
|
|
212
|
+
private readonly prewarmCache = new PrewarmCache();
|
|
213
|
+
private readonly creditTracker = new CreditTracker();
|
|
214
|
+
private readonly sellerPool = new SellerPool({
|
|
215
|
+
modelIndex: this.modelIndex,
|
|
216
|
+
cache: this.prewarmCache,
|
|
217
|
+
creditTracker: this.creditTracker
|
|
218
|
+
});
|
|
219
|
+
private readonly routeFailover = new RouteFailover({
|
|
220
|
+
pool: this.sellerPool,
|
|
221
|
+
creditTracker: this.creditTracker
|
|
222
|
+
});
|
|
223
|
+
// v1.2 §18.5: assigned in the constructor because the scheduler needs
|
|
224
|
+
// config-derived knobs. The `!` opts out of strict-initialization so the
|
|
225
|
+
// rest of the class can treat it as non-nullable.
|
|
226
|
+
private readonly prewarmScheduler!: PrewarmScheduler;
|
|
227
|
+
|
|
413
228
|
constructor(config: DaemonConfig) {
|
|
414
229
|
this.tokenStore = new BuyerStore({ dbPath: config.dbPath });
|
|
415
230
|
const routingPreference =
|
|
@@ -422,6 +237,42 @@ export class TokenbuddyDaemon {
|
|
|
422
237
|
"auto";
|
|
423
238
|
this.selectedSellerId =
|
|
424
239
|
config.selectedSellerId || routingPreference?.sellerId;
|
|
240
|
+
// v1.2 §18.5: scheduler is created here (not in the field initializer)
|
|
241
|
+
// because it needs the config-derived prober + idle interval.
|
|
242
|
+
Object.assign(this, {
|
|
243
|
+
prewarmScheduler: new PrewarmScheduler({
|
|
244
|
+
modelIndex: this.modelIndex,
|
|
245
|
+
cache: this.prewarmCache,
|
|
246
|
+
prober: this.buildHealthProber(config.warmupProbeTimeoutMs ?? 3000),
|
|
247
|
+
idleIntervalMs: (config.warmupRefreshIntervalSecs ?? 60) * 1000
|
|
248
|
+
})
|
|
249
|
+
});
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
private buildHealthProber(timeoutMs: number): SellerProber {
|
|
253
|
+
return async (seller, signal) => {
|
|
254
|
+
try {
|
|
255
|
+
const ac = new AbortController();
|
|
256
|
+
const timer = setTimeout(() => ac.abort(new Error("healthz timeout")), timeoutMs);
|
|
257
|
+
if (signal) {
|
|
258
|
+
if (signal.aborted) {
|
|
259
|
+
ac.abort(signal.reason);
|
|
260
|
+
} else {
|
|
261
|
+
signal.addEventListener("abort", () => ac.abort(signal.reason), { once: true });
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
const startedAt = Date.now();
|
|
265
|
+
const res = await fetch(`${seller.url.replace(/\/+$/, "")}/healthz`, { signal: ac.signal });
|
|
266
|
+
clearTimeout(timer);
|
|
267
|
+
if (!res.ok) {
|
|
268
|
+
return { ok: false, latencyMs: Date.now() - startedAt, httpStatus: res.status, errorMessage: `healthz returned ${res.status}` };
|
|
269
|
+
}
|
|
270
|
+
return { ok: true, latencyMs: Date.now() - startedAt, httpStatus: res.status };
|
|
271
|
+
} catch (err) {
|
|
272
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
273
|
+
return { ok: false, latencyMs: 0, errorMessage: message };
|
|
274
|
+
}
|
|
275
|
+
};
|
|
425
276
|
}
|
|
426
277
|
|
|
427
278
|
private activeControlPort(): number {
|
|
@@ -434,8 +285,43 @@ export class TokenbuddyDaemon {
|
|
|
434
285
|
return typeof address === "object" && address ? address.port : this.config.proxyPort;
|
|
435
286
|
}
|
|
436
287
|
|
|
288
|
+
// v1.2 §18.9: stale-cache fallback. The buyer remembers the last
|
|
289
|
+
// successfully fetched registry document and reuses it when the
|
|
290
|
+
// bootstrap returns 413 (`X-TokenBuddy-Registry-Too-Large: 1`). This
|
|
291
|
+
// trades freshness for availability: requests still route, but the
|
|
292
|
+
// model set is whatever was cached before the registry outgrew 1MB.
|
|
293
|
+
private lastRegistrySnapshot: SellerRegistryDocument | null = null;
|
|
294
|
+
|
|
437
295
|
private async fetchRegistry(): Promise<SellerRegistryDocument> {
|
|
438
|
-
|
|
296
|
+
try {
|
|
297
|
+
const registry = await fetchSellerRegistry(this.config.sellerRegistryUrl);
|
|
298
|
+
this.modelIndex.rebuild(registry.sellers, {
|
|
299
|
+
registryVersion: registry.version,
|
|
300
|
+
defaultSellerId: registry.defaultSeller
|
|
301
|
+
});
|
|
302
|
+
this.sellerPool.sync();
|
|
303
|
+
this.lastRegistrySnapshot = registry;
|
|
304
|
+
return registry;
|
|
305
|
+
} catch (err) {
|
|
306
|
+
// v1.2 §18.9: if the bootstrap returns 413, fall back to the
|
|
307
|
+
// last-known registry document. This keeps the buyer routing even
|
|
308
|
+
// when the registry temporarily outgrows the 1MB cap.
|
|
309
|
+
if (err instanceof RegistryTooLargeError && this.lastRegistrySnapshot) {
|
|
310
|
+
logger.warn("registry.stale_fallback", "registry returned 413; using last-known snapshot for routing", {
|
|
311
|
+
sellerRegistryUrl: this.config.sellerRegistryUrl,
|
|
312
|
+
cachedVersion: this.lastRegistrySnapshot.version,
|
|
313
|
+
cachedSellers: this.lastRegistrySnapshot.sellers.length
|
|
314
|
+
});
|
|
315
|
+
const stale = this.lastRegistrySnapshot;
|
|
316
|
+
this.modelIndex.rebuild(stale.sellers, {
|
|
317
|
+
registryVersion: stale.version,
|
|
318
|
+
defaultSellerId: stale.defaultSeller
|
|
319
|
+
});
|
|
320
|
+
this.sellerPool.sync();
|
|
321
|
+
return stale;
|
|
322
|
+
}
|
|
323
|
+
throw err;
|
|
324
|
+
}
|
|
439
325
|
}
|
|
440
326
|
|
|
441
327
|
private runtimeSummary() {
|
|
@@ -553,49 +439,41 @@ export class TokenbuddyDaemon {
|
|
|
553
439
|
throw new Error("mock or clawtip payment method is not configured as an enabled buyer payment method");
|
|
554
440
|
}
|
|
555
441
|
|
|
442
|
+
// v1.2: registry is the source of truth for routing. We rebuild the
|
|
443
|
+
// model-index once per request (cheap; index lookup is in-memory) so
|
|
444
|
+
// the response always reflects the latest seller list. The previous
|
|
445
|
+
// "fetchSellerManifest per candidate" path is removed in favor of
|
|
446
|
+
// pulling `models` directly off the registry entries.
|
|
556
447
|
const registry = await this.fetchRegistry();
|
|
557
|
-
const defaultSellers = registry.sellers.filter((seller) => seller.id === registry.defaultSeller);
|
|
558
|
-
const backupSellers = registry.sellers.filter((seller) => seller.id !== registry.defaultSeller);
|
|
559
|
-
const manualSellers = this.selectedSellerId
|
|
560
|
-
? registry.sellers.filter((seller) => seller.id === this.selectedSellerId)
|
|
561
|
-
: defaultSellers;
|
|
562
|
-
const sellers = this.selectionMode === "manual" ? manualSellers : [...defaultSellers, ...backupSellers];
|
|
563
|
-
|
|
564
|
-
const routes: SellerRoute[] = [];
|
|
565
|
-
for (const seller of sellers) {
|
|
566
|
-
let manifest: SellerManifest;
|
|
567
|
-
try {
|
|
568
|
-
manifest = await fetchSellerManifest(seller);
|
|
569
|
-
} catch (error: unknown) {
|
|
570
|
-
logger.warn("route.manifest.failed", "seller manifest unavailable during route selection", {
|
|
571
|
-
sellerKey: seller.id,
|
|
572
|
-
model: modelId,
|
|
573
|
-
endpoint,
|
|
574
|
-
errorMessage: error instanceof Error ? error.message : String(error)
|
|
575
|
-
});
|
|
576
|
-
continue;
|
|
577
|
-
}
|
|
578
|
-
|
|
579
|
-
const protocols = manifestProtocols(manifest, seller);
|
|
580
|
-
const paymentMethods = manifestPaymentMethods(manifest, seller);
|
|
581
|
-
const modelIds = manifestModelIds(manifest);
|
|
582
|
-
if (!protocols.includes(protocol) || !paymentMethods.includes(paymentMethod) || !modelIds.includes(modelId)) {
|
|
583
|
-
continue;
|
|
584
|
-
}
|
|
585
448
|
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
449
|
+
const indexCandidates = this.modelIndex.sellersFor(modelId, { protocol, paymentMethod });
|
|
450
|
+
let ordered = indexCandidates;
|
|
451
|
+
if (this.selectionMode === "manual" && this.selectedSellerId) {
|
|
452
|
+
ordered = indexCandidates.filter((seller) => seller.id === this.selectedSellerId);
|
|
453
|
+
} else if (this.selectionMode === "manual" && registry.defaultSeller) {
|
|
454
|
+
ordered = indexCandidates.filter((seller) => seller.id === registry.defaultSeller);
|
|
455
|
+
} else if (registry.defaultSeller) {
|
|
456
|
+
// auto mode: default first, then backups in registry order
|
|
457
|
+
ordered = [
|
|
458
|
+
...indexCandidates.filter((seller) => seller.id === registry.defaultSeller),
|
|
459
|
+
...indexCandidates.filter((seller) => seller.id !== registry.defaultSeller)
|
|
460
|
+
];
|
|
593
461
|
}
|
|
594
462
|
|
|
595
|
-
if (
|
|
463
|
+
if (ordered.length === 0) {
|
|
596
464
|
throw new Error(`no compatible seller for ${endpoint} model ${modelId}`);
|
|
597
465
|
}
|
|
598
466
|
|
|
467
|
+
const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
|
|
468
|
+
const routes: SellerRoute[] = ordered.map((seller) => ({
|
|
469
|
+
seller,
|
|
470
|
+
manifest: null,
|
|
471
|
+
protocol,
|
|
472
|
+
modelId,
|
|
473
|
+
paymentMethod,
|
|
474
|
+
poolEntry: poolById.get(seller.id)
|
|
475
|
+
}));
|
|
476
|
+
|
|
599
477
|
logger.info("route.candidates.prewarmed", "seller route candidates prewarmed", {
|
|
600
478
|
model: modelId,
|
|
601
479
|
endpoint,
|
|
@@ -608,48 +486,56 @@ export class TokenbuddyDaemon {
|
|
|
608
486
|
return routes;
|
|
609
487
|
}
|
|
610
488
|
|
|
611
|
-
private
|
|
612
|
-
|
|
613
|
-
sellerKey: route.seller.id,
|
|
614
|
-
model: route.modelId,
|
|
615
|
-
endpoint,
|
|
616
|
-
protocol: route.protocol,
|
|
617
|
-
paymentMethod: route.paymentMethod,
|
|
618
|
-
routeIndex,
|
|
619
|
-
backup: routeIndex > 0
|
|
620
|
-
});
|
|
489
|
+
private failoverErrorMessage(error: unknown): string {
|
|
490
|
+
return error instanceof Error ? error.message : String(error);
|
|
621
491
|
}
|
|
622
492
|
|
|
623
|
-
|
|
624
|
-
|
|
493
|
+
/**
|
|
494
|
+
* Map an HTTP status from a failed seller call to a `FailureKind` that
|
|
495
|
+
* the route-failover controller understands. Hard 4xx (other than
|
|
496
|
+
* auth/insufficient) means the seller is wrong for the request; 5xx
|
|
497
|
+
* and 429 are treated as transient and eligible for the soft-failure
|
|
498
|
+
* retry budget. The v1.1 "insufficient funds" check stays on the
|
|
499
|
+
* caller side because it short-circuits the failure path with a
|
|
500
|
+
* re-purchase.
|
|
501
|
+
*/
|
|
502
|
+
private classifyFailureStatus(status: number): FailureKind {
|
|
503
|
+
if (status === 401 || status === 403) {
|
|
504
|
+
return "auth_invalid";
|
|
505
|
+
}
|
|
506
|
+
if (status === 402) {
|
|
507
|
+
return "insufficient_funds";
|
|
508
|
+
}
|
|
509
|
+
if (status === 400 || status === 404 || status === 422) {
|
|
510
|
+
return "hard_4xx";
|
|
511
|
+
}
|
|
512
|
+
return "soft_5xx";
|
|
625
513
|
}
|
|
626
514
|
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
515
|
+
/**
|
|
516
|
+
* Emit the structured failover log line. The decision itself is
|
|
517
|
+
* produced by `RouteFailover.decide`; this helper exists only to keep
|
|
518
|
+
* the controller loop readable.
|
|
519
|
+
*/
|
|
520
|
+
private handleFailoverDecision(
|
|
521
|
+
decision: FailoverDecision,
|
|
522
|
+
context: { sellerKey: string; endpoint: string; routeIndex: number; status?: number; reason?: string }
|
|
633
523
|
): void {
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
const routes = await this.selectSellerRoutes(endpoint, modelId);
|
|
650
|
-
const route = routes[0];
|
|
651
|
-
this.logRouteSelected(route, endpoint, 0);
|
|
652
|
-
return route;
|
|
524
|
+
if (decision.action === "retry_same_seller") {
|
|
525
|
+
return;
|
|
526
|
+
}
|
|
527
|
+
if (decision.action === "failover_next") {
|
|
528
|
+
logger.warn("route.failover.triggered", "seller route failed over to backup candidate", {
|
|
529
|
+
sellerKey: context.sellerKey,
|
|
530
|
+
endpoint: context.endpoint,
|
|
531
|
+
routeIndex: context.routeIndex,
|
|
532
|
+
reason: decision.reason,
|
|
533
|
+
status: context.status,
|
|
534
|
+
wastedCreditMicros: decision.wastedCreditMicros,
|
|
535
|
+
freshPurchase: decision.freshPurchase,
|
|
536
|
+
retryAttemptsBeforeFailover: decision.retryAttemptsBeforeFailover
|
|
537
|
+
});
|
|
538
|
+
}
|
|
653
539
|
}
|
|
654
540
|
|
|
655
541
|
private async listSellerBackedModels(): Promise<{
|
|
@@ -851,13 +737,60 @@ export class TokenbuddyDaemon {
|
|
|
851
737
|
return parsed;
|
|
852
738
|
}
|
|
853
739
|
|
|
740
|
+
/**
|
|
741
|
+
* v1.2 §8: hard per-request deadline. The buyer refuses to wait longer
|
|
742
|
+
* than this for a single seller; on expiry the request is aborted and
|
|
743
|
+
* the route-failover controller can either retry the same seller with
|
|
744
|
+
* a smaller body or fail over. Configurable via
|
|
745
|
+
* `TB_PROXYD_REQUEST_DEADLINE_MS` (default 30s).
|
|
746
|
+
*/
|
|
747
|
+
private requestDeadlineMs(): number {
|
|
748
|
+
const raw = process.env.TB_PROXYD_REQUEST_DEADLINE_MS;
|
|
749
|
+
if (!raw) {
|
|
750
|
+
return 30_000;
|
|
751
|
+
}
|
|
752
|
+
const parsed = Number(raw);
|
|
753
|
+
if (!Number.isInteger(parsed) || parsed < 1000) {
|
|
754
|
+
return 30_000;
|
|
755
|
+
}
|
|
756
|
+
return parsed;
|
|
757
|
+
}
|
|
758
|
+
|
|
759
|
+
/**
|
|
760
|
+
* Safety margin subtracted from the cached token's `expiresAt` before
|
|
761
|
+
* deciding to reuse it. Buying a new token 60s before expiry gives the
|
|
762
|
+
* upstream enough headroom to reject any in-flight calls under the old
|
|
763
|
+
* token before the buyer assumes the new one is valid.
|
|
764
|
+
*/
|
|
765
|
+
private tokenExpirySafetyMarginMs(): number {
|
|
766
|
+
const raw = process.env.TB_PROXYD_TOKEN_EXPIRY_SAFETY_MARGIN_MS;
|
|
767
|
+
if (!raw) {
|
|
768
|
+
return 60_000;
|
|
769
|
+
}
|
|
770
|
+
const parsed = Number(raw);
|
|
771
|
+
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
772
|
+
return 60_000;
|
|
773
|
+
}
|
|
774
|
+
return parsed;
|
|
775
|
+
}
|
|
776
|
+
|
|
854
777
|
private async getOrPurchaseToken(route: SellerRoute): Promise<string> {
|
|
855
778
|
const sellerKey = route.seller.id;
|
|
856
779
|
const sellerUrl = normalizeSellerUrl(route.seller);
|
|
857
780
|
const { modelId, paymentMethod } = route;
|
|
858
781
|
const cached = this.tokenStore.getToken(sellerKey);
|
|
859
782
|
const rebuyMinBalanceMicros = this.tokenRebuyMinBalanceMicros();
|
|
860
|
-
|
|
783
|
+
// v1.2 PR-fix (2026-06-02): reject cached tokens that are inside the
|
|
784
|
+
// safety margin of their seller-assigned expiry. The previous
|
|
785
|
+
// implementation only checked `balanceMicros`, which let the buyer
|
|
786
|
+
// keep serving 24h-expired access tokens to the upstream and
|
|
787
|
+
// produced 401 "Bearer token is invalid or expired" errors. The
|
|
788
|
+
// `expiresAt` field is sourced from the seller's
|
|
789
|
+
// `/purchase/complete` response and is part of the `saveToken`
|
|
790
|
+
// contract.
|
|
791
|
+
const expiresAtMs = cached?.expiresAt ? Date.parse(cached.expiresAt) : NaN;
|
|
792
|
+
const tokenStillFresh = Number.isFinite(expiresAtMs) && Date.now() + this.tokenExpirySafetyMarginMs() < expiresAtMs;
|
|
793
|
+
if (cached && tokenStillFresh && cached.balanceMicros > rebuyMinBalanceMicros) {
|
|
861
794
|
logger.info("token.cache.hit", "seller token cache hit", {
|
|
862
795
|
sellerKey,
|
|
863
796
|
model: modelId,
|
|
@@ -870,7 +803,8 @@ export class TokenbuddyDaemon {
|
|
|
870
803
|
sellerKey,
|
|
871
804
|
model: modelId,
|
|
872
805
|
balanceMicros: cached?.balanceMicros || 0,
|
|
873
|
-
rebuyMinBalanceMicros
|
|
806
|
+
rebuyMinBalanceMicros,
|
|
807
|
+
expired: Boolean(cached) && !tokenStillFresh
|
|
874
808
|
});
|
|
875
809
|
|
|
876
810
|
const purchaseKey = `${sellerKey}:${modelId}:${paymentMethod}`;
|
|
@@ -982,6 +916,9 @@ export class TokenbuddyDaemon {
|
|
|
982
916
|
paymentReference: completeData.paymentReference || completeData.payment_reference,
|
|
983
917
|
completedAt: new Date().toISOString()
|
|
984
918
|
});
|
|
919
|
+
// v1.1: feed the credit tracker so the route-failover controller
|
|
920
|
+
// knows the seller is inside the fresh-purchase window.
|
|
921
|
+
this.creditTracker.recordPurchase(sellerKey, creditMicros, creditMicros);
|
|
985
922
|
logger.info("purchase.token.succeeded", "seller token purchased", {
|
|
986
923
|
sellerKey,
|
|
987
924
|
model: modelId,
|
|
@@ -1157,58 +1094,141 @@ export class TokenbuddyDaemon {
|
|
|
1157
1094
|
for (let routeIndex = 0; routeIndex < routes.length; routeIndex += 1) {
|
|
1158
1095
|
const route = routes[routeIndex];
|
|
1159
1096
|
const sellerKey = route.seller.id;
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1097
|
+
logger.info("route.selected", "seller route selected", {
|
|
1098
|
+
sellerKey,
|
|
1099
|
+
model: modelId,
|
|
1100
|
+
endpoint,
|
|
1101
|
+
protocol: route.protocol,
|
|
1102
|
+
paymentMethod: route.paymentMethod,
|
|
1103
|
+
routeIndex,
|
|
1104
|
+
backup: routeIndex > 0
|
|
1105
|
+
});
|
|
1106
|
+
let attempt = 0;
|
|
1107
|
+
// Soft-failure retry budget; the route-failover controller decides
|
|
1108
|
+
// whether the same seller should be retried or we move on. The
|
|
1109
|
+
// v1 "1 retry for 4xx fallback" loop is replaced with a
|
|
1110
|
+
// stateful decision per attempt.
|
|
1111
|
+
// eslint-disable-next-line no-constant-condition
|
|
1112
|
+
while (true) {
|
|
1113
|
+
try {
|
|
1114
|
+
logger.info("proxy.request.started", "proxy request started", {
|
|
1115
|
+
requestId,
|
|
1116
|
+
sellerKey,
|
|
1117
|
+
model: modelId,
|
|
1118
|
+
requestedModel: requestedModelId,
|
|
1119
|
+
endpoint,
|
|
1120
|
+
stream: Boolean((body as { stream?: unknown }).stream),
|
|
1121
|
+
attempt
|
|
1122
|
+
});
|
|
1123
|
+
const sellerUrl = normalizeSellerUrl(route.seller);
|
|
1124
|
+
const upstreamBody = this.applyResolvedModelToBody(endpoint, {
|
|
1125
|
+
...(body as Record<string, unknown>),
|
|
1126
|
+
requestId
|
|
1127
|
+
}, modelId);
|
|
1128
|
+
|
|
1129
|
+
logger.info("proxy.upstream_fetch.started", "proxy upstream fetch started", {
|
|
1130
|
+
requestId,
|
|
1131
|
+
sellerKey,
|
|
1132
|
+
model: modelId,
|
|
1133
|
+
endpoint,
|
|
1134
|
+
stream: Boolean((body as { stream?: unknown }).stream),
|
|
1135
|
+
upstreamBody
|
|
1136
|
+
});
|
|
1137
|
+
// v1.1 §17.5: refuse to auto-purchase once the session budget is
|
|
1138
|
+
// exhausted. The seller is treated as "no auto-purchase available"
|
|
1139
|
+
// and the request fails over to the next candidate.
|
|
1140
|
+
if (!this.routeFailover.canAutoPurchase()) {
|
|
1141
|
+
logger.warn("purchase.budget.exceeded", "session auto-purchase budget exhausted; failing over without buying", {
|
|
1142
|
+
requestId,
|
|
1143
|
+
sellerKey,
|
|
1144
|
+
model: modelId,
|
|
1145
|
+
endpoint,
|
|
1146
|
+
routeIndex
|
|
1147
|
+
});
|
|
1148
|
+
lastError = new Error("auto-purchase budget exceeded for this session");
|
|
1149
|
+
break;
|
|
1150
|
+
}
|
|
1151
|
+
// v1.1: a purchase failure means the seller is unreachable for
|
|
1152
|
+
// payment, not "transiently flapping". Do not retry the same
|
|
1153
|
+
// seller; transfer leftover to wasted and fail over immediately.
|
|
1154
|
+
let token: string;
|
|
1155
|
+
try {
|
|
1156
|
+
token = await this.getOrPurchaseToken(route);
|
|
1157
|
+
} catch (purchaseError) {
|
|
1158
|
+
logger.warn("purchase.failed", "seller auto-purchase failed; failing over without retry", {
|
|
1159
|
+
requestId,
|
|
1160
|
+
sellerKey,
|
|
1161
|
+
model: modelId,
|
|
1162
|
+
endpoint,
|
|
1163
|
+
errorMessage: this.failoverErrorMessage(purchaseError)
|
|
1164
|
+
});
|
|
1165
|
+
this.routeFailover.decide(
|
|
1166
|
+
{
|
|
1167
|
+
sellerId: sellerKey,
|
|
1168
|
+
errorKind: "deadline",
|
|
1169
|
+
errorMessage: this.failoverErrorMessage(purchaseError),
|
|
1170
|
+
attempt
|
|
1171
|
+
},
|
|
1172
|
+
routes.length - routeIndex
|
|
1173
|
+
);
|
|
1174
|
+
lastError = purchaseError;
|
|
1175
|
+
break;
|
|
1176
|
+
}
|
|
1177
|
+
// v1.2 §8: enforce a hard per-request deadline so a slow
|
|
1178
|
+
// upstream cannot hang the buyer. The deadline is honored by
|
|
1179
|
+
// the AbortController passed to `fetch`; sellers that observe
|
|
1180
|
+
// the `X-TokenBuddy-Deadline-Ms` header (PR-6) can propagate
|
|
1181
|
+
// it to their own upstream fetch via the same signal.
|
|
1182
|
+
const deadlineMs = this.requestDeadlineMs();
|
|
1183
|
+
const requestAc = new AbortController();
|
|
1184
|
+
const requestTimer = setTimeout(() => requestAc.abort(new Error("buyer deadline exceeded")), deadlineMs);
|
|
1185
|
+
const sendSellerRequest = async (token: string) => {
|
|
1186
|
+
const headers: Record<string, string> = {
|
|
1178
1187
|
"Content-Type": "application/json",
|
|
1179
1188
|
"Authorization": `Bearer ${token}`,
|
|
1180
1189
|
"X-Request-Id": requestId,
|
|
1181
1190
|
"Idempotency-Key": idempotencyKey
|
|
1182
|
-
}
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
stream: Boolean((body as { stream?: unknown }).stream)
|
|
1192
|
-
});
|
|
1193
|
-
let token = await this.getOrPurchaseToken(route);
|
|
1191
|
+
};
|
|
1192
|
+
headers["X-TokenBuddy-Deadline-Ms"] = String(deadlineMs);
|
|
1193
|
+
return fetch(`${sellerUrl}${endpoint}`, {
|
|
1194
|
+
method: "POST",
|
|
1195
|
+
headers,
|
|
1196
|
+
body: JSON.stringify(upstreamBody),
|
|
1197
|
+
signal: requestAc.signal
|
|
1198
|
+
});
|
|
1199
|
+
};
|
|
1194
1200
|
let upstreamResponse = await sendSellerRequest(token);
|
|
1195
1201
|
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1202
|
+
if (!upstreamResponse.ok) {
|
|
1203
|
+
const errorBody = await upstreamResponse.text();
|
|
1204
|
+
if (this.isInsufficientFundsResponse(upstreamResponse.status, errorBody)) {
|
|
1205
|
+
token = await this.recoverFromInsufficientFunds(route, token);
|
|
1206
|
+
upstreamResponse = await sendSellerRequest(token);
|
|
1207
|
+
if (upstreamResponse.ok) {
|
|
1208
|
+
logger.info("proxy.retry_after_402.succeeded", "seller request succeeded after one-shot auto purchase retry", {
|
|
1209
|
+
requestId,
|
|
1210
|
+
sellerKey,
|
|
1211
|
+
model: modelId,
|
|
1212
|
+
endpoint,
|
|
1213
|
+
durationMs: Date.now() - startedAt
|
|
1214
|
+
});
|
|
1215
|
+
} else {
|
|
1216
|
+
const retryErrorBody = await upstreamResponse.text();
|
|
1217
|
+
logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
|
|
1218
|
+
requestId,
|
|
1219
|
+
sellerKey,
|
|
1220
|
+
model: modelId,
|
|
1221
|
+
endpoint,
|
|
1222
|
+
status: upstreamResponse.status,
|
|
1223
|
+
durationMs: Date.now() - startedAt
|
|
1224
|
+
});
|
|
1225
|
+
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1226
|
+
res.status(upstreamResponse.status);
|
|
1227
|
+
res.send(retryErrorBody);
|
|
1228
|
+
return;
|
|
1229
|
+
}
|
|
1209
1230
|
} else {
|
|
1210
|
-
|
|
1211
|
-
logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
|
|
1231
|
+
logger.warn("proxy.upstream_fetch.failed", "proxy upstream fetch returned non-ok status", {
|
|
1212
1232
|
requestId,
|
|
1213
1233
|
sellerKey,
|
|
1214
1234
|
model: modelId,
|
|
@@ -1216,131 +1236,149 @@ export class TokenbuddyDaemon {
|
|
|
1216
1236
|
status: upstreamResponse.status,
|
|
1217
1237
|
durationMs: Date.now() - startedAt
|
|
1218
1238
|
});
|
|
1219
|
-
this.
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1239
|
+
const kind: FailureKind = this.classifyFailureStatus(upstreamResponse.status);
|
|
1240
|
+
const decision = this.routeFailover.decide(
|
|
1241
|
+
{
|
|
1242
|
+
sellerId: sellerKey,
|
|
1243
|
+
status: upstreamResponse.status,
|
|
1244
|
+
errorKind: kind,
|
|
1245
|
+
errorMessage: errorBody,
|
|
1246
|
+
attempt
|
|
1247
|
+
},
|
|
1248
|
+
routes.length - routeIndex
|
|
1249
|
+
);
|
|
1250
|
+
this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex });
|
|
1251
|
+
if (decision.action === "fail_fast" || decision.action === "abort") {
|
|
1252
|
+
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1253
|
+
res.status(upstreamResponse.status);
|
|
1254
|
+
res.send(errorBody);
|
|
1255
|
+
return;
|
|
1256
|
+
}
|
|
1257
|
+
if (decision.action === "retry_same_seller") {
|
|
1258
|
+
attempt += 1;
|
|
1259
|
+
if (decision.retryDelayMs) {
|
|
1260
|
+
await new Promise<void>((resolve) => setTimeout(resolve, decision.retryDelayMs));
|
|
1261
|
+
}
|
|
1262
|
+
continue;
|
|
1263
|
+
}
|
|
1264
|
+
// failover_next
|
|
1265
|
+
lastError = new Error(`seller ${sellerKey} returned ${upstreamResponse.status}`);
|
|
1266
|
+
break;
|
|
1223
1267
|
}
|
|
1224
|
-
}
|
|
1225
|
-
|
|
1268
|
+
}
|
|
1269
|
+
|
|
1270
|
+
// Successful response: stream or buffer.
|
|
1271
|
+
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1272
|
+
res.status(upstreamResponse.status);
|
|
1273
|
+
logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
|
|
1226
1274
|
requestId,
|
|
1227
1275
|
sellerKey,
|
|
1228
1276
|
model: modelId,
|
|
1229
1277
|
endpoint,
|
|
1230
1278
|
status: upstreamResponse.status,
|
|
1231
|
-
|
|
1279
|
+
stream: Boolean((body as { stream?: unknown }).stream)
|
|
1232
1280
|
});
|
|
1233
|
-
if (this.shouldFailoverStatus(upstreamResponse.status) && routeIndex < routes.length - 1) {
|
|
1234
|
-
lastError = new Error(`seller ${sellerKey} returned ${upstreamResponse.status}`);
|
|
1235
|
-
this.logFailover(route, endpoint, routeIndex, "upstream_status", upstreamResponse.status);
|
|
1236
|
-
continue;
|
|
1237
|
-
}
|
|
1238
|
-
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1239
|
-
res.status(upstreamResponse.status);
|
|
1240
|
-
res.send(errorBody);
|
|
1241
|
-
return;
|
|
1242
|
-
}
|
|
1243
|
-
}
|
|
1244
|
-
|
|
1245
|
-
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1246
|
-
res.status(upstreamResponse.status);
|
|
1247
|
-
logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
|
|
1248
|
-
requestId,
|
|
1249
|
-
sellerKey,
|
|
1250
|
-
model: modelId,
|
|
1251
|
-
endpoint,
|
|
1252
|
-
status: upstreamResponse.status,
|
|
1253
|
-
stream: Boolean((body as { stream?: unknown }).stream)
|
|
1254
|
-
});
|
|
1255
1281
|
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
}
|
|
1263
|
-
let bytes = 0;
|
|
1264
|
-
const decoder = new TextDecoder();
|
|
1265
|
-
const responsesStreamNormalizer = new ResponsesStreamNormalizer();
|
|
1266
|
-
const settlementExtractor = new SellerSettlementStreamExtractor();
|
|
1267
|
-
while (true) {
|
|
1268
|
-
const { done, value } = await reader.read();
|
|
1269
|
-
if (done) {
|
|
1270
|
-
break;
|
|
1271
|
-
}
|
|
1272
|
-
bytes += value.byteLength;
|
|
1273
|
-
const chunk = decoder.decode(value, { stream: true });
|
|
1274
|
-
const sellerChunk = settlementExtractor.push(chunk);
|
|
1275
|
-
if (sellerChunk.length === 0) {
|
|
1276
|
-
continue;
|
|
1282
|
+
const contentType = upstreamResponse.headers.get("content-type") || "";
|
|
1283
|
+
if (contentType.includes("text/event-stream") || Boolean((body as { stream?: unknown }).stream)) {
|
|
1284
|
+
const reader = upstreamResponse.body?.getReader();
|
|
1285
|
+
if (!reader) {
|
|
1286
|
+
res.end();
|
|
1287
|
+
return;
|
|
1277
1288
|
}
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1289
|
+
let bytes = 0;
|
|
1290
|
+
const decoder = new TextDecoder();
|
|
1291
|
+
const settlementExtractor = new SellerSettlementStreamExtractor();
|
|
1292
|
+
while (true) {
|
|
1293
|
+
const { done, value } = await reader.read();
|
|
1294
|
+
if (done) {
|
|
1295
|
+
break;
|
|
1296
|
+
}
|
|
1297
|
+
bytes += value.byteLength;
|
|
1298
|
+
const chunk = decoder.decode(value, { stream: true });
|
|
1299
|
+
// 透明代理:把 seller 的 SSE 字节原样转给客户端,只剥离我们注入的
|
|
1300
|
+
// tokenbuddy.settlement 事件(不让客户端看到内部记账字段)。除此之外
|
|
1301
|
+
// 不做任何协议转换——卖方格式 bug(如 chat.completion.chunk prefix、
|
|
1302
|
+
// 缺 event: 行)由卖方修,buyer 不兜底。
|
|
1303
|
+
const sellerChunk = settlementExtractor.push(chunk);
|
|
1304
|
+
if (sellerChunk.length > 0) {
|
|
1305
|
+
res.write(sellerChunk);
|
|
1282
1306
|
}
|
|
1283
|
-
} else {
|
|
1284
|
-
res.write(sellerChunk);
|
|
1285
1307
|
}
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1308
|
+
// flush TextDecoder 内部 buffer:stream:true 模式下最后可能留有几个字节的
|
|
1309
|
+
// 不完整 UTF-8 序列(多字节字符被切到下一 chunk 的场景),不调 stream:false
|
|
1310
|
+
// flush 就 break 会丢这批字节。上面的 stream 末尾事件(done / completed)
|
|
1311
|
+
// 之前被吞掉就是这个原因。
|
|
1312
|
+
const decoderTail = decoder.decode();
|
|
1313
|
+
if (decoderTail.length > 0) {
|
|
1314
|
+
const sellerTail = settlementExtractor.push(decoderTail);
|
|
1315
|
+
if (sellerTail.length > 0) {
|
|
1316
|
+
res.write(sellerTail);
|
|
1293
1317
|
}
|
|
1294
|
-
} else {
|
|
1295
|
-
res.write(settlementTrailing.downstream);
|
|
1296
1318
|
}
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
if (trailing.length > 0) {
|
|
1301
|
-
res.write(`${trailing}\n\n`);
|
|
1319
|
+
const settlementTrailing = settlementExtractor.finish();
|
|
1320
|
+
if (settlementTrailing.downstream.length > 0) {
|
|
1321
|
+
res.write(settlementTrailing.downstream);
|
|
1302
1322
|
}
|
|
1323
|
+
res.end();
|
|
1324
|
+
this.recordReconciledInference(
|
|
1325
|
+
route,
|
|
1326
|
+
endpoint,
|
|
1327
|
+
requestId,
|
|
1328
|
+
{ promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) },
|
|
1329
|
+
this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(),
|
|
1330
|
+
this.inferPromptForHash(body)
|
|
1331
|
+
);
|
|
1332
|
+
return;
|
|
1303
1333
|
}
|
|
1304
|
-
|
|
1334
|
+
|
|
1335
|
+
const responseBody = await upstreamResponse.text();
|
|
1336
|
+
res.send(responseBody);
|
|
1337
|
+
const usage = this.readUsage(responseBody);
|
|
1305
1338
|
this.recordReconciledInference(
|
|
1306
1339
|
route,
|
|
1307
1340
|
endpoint,
|
|
1308
1341
|
requestId,
|
|
1309
|
-
|
|
1310
|
-
this.parseSellerSettlementSummary(upstreamResponse.headers)
|
|
1311
|
-
this.inferPromptForHash(body)
|
|
1342
|
+
usage,
|
|
1343
|
+
this.parseSellerSettlementSummary(upstreamResponse.headers),
|
|
1344
|
+
this.inferPromptForHash(body),
|
|
1345
|
+
responseBody
|
|
1312
1346
|
);
|
|
1313
1347
|
return;
|
|
1348
|
+
} catch (routeError: unknown) {
|
|
1349
|
+
lastError = routeError;
|
|
1350
|
+
const kind: FailureKind = "deadline";
|
|
1351
|
+
const decision = this.routeFailover.decide(
|
|
1352
|
+
{
|
|
1353
|
+
sellerId: sellerKey,
|
|
1354
|
+
errorKind: kind,
|
|
1355
|
+
errorMessage: this.failoverErrorMessage(routeError),
|
|
1356
|
+
attempt
|
|
1357
|
+
},
|
|
1358
|
+
routes.length - routeIndex
|
|
1359
|
+
);
|
|
1360
|
+
this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex, reason: "exception" });
|
|
1361
|
+
logger.warn("proxy.route.failed", "seller route failed before response", {
|
|
1362
|
+
requestId,
|
|
1363
|
+
sellerKey,
|
|
1364
|
+
model: modelId,
|
|
1365
|
+
endpoint,
|
|
1366
|
+
errorMessage: this.failoverErrorMessage(routeError),
|
|
1367
|
+
durationMs: Date.now() - startedAt
|
|
1368
|
+
});
|
|
1369
|
+
if (decision.action === "retry_same_seller") {
|
|
1370
|
+
attempt += 1;
|
|
1371
|
+
if (decision.retryDelayMs) {
|
|
1372
|
+
await new Promise<void>((resolve) => setTimeout(resolve, decision.retryDelayMs));
|
|
1373
|
+
}
|
|
1374
|
+
continue;
|
|
1375
|
+
}
|
|
1376
|
+
if (decision.action === "fail_fast" || decision.action === "abort") {
|
|
1377
|
+
throw routeError;
|
|
1378
|
+
}
|
|
1379
|
+
// failover_next
|
|
1380
|
+
break;
|
|
1314
1381
|
}
|
|
1315
|
-
|
|
1316
|
-
const responseBody = await upstreamResponse.text();
|
|
1317
|
-
res.send(responseBody);
|
|
1318
|
-
const usage = this.readUsage(responseBody);
|
|
1319
|
-
this.recordReconciledInference(
|
|
1320
|
-
route,
|
|
1321
|
-
endpoint,
|
|
1322
|
-
requestId,
|
|
1323
|
-
usage,
|
|
1324
|
-
this.parseSellerSettlementSummary(upstreamResponse.headers),
|
|
1325
|
-
this.inferPromptForHash(body),
|
|
1326
|
-
responseBody
|
|
1327
|
-
);
|
|
1328
|
-
return;
|
|
1329
|
-
} catch (routeError: unknown) {
|
|
1330
|
-
lastError = routeError;
|
|
1331
|
-
logger.warn("proxy.route.failed", "seller route failed before response", {
|
|
1332
|
-
requestId,
|
|
1333
|
-
sellerKey,
|
|
1334
|
-
model: modelId,
|
|
1335
|
-
endpoint,
|
|
1336
|
-
errorMessage: this.failoverErrorMessage(routeError),
|
|
1337
|
-
durationMs: Date.now() - startedAt
|
|
1338
|
-
});
|
|
1339
|
-
if (!res.headersSent && routeIndex < routes.length - 1) {
|
|
1340
|
-
this.logFailover(route, endpoint, routeIndex, "exception");
|
|
1341
|
-
continue;
|
|
1342
|
-
}
|
|
1343
|
-
throw routeError;
|
|
1344
1382
|
}
|
|
1345
1383
|
}
|
|
1346
1384
|
|
|
@@ -1419,6 +1457,47 @@ export class TokenbuddyDaemon {
|
|
|
1419
1457
|
});
|
|
1420
1458
|
});
|
|
1421
1459
|
|
|
1460
|
+
// v1.2 §18.11: control plane snapshot of the prewarm cache + seller
|
|
1461
|
+
// pool + credit tracker. `tb doctor` reads this to render the
|
|
1462
|
+
// recovery / prewarm / credit summary block.
|
|
1463
|
+
controlApp.get("/v1.2/prewarm", (req, res) => {
|
|
1464
|
+
const prewarmEntries = this.prewarmCache.snapshot().map((entry) => ({
|
|
1465
|
+
modelId: entry.modelId,
|
|
1466
|
+
protocol: entry.protocol,
|
|
1467
|
+
paymentMethod: entry.paymentMethod,
|
|
1468
|
+
state: entry.state,
|
|
1469
|
+
candidateCount: entry.candidates.length,
|
|
1470
|
+
warmedAt: entry.warmedAt,
|
|
1471
|
+
ttlMs: entry.ttlMs,
|
|
1472
|
+
consecutiveWarmingFailures: entry.consecutiveWarmingFailures
|
|
1473
|
+
}));
|
|
1474
|
+
const poolSnapshot = this.sellerPool.snapshot().map((entry) => ({
|
|
1475
|
+
sellerId: entry.sellerId,
|
|
1476
|
+
url: entry.url,
|
|
1477
|
+
circuit: entry.circuit,
|
|
1478
|
+
consecutiveFailures: entry.consecutiveFailures,
|
|
1479
|
+
lastSuccessAt: entry.lastSuccessAt,
|
|
1480
|
+
lastFailAt: entry.lastFailAt,
|
|
1481
|
+
healthScore: entry.healthScore
|
|
1482
|
+
}));
|
|
1483
|
+
const creditSummary = this.creditTracker.summary();
|
|
1484
|
+
const focusSet = this.resolveFocusSet();
|
|
1485
|
+
const schedulerStats = this.prewarmScheduler.stats();
|
|
1486
|
+
res.status(200).json({
|
|
1487
|
+
prewarm: {
|
|
1488
|
+
entries: prewarmEntries,
|
|
1489
|
+
size: prewarmEntries.length
|
|
1490
|
+
},
|
|
1491
|
+
pool: {
|
|
1492
|
+
size: poolSnapshot.length,
|
|
1493
|
+
entries: poolSnapshot
|
|
1494
|
+
},
|
|
1495
|
+
credit: creditSummary,
|
|
1496
|
+
focusSet,
|
|
1497
|
+
scheduler: schedulerStats
|
|
1498
|
+
});
|
|
1499
|
+
});
|
|
1500
|
+
|
|
1422
1501
|
controlApp.get("/sellers", async (req, res) => {
|
|
1423
1502
|
try {
|
|
1424
1503
|
const registry = await this.fetchRegistry();
|
|
@@ -1632,11 +1711,56 @@ export class TokenbuddyDaemon {
|
|
|
1632
1711
|
sellerRegistryUrl: this.config.sellerRegistryUrl,
|
|
1633
1712
|
selectionMode: this.selectionMode
|
|
1634
1713
|
});
|
|
1714
|
+
|
|
1715
|
+
// v1.2 §18.5: kick off the on-demand prewarm pipeline. The startup
|
|
1716
|
+
// sweep runs after the configured jitter window (5-10s by default);
|
|
1717
|
+
// subsequent refreshes run on the `idleIntervalMs` cadence and the
|
|
1718
|
+
// `forwardProxyRequest` hot path can dispatch lazy prewarms on miss.
|
|
1719
|
+
this.prewarmScheduler.start();
|
|
1720
|
+
void this.runStartupPrewarmSweep();
|
|
1721
|
+
}
|
|
1722
|
+
|
|
1723
|
+
/**
|
|
1724
|
+
* v1.2 §18.4: build the focus set from the explicit config, the env
|
|
1725
|
+
* override, and the historical usage in the buyer store. The order of
|
|
1726
|
+
* precedence: explicit config > env > historical > empty.
|
|
1727
|
+
*/
|
|
1728
|
+
private resolveFocusSet(): string[] {
|
|
1729
|
+
const explicit = this.config.warmupModels ?? [];
|
|
1730
|
+
if (explicit.length > 0) {
|
|
1731
|
+
return explicit;
|
|
1732
|
+
}
|
|
1733
|
+
const envRaw = process.env.TB_BUYER_WARMUP_MODELS || "";
|
|
1734
|
+
const envModels = envRaw.split(",").map((s) => s.trim()).filter(Boolean);
|
|
1735
|
+
if (envModels.length > 0) {
|
|
1736
|
+
return envModels;
|
|
1737
|
+
}
|
|
1738
|
+
return this.tokenStore.recentModels(7, 5);
|
|
1739
|
+
}
|
|
1740
|
+
|
|
1741
|
+
private async runStartupPrewarmSweep(): Promise<void> {
|
|
1742
|
+
const focusSet = this.resolveFocusSet();
|
|
1743
|
+
if (focusSet.length === 0) {
|
|
1744
|
+
logger.info("prewarm.startup.skipped", "no focus set configured; relying on lazy prewarms", {});
|
|
1745
|
+
return;
|
|
1746
|
+
}
|
|
1747
|
+
logger.info("prewarm.startup.scheduled", "startup prewarm sweep scheduled", {
|
|
1748
|
+
focusSetSize: focusSet.length,
|
|
1749
|
+
focusSet: focusSet.slice(0, 20)
|
|
1750
|
+
});
|
|
1751
|
+
try {
|
|
1752
|
+
await this.prewarmScheduler.runStartupPrewarm(focusSet);
|
|
1753
|
+
} catch (err) {
|
|
1754
|
+
logger.warn("prewarm.startup.failed", "startup prewarm sweep failed", {
|
|
1755
|
+
errorMessage: err instanceof Error ? err.message : String(err)
|
|
1756
|
+
});
|
|
1757
|
+
}
|
|
1635
1758
|
}
|
|
1636
1759
|
|
|
1637
1760
|
public stop() {
|
|
1638
1761
|
if (this.controlServer) this.controlServer.close();
|
|
1639
1762
|
if (this.proxyServer) this.proxyServer.close();
|
|
1763
|
+
void this.prewarmScheduler.stop();
|
|
1640
1764
|
this.tokenStore.close();
|
|
1641
1765
|
}
|
|
1642
1766
|
}
|