@tokenbuddy/tokenbuddy 1.0.8 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +13 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +21 -2
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +54 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/credit-tracker.d.ts +118 -0
- package/dist/src/credit-tracker.d.ts.map +1 -0
- package/dist/src/credit-tracker.js +220 -0
- package/dist/src/credit-tracker.js.map +1 -0
- package/dist/src/daemon.d.ts +49 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +541 -405
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/model-index.d.ts +86 -0
- package/dist/src/model-index.d.ts.map +1 -0
- package/dist/src/model-index.js +214 -0
- package/dist/src/model-index.js.map +1 -0
- package/dist/src/prewarm-cache.d.ts +149 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -0
- package/dist/src/prewarm-cache.js +288 -0
- package/dist/src/prewarm-cache.js.map +1 -0
- package/dist/src/prewarm-scheduler.d.ts +150 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -0
- package/dist/src/prewarm-scheduler.js +484 -0
- package/dist/src/prewarm-scheduler.js.map +1 -0
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +9 -1
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/route-failover.d.ts +96 -0
- package/dist/src/route-failover.d.ts.map +1 -0
- package/dist/src/route-failover.js +177 -0
- package/dist/src/route-failover.js.map +1 -0
- package/dist/src/seller-catalog.d.ts +26 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js +40 -0
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +127 -0
- package/dist/src/seller-pool.d.ts.map +1 -0
- package/dist/src/seller-pool.js +243 -0
- package/dist/src/seller-pool.js.map +1 -0
- package/dist/src/stream-failover.d.ts +78 -0
- package/dist/src/stream-failover.d.ts.map +1 -0
- package/dist/src/stream-failover.js +93 -0
- package/dist/src/stream-failover.js.map +1 -0
- package/package.json +1 -1
- package/src/buyer-store.ts +32 -2
- package/src/cli.ts +61 -0
- package/src/credit-tracker.test.ts +165 -0
- package/src/credit-tracker.ts +269 -0
- package/src/daemon.ts +569 -445
- package/src/model-index.test.ts +184 -0
- package/src/model-index.ts +266 -0
- package/src/prewarm-cache.test.ts +281 -0
- package/src/prewarm-cache.ts +373 -0
- package/src/prewarm-scheduler.test.ts +367 -0
- package/src/prewarm-scheduler.ts +581 -0
- package/src/provider-install.ts +9 -1
- package/src/route-failover.test.ts +193 -0
- package/src/route-failover.ts +233 -0
- package/src/seller-catalog-413.test.ts +61 -0
- package/src/seller-catalog.ts +47 -0
- package/src/seller-pool.test.ts +231 -0
- package/src/seller-pool.ts +333 -0
- package/src/stream-failover.test.ts +52 -0
- package/src/stream-failover.ts +129 -0
- package/src/thousand-seller.test.ts +151 -0
- package/tests/daemon-413-fallback.test.ts +92 -0
- package/tests/e2e.test.ts +3 -2
- package/tests/tokenbuddy.test.ts +68 -11
package/dist/src/daemon.js
CHANGED
|
@@ -5,7 +5,13 @@ import * as fs from "fs";
|
|
|
5
5
|
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
6
6
|
import { BuyerStore } from "./buyer-store.js";
|
|
7
7
|
import { applyProviderInstall, detectProviders, previewProviderInstall, rollbackProviderInstall, } from "./provider-install.js";
|
|
8
|
-
import { discoverSellerBackedModels,
|
|
8
|
+
import { discoverSellerBackedModels, fetchSellerRegistry, normalizeSellerUrl, RegistryTooLargeError, } from "./seller-catalog.js";
|
|
9
|
+
import { ModelIndex } from "./model-index.js";
|
|
10
|
+
import { PrewarmCache } from "./prewarm-cache.js";
|
|
11
|
+
import { CreditTracker } from "./credit-tracker.js";
|
|
12
|
+
import { SellerPool } from "./seller-pool.js";
|
|
13
|
+
import { RouteFailover } from "./route-failover.js";
|
|
14
|
+
import { PrewarmScheduler } from "./prewarm-scheduler.js";
|
|
9
15
|
const logger = createModuleLogger("tb-proxyd");
|
|
10
16
|
const PROXY_JSON_BODY_LIMIT = "10mb";
|
|
11
17
|
function numericHeaderField(value) {
|
|
@@ -18,197 +24,6 @@ function numericHeaderField(value) {
|
|
|
18
24
|
}
|
|
19
25
|
return undefined;
|
|
20
26
|
}
|
|
21
|
-
class ResponsesStreamNormalizer {
|
|
22
|
-
pending = "";
|
|
23
|
-
state = new Map();
|
|
24
|
-
push(chunk) {
|
|
25
|
-
this.pending += chunk;
|
|
26
|
-
const blocks = this.pending.split("\n\n");
|
|
27
|
-
this.pending = blocks.pop() || "";
|
|
28
|
-
return blocks
|
|
29
|
-
.map((block) => this.normalizeBlock(block))
|
|
30
|
-
.filter((block) => block.length > 0)
|
|
31
|
-
.join("\n\n");
|
|
32
|
-
}
|
|
33
|
-
finish() {
|
|
34
|
-
if (!this.pending.trim()) {
|
|
35
|
-
return "";
|
|
36
|
-
}
|
|
37
|
-
const block = this.normalizeBlock(this.pending);
|
|
38
|
-
this.pending = "";
|
|
39
|
-
return block;
|
|
40
|
-
}
|
|
41
|
-
normalizeBlock(block) {
|
|
42
|
-
if (!block.trim()) {
|
|
43
|
-
return "";
|
|
44
|
-
}
|
|
45
|
-
// Each \n\n separates an event in SSE format
|
|
46
|
-
const subBlocks = block.split("\n\n");
|
|
47
|
-
const output = [];
|
|
48
|
-
for (const sub of subBlocks) {
|
|
49
|
-
if (!sub.trim() || sub.trim() === "data: [DONE]") {
|
|
50
|
-
if (sub.trim())
|
|
51
|
-
output.push(sub);
|
|
52
|
-
continue;
|
|
53
|
-
}
|
|
54
|
-
const lines = sub.split("\n");
|
|
55
|
-
const eventLine = lines.find((l) => l.startsWith("event:"));
|
|
56
|
-
const dataLine = lines.find((l) => l.startsWith("data:"));
|
|
57
|
-
if (!dataLine) {
|
|
58
|
-
output.push(sub);
|
|
59
|
-
continue;
|
|
60
|
-
}
|
|
61
|
-
const rawData = dataLine.replace(/^data:\s?/, "");
|
|
62
|
-
if (rawData === "[DONE]") {
|
|
63
|
-
output.push(sub);
|
|
64
|
-
continue;
|
|
65
|
-
}
|
|
66
|
-
let payload;
|
|
67
|
-
try {
|
|
68
|
-
payload = JSON.parse(rawData);
|
|
69
|
-
}
|
|
70
|
-
catch {
|
|
71
|
-
output.push(sub);
|
|
72
|
-
continue;
|
|
73
|
-
}
|
|
74
|
-
const eventName = (eventLine?.replace(/^event:\s?/, "") || payload?.type);
|
|
75
|
-
if (!eventName || !eventName.startsWith("response.")) {
|
|
76
|
-
output.push(sub);
|
|
77
|
-
continue;
|
|
78
|
-
}
|
|
79
|
-
// When upstream already sends content_part.added, record it in state
|
|
80
|
-
if (eventName === "response.content_part.added" &&
|
|
81
|
-
payload?.item_id) {
|
|
82
|
-
const current = this.state.get(payload.item_id);
|
|
83
|
-
if (current)
|
|
84
|
-
current.contentPartStarted = true;
|
|
85
|
-
output.push(sub);
|
|
86
|
-
continue;
|
|
87
|
-
}
|
|
88
|
-
// response.output_item.added: inject content_part.added only if upstream hasn't
|
|
89
|
-
if (eventName === "response.output_item.added" &&
|
|
90
|
-
payload?.item?.type === "message" &&
|
|
91
|
-
payload?.item?.id) {
|
|
92
|
-
const itemId = payload.item.id;
|
|
93
|
-
const current = this.getState(itemId);
|
|
94
|
-
const item = { ...payload.item };
|
|
95
|
-
item.content = [{ type: "output_text", text: "", annotations: [] }];
|
|
96
|
-
output.push(this.serializeEvent(eventName, {
|
|
97
|
-
...payload,
|
|
98
|
-
output_index: payload.output_index ?? 0,
|
|
99
|
-
item
|
|
100
|
-
}));
|
|
101
|
-
if (!current.contentPartStarted) {
|
|
102
|
-
current.contentPartStarted = true;
|
|
103
|
-
output.push(this.serializeEvent("response.content_part.added", {
|
|
104
|
-
type: "response.content_part.added",
|
|
105
|
-
item_id: itemId,
|
|
106
|
-
output_index: payload.output_index ?? 0,
|
|
107
|
-
content_index: 0,
|
|
108
|
-
part: { type: "output_text", text: "", annotations: [] }
|
|
109
|
-
}));
|
|
110
|
-
}
|
|
111
|
-
continue;
|
|
112
|
-
}
|
|
113
|
-
// response.output_text.delta: inject content_part.added if missing
|
|
114
|
-
if (eventName === "response.output_text.delta" && payload?.item_id) {
|
|
115
|
-
const itemId = payload.item_id;
|
|
116
|
-
const current = this.getState(itemId);
|
|
117
|
-
if (!current.contentPartStarted) {
|
|
118
|
-
current.contentPartStarted = true;
|
|
119
|
-
output.push(this.serializeEvent("response.content_part.added", {
|
|
120
|
-
type: "response.content_part.added",
|
|
121
|
-
item_id: itemId,
|
|
122
|
-
output_index: payload.output_index ?? 0,
|
|
123
|
-
content_index: payload.content_index ?? 0,
|
|
124
|
-
part: { type: "output_text", text: "", annotations: [] }
|
|
125
|
-
}));
|
|
126
|
-
}
|
|
127
|
-
const deltaText = typeof payload.delta === "string"
|
|
128
|
-
? payload.delta
|
|
129
|
-
: typeof payload.delta?.text === "string"
|
|
130
|
-
? payload.delta.text
|
|
131
|
-
: "";
|
|
132
|
-
current.text += deltaText;
|
|
133
|
-
output.push(this.serializeEvent(eventName, {
|
|
134
|
-
...payload,
|
|
135
|
-
output_index: payload.output_index ?? 0,
|
|
136
|
-
content_index: payload.content_index ?? 0
|
|
137
|
-
}));
|
|
138
|
-
continue;
|
|
139
|
-
}
|
|
140
|
-
// response.output_text.done: also emit content_part.done
|
|
141
|
-
if (eventName === "response.output_text.done" && payload?.item_id) {
|
|
142
|
-
const itemId = payload.item_id;
|
|
143
|
-
const current = this.getState(itemId);
|
|
144
|
-
output.push(this.serializeEvent(eventName, {
|
|
145
|
-
...payload,
|
|
146
|
-
output_index: payload.output_index ?? 0,
|
|
147
|
-
content_index: payload.content_index ?? 0
|
|
148
|
-
}));
|
|
149
|
-
output.push(this.serializeEvent("response.content_part.done", {
|
|
150
|
-
type: "response.content_part.done",
|
|
151
|
-
item_id: itemId,
|
|
152
|
-
output_index: payload.output_index ?? 0,
|
|
153
|
-
content_index: payload.content_index ?? 0,
|
|
154
|
-
part: { type: "output_text", text: current.text, annotations: [] }
|
|
155
|
-
}));
|
|
156
|
-
continue;
|
|
157
|
-
}
|
|
158
|
-
// response.output_item.done: normalize content to output_text type
|
|
159
|
-
if (eventName === "response.output_item.done" &&
|
|
160
|
-
payload?.item?.type === "message" &&
|
|
161
|
-
payload?.item?.id) {
|
|
162
|
-
const itemId = payload.item.id;
|
|
163
|
-
const current = this.getState(itemId);
|
|
164
|
-
const item = {
|
|
165
|
-
...payload.item,
|
|
166
|
-
content: [{ type: "output_text", text: current.text, annotations: [] }]
|
|
167
|
-
};
|
|
168
|
-
output.push(this.serializeEvent(eventName, {
|
|
169
|
-
...payload,
|
|
170
|
-
output_index: payload.output_index ?? 0,
|
|
171
|
-
item
|
|
172
|
-
}));
|
|
173
|
-
continue;
|
|
174
|
-
}
|
|
175
|
-
// response.completed: patch output if empty
|
|
176
|
-
if (eventName === "response.completed" && payload?.response) {
|
|
177
|
-
const response = { ...payload.response };
|
|
178
|
-
if (!Array.isArray(response.output) || response.output.length === 0) {
|
|
179
|
-
const first = this.state.values().next()
|
|
180
|
-
.value;
|
|
181
|
-
if (first) {
|
|
182
|
-
response.output = [{
|
|
183
|
-
id: first.itemId,
|
|
184
|
-
type: "message",
|
|
185
|
-
status: "completed",
|
|
186
|
-
role: "assistant",
|
|
187
|
-
content: [{ type: "output_text", text: first.text, annotations: [] }]
|
|
188
|
-
}];
|
|
189
|
-
response.output_text = first.text;
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
output.push(this.serializeEvent(eventName, { ...payload, response }));
|
|
193
|
-
continue;
|
|
194
|
-
}
|
|
195
|
-
// All other events: pass through unchanged
|
|
196
|
-
output.push(sub);
|
|
197
|
-
}
|
|
198
|
-
return output.join("\n\n");
|
|
199
|
-
}
|
|
200
|
-
getState(itemId) {
|
|
201
|
-
const current = this.state.get(itemId);
|
|
202
|
-
if (current)
|
|
203
|
-
return current;
|
|
204
|
-
const created = { itemId, text: "", contentPartStarted: false };
|
|
205
|
-
this.state.set(itemId, created);
|
|
206
|
-
return created;
|
|
207
|
-
}
|
|
208
|
-
serializeEvent(name, data) {
|
|
209
|
-
return `event: ${name}\ndata: ${JSON.stringify(data)}`;
|
|
210
|
-
}
|
|
211
|
-
}
|
|
212
27
|
class SellerSettlementStreamExtractor {
|
|
213
28
|
pending = "";
|
|
214
29
|
settlement;
|
|
@@ -289,6 +104,25 @@ export class TokenbuddyDaemon {
|
|
|
289
104
|
selectionMode;
|
|
290
105
|
selectedSellerId;
|
|
291
106
|
activePurchases = new Map();
|
|
107
|
+
// v1.2 fallback pipeline: model-index, prewarm-cache, credit-tracker,
|
|
108
|
+
// pool, and route-failover together replace the v1
|
|
109
|
+
// "fetchRegistry + manifest per request" path.
|
|
110
|
+
modelIndex = new ModelIndex();
|
|
111
|
+
prewarmCache = new PrewarmCache();
|
|
112
|
+
creditTracker = new CreditTracker();
|
|
113
|
+
sellerPool = new SellerPool({
|
|
114
|
+
modelIndex: this.modelIndex,
|
|
115
|
+
cache: this.prewarmCache,
|
|
116
|
+
creditTracker: this.creditTracker
|
|
117
|
+
});
|
|
118
|
+
routeFailover = new RouteFailover({
|
|
119
|
+
pool: this.sellerPool,
|
|
120
|
+
creditTracker: this.creditTracker
|
|
121
|
+
});
|
|
122
|
+
// v1.2 §18.5: assigned in the constructor because the scheduler needs
|
|
123
|
+
// config-derived knobs. The `!` (in the TypeScript source) opts out of strict-initialization so the
|
|
124
|
+
// rest of the class can treat it as non-nullable.
|
|
125
|
+
prewarmScheduler;
|
|
292
126
|
constructor(config) {
|
|
293
127
|
this.tokenStore = new BuyerStore({ dbPath: config.dbPath });
|
|
294
128
|
const routingPreference = this.tokenStore.getDaemonRuntimeConfig("routing")
|
|
@@ -300,6 +134,43 @@ export class TokenbuddyDaemon {
|
|
|
300
134
|
"auto";
|
|
301
135
|
this.selectedSellerId =
|
|
302
136
|
config.selectedSellerId || routingPreference?.sellerId;
|
|
137
|
+
// v1.2 §18.5: scheduler is created here (not in the field initializer)
|
|
138
|
+
// because it needs the config-derived prober + idle interval.
|
|
139
|
+
Object.assign(this, {
|
|
140
|
+
prewarmScheduler: new PrewarmScheduler({
|
|
141
|
+
modelIndex: this.modelIndex,
|
|
142
|
+
cache: this.prewarmCache,
|
|
143
|
+
prober: this.buildHealthProber(config.warmupProbeTimeoutMs ?? 3000),
|
|
144
|
+
idleIntervalMs: (config.warmupRefreshIntervalSecs ?? 60) * 1000
|
|
145
|
+
})
|
|
146
|
+
});
|
|
147
|
+
}
|
|
148
|
+
buildHealthProber(timeoutMs) {
|
|
149
|
+
return async (seller, signal) => {
|
|
150
|
+
try {
|
|
151
|
+
const ac = new AbortController();
|
|
152
|
+
const timer = setTimeout(() => ac.abort(new Error("healthz timeout")), timeoutMs);
|
|
153
|
+
if (signal) {
|
|
154
|
+
if (signal.aborted) {
|
|
155
|
+
ac.abort(signal.reason);
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
158
|
+
signal.addEventListener("abort", () => ac.abort(signal.reason), { once: true });
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
const startedAt = Date.now();
|
|
162
|
+
const res = await fetch(`${seller.url.replace(/\/+$/, "")}/healthz`, { signal: ac.signal });
|
|
163
|
+
clearTimeout(timer);
|
|
164
|
+
if (!res.ok) {
|
|
165
|
+
return { ok: false, latencyMs: Date.now() - startedAt, httpStatus: res.status, errorMessage: `healthz returned ${res.status}` };
|
|
166
|
+
}
|
|
167
|
+
return { ok: true, latencyMs: Date.now() - startedAt, httpStatus: res.status };
|
|
168
|
+
}
|
|
169
|
+
catch (err) {
|
|
170
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
171
|
+
return { ok: false, latencyMs: 0, errorMessage: message };
|
|
172
|
+
}
|
|
173
|
+
};
|
|
303
174
|
}
|
|
304
175
|
activeControlPort() {
|
|
305
176
|
const address = this.controlServer?.address?.();
|
|
@@ -309,8 +180,43 @@ export class TokenbuddyDaemon {
|
|
|
309
180
|
const address = this.proxyServer?.address?.();
|
|
310
181
|
return typeof address === "object" && address ? address.port : this.config.proxyPort;
|
|
311
182
|
}
|
|
183
|
+
// v1.2 §18.9: stale-cache fallback. The buyer remembers the last
|
|
184
|
+
// successfully fetched registry document and reuses it when the
|
|
185
|
+
// bootstrap returns 413 (`X-TokenBuddy-Registry-Too-Large: 1`). This
|
|
186
|
+
// trades freshness for availability: requests still route, but the
|
|
187
|
+
// model set is whatever was cached before the registry outgrew 1MB.
|
|
188
|
+
lastRegistrySnapshot = null;
|
|
312
189
|
async fetchRegistry() {
|
|
313
|
-
|
|
190
|
+
try {
|
|
191
|
+
const registry = await fetchSellerRegistry(this.config.sellerRegistryUrl);
|
|
192
|
+
this.modelIndex.rebuild(registry.sellers, {
|
|
193
|
+
registryVersion: registry.version,
|
|
194
|
+
defaultSellerId: registry.defaultSeller
|
|
195
|
+
});
|
|
196
|
+
this.sellerPool.sync();
|
|
197
|
+
this.lastRegistrySnapshot = registry;
|
|
198
|
+
return registry;
|
|
199
|
+
}
|
|
200
|
+
catch (err) {
|
|
201
|
+
// v1.2 §18.9: if the bootstrap returns 413, fall back to the
|
|
202
|
+
// last-known registry document. This keeps the buyer routing even
|
|
203
|
+
// when the registry temporarily outgrows the 1MB cap.
|
|
204
|
+
if (err instanceof RegistryTooLargeError && this.lastRegistrySnapshot) {
|
|
205
|
+
logger.warn("registry.stale_fallback", "registry returned 413; using last-known snapshot for routing", {
|
|
206
|
+
sellerRegistryUrl: this.config.sellerRegistryUrl,
|
|
207
|
+
cachedVersion: this.lastRegistrySnapshot.version,
|
|
208
|
+
cachedSellers: this.lastRegistrySnapshot.sellers.length
|
|
209
|
+
});
|
|
210
|
+
const stale = this.lastRegistrySnapshot;
|
|
211
|
+
this.modelIndex.rebuild(stale.sellers, {
|
|
212
|
+
registryVersion: stale.version,
|
|
213
|
+
defaultSellerId: stale.defaultSeller
|
|
214
|
+
});
|
|
215
|
+
this.sellerPool.sync();
|
|
216
|
+
return stale;
|
|
217
|
+
}
|
|
218
|
+
throw err;
|
|
219
|
+
}
|
|
314
220
|
}
|
|
315
221
|
runtimeSummary() {
|
|
316
222
|
const sellerRoutingMode = this.selectedSellerId ? "fixed" : this.selectionMode;
|
|
@@ -413,45 +319,39 @@ export class TokenbuddyDaemon {
|
|
|
413
319
|
if (!paymentMethod || !["mock", "clawtip"].includes(paymentMethod)) {
|
|
414
320
|
throw new Error("mock or clawtip payment method is not configured as an enabled buyer payment method");
|
|
415
321
|
}
|
|
322
|
+
// v1.2: registry is the source of truth for routing. We rebuild the
|
|
323
|
+
// model-index once per request (cheap; index lookup is in-memory) so
|
|
324
|
+
// the response always reflects the latest seller list. The previous
|
|
325
|
+
// "fetchSellerManifest per candidate" path is removed in favor of
|
|
326
|
+
// pulling `models` directly off the registry entries.
|
|
416
327
|
const registry = await this.fetchRegistry();
|
|
417
|
-
const
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
sellerKey: seller.id,
|
|
432
|
-
model: modelId,
|
|
433
|
-
endpoint,
|
|
434
|
-
errorMessage: error instanceof Error ? error.message : String(error)
|
|
435
|
-
});
|
|
436
|
-
continue;
|
|
437
|
-
}
|
|
438
|
-
const protocols = manifestProtocols(manifest, seller);
|
|
439
|
-
const paymentMethods = manifestPaymentMethods(manifest, seller);
|
|
440
|
-
const modelIds = manifestModelIds(manifest);
|
|
441
|
-
if (!protocols.includes(protocol) || !paymentMethods.includes(paymentMethod) || !modelIds.includes(modelId)) {
|
|
442
|
-
continue;
|
|
443
|
-
}
|
|
444
|
-
routes.push({
|
|
445
|
-
seller,
|
|
446
|
-
manifest,
|
|
447
|
-
protocol,
|
|
448
|
-
modelId,
|
|
449
|
-
paymentMethod
|
|
450
|
-
});
|
|
328
|
+
const indexCandidates = this.modelIndex.sellersFor(modelId, { protocol, paymentMethod });
|
|
329
|
+
let ordered = indexCandidates;
|
|
330
|
+
if (this.selectionMode === "manual" && this.selectedSellerId) {
|
|
331
|
+
ordered = indexCandidates.filter((seller) => seller.id === this.selectedSellerId);
|
|
332
|
+
}
|
|
333
|
+
else if (this.selectionMode === "manual" && registry.defaultSeller) {
|
|
334
|
+
ordered = indexCandidates.filter((seller) => seller.id === registry.defaultSeller);
|
|
335
|
+
}
|
|
336
|
+
else if (registry.defaultSeller) {
|
|
337
|
+
// auto mode: default first, then backups in registry order
|
|
338
|
+
ordered = [
|
|
339
|
+
...indexCandidates.filter((seller) => seller.id === registry.defaultSeller),
|
|
340
|
+
...indexCandidates.filter((seller) => seller.id !== registry.defaultSeller)
|
|
341
|
+
];
|
|
451
342
|
}
|
|
452
|
-
if (
|
|
343
|
+
if (ordered.length === 0) {
|
|
453
344
|
throw new Error(`no compatible seller for ${endpoint} model ${modelId}`);
|
|
454
345
|
}
|
|
346
|
+
const poolById = new Map(this.sellerPool.snapshot().map((entry) => [entry.sellerId, entry]));
|
|
347
|
+
const routes = ordered.map((seller) => ({
|
|
348
|
+
seller,
|
|
349
|
+
manifest: null,
|
|
350
|
+
protocol,
|
|
351
|
+
modelId,
|
|
352
|
+
paymentMethod,
|
|
353
|
+
poolEntry: poolById.get(seller.id)
|
|
354
|
+
}));
|
|
455
355
|
logger.info("route.candidates.prewarmed", "seller route candidates prewarmed", {
|
|
456
356
|
model: modelId,
|
|
457
357
|
endpoint,
|
|
@@ -463,38 +363,51 @@ export class TokenbuddyDaemon {
|
|
|
463
363
|
});
|
|
464
364
|
return routes;
|
|
465
365
|
}
|
|
466
|
-
logRouteSelected(route, endpoint, routeIndex) {
|
|
467
|
-
logger.info("route.selected", "seller route selected", {
|
|
468
|
-
sellerKey: route.seller.id,
|
|
469
|
-
model: route.modelId,
|
|
470
|
-
endpoint,
|
|
471
|
-
protocol: route.protocol,
|
|
472
|
-
paymentMethod: route.paymentMethod,
|
|
473
|
-
routeIndex,
|
|
474
|
-
backup: routeIndex > 0
|
|
475
|
-
});
|
|
476
|
-
}
|
|
477
|
-
shouldFailoverStatus(status) {
|
|
478
|
-
return status === 429 || status >= 500;
|
|
479
|
-
}
|
|
480
|
-
logFailover(route, endpoint, routeIndex, reason, status) {
|
|
481
|
-
logger.warn("route.failover.triggered", "seller route failed over to backup candidate", {
|
|
482
|
-
sellerKey: route.seller.id,
|
|
483
|
-
model: route.modelId,
|
|
484
|
-
endpoint,
|
|
485
|
-
routeIndex,
|
|
486
|
-
reason,
|
|
487
|
-
status
|
|
488
|
-
});
|
|
489
|
-
}
|
|
490
366
|
failoverErrorMessage(error) {
|
|
491
367
|
return error instanceof Error ? error.message : String(error);
|
|
492
368
|
}
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
369
|
+
/**
|
|
370
|
+
* Map an HTTP status from a failed seller call to a `FailureKind` that
|
|
371
|
+
* the route-failover controller understands. Hard 4xx (other than
|
|
372
|
+
* auth/insufficient) means the seller is wrong for the request; 5xx
|
|
373
|
+
* and 429 are treated as transient and eligible for the soft-failure
|
|
374
|
+
* retry budget. The v1.1 "insufficient funds" check stays on the
|
|
375
|
+
* caller side because it short-circuits the failure path with a
|
|
376
|
+
* re-purchase.
|
|
377
|
+
*/
|
|
378
|
+
classifyFailureStatus(status) {
|
|
379
|
+
if (status === 401 || status === 403) {
|
|
380
|
+
return "auth_invalid";
|
|
381
|
+
}
|
|
382
|
+
if (status === 402) {
|
|
383
|
+
return "insufficient_funds";
|
|
384
|
+
}
|
|
385
|
+
if (status === 400 || status === 404 || status === 422) {
|
|
386
|
+
return "hard_4xx";
|
|
387
|
+
}
|
|
388
|
+
return "soft_5xx";
|
|
389
|
+
}
|
|
390
|
+
/**
|
|
391
|
+
* Emit the structured failover log line. The decision itself is
|
|
392
|
+
* produced by `RouteFailover.decide`; this helper exists only to keep
|
|
393
|
+
* the controller loop readable.
|
|
394
|
+
*/
|
|
395
|
+
handleFailoverDecision(decision, context) {
|
|
396
|
+
if (decision.action === "retry_same_seller") {
|
|
397
|
+
return;
|
|
398
|
+
}
|
|
399
|
+
if (decision.action === "failover_next") {
|
|
400
|
+
logger.warn("route.failover.triggered", "seller route failed over to backup candidate", {
|
|
401
|
+
sellerKey: context.sellerKey,
|
|
402
|
+
endpoint: context.endpoint,
|
|
403
|
+
routeIndex: context.routeIndex,
|
|
404
|
+
reason: decision.reason,
|
|
405
|
+
status: context.status,
|
|
406
|
+
wastedCreditMicros: decision.wastedCreditMicros,
|
|
407
|
+
freshPurchase: decision.freshPurchase,
|
|
408
|
+
retryAttemptsBeforeFailover: decision.retryAttemptsBeforeFailover
|
|
409
|
+
});
|
|
410
|
+
}
|
|
498
411
|
}
|
|
499
412
|
async listSellerBackedModels() {
|
|
500
413
|
const catalog = await discoverSellerBackedModels(this.config.sellerRegistryUrl);
|
|
@@ -668,13 +581,58 @@ export class TokenbuddyDaemon {
|
|
|
668
581
|
}
|
|
669
582
|
return parsed;
|
|
670
583
|
}
|
|
584
|
+
/**
|
|
585
|
+
* v1.2 §8: hard per-request deadline. The buyer refuses to wait longer
|
|
586
|
+
* than this for a single seller; on expiry the request is aborted and
|
|
587
|
+
* the route-failover controller can either retry the same seller with
|
|
588
|
+
* a smaller body or fail over. Configurable via
|
|
589
|
+
* `TB_PROXYD_REQUEST_DEADLINE_MS` (default 30s).
|
|
590
|
+
*/
|
|
591
|
+
requestDeadlineMs() {
|
|
592
|
+
const raw = process.env.TB_PROXYD_REQUEST_DEADLINE_MS;
|
|
593
|
+
if (!raw) {
|
|
594
|
+
return 30_000;
|
|
595
|
+
}
|
|
596
|
+
const parsed = Number(raw);
|
|
597
|
+
if (!Number.isInteger(parsed) || parsed < 1000) {
|
|
598
|
+
return 30_000;
|
|
599
|
+
}
|
|
600
|
+
return parsed;
|
|
601
|
+
}
|
|
602
|
+
/**
|
|
603
|
+
* Safety margin subtracted from the cached token's `expiresAt` before
|
|
604
|
+
* deciding to reuse it. Buying a new token 60s before expiry gives the
|
|
605
|
+
* upstream enough headroom to reject any in-flight calls under the old
|
|
606
|
+
* token before the buyer assumes the new one is valid.
|
|
607
|
+
*/
|
|
608
|
+
tokenExpirySafetyMarginMs() {
|
|
609
|
+
const raw = process.env.TB_PROXYD_TOKEN_EXPIRY_SAFETY_MARGIN_MS;
|
|
610
|
+
if (!raw) {
|
|
611
|
+
return 60_000;
|
|
612
|
+
}
|
|
613
|
+
const parsed = Number(raw);
|
|
614
|
+
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
615
|
+
return 60_000;
|
|
616
|
+
}
|
|
617
|
+
return parsed;
|
|
618
|
+
}
|
|
671
619
|
async getOrPurchaseToken(route) {
|
|
672
620
|
const sellerKey = route.seller.id;
|
|
673
621
|
const sellerUrl = normalizeSellerUrl(route.seller);
|
|
674
622
|
const { modelId, paymentMethod } = route;
|
|
675
623
|
const cached = this.tokenStore.getToken(sellerKey);
|
|
676
624
|
const rebuyMinBalanceMicros = this.tokenRebuyMinBalanceMicros();
|
|
677
|
-
|
|
625
|
+
// v1.2 PR-fix (2026-06-02): reject cached tokens that are inside the
|
|
626
|
+
// safety margin of their seller-assigned expiry. The previous
|
|
627
|
+
// implementation only checked `balanceMicros`, which let the buyer
|
|
628
|
+
// keep serving 24h-expired access tokens to the upstream and
|
|
629
|
+
// produced 401 "Bearer token is invalid or expired" errors. The
|
|
630
|
+
// `expiresAt` field is sourced from the seller's
|
|
631
|
+
// `/purchase/complete` response and is part of the `saveToken`
|
|
632
|
+
// contract.
|
|
633
|
+
const expiresAtMs = cached?.expiresAt ? Date.parse(cached.expiresAt) : NaN;
|
|
634
|
+
const tokenStillFresh = Number.isFinite(expiresAtMs) && Date.now() + this.tokenExpirySafetyMarginMs() < expiresAtMs;
|
|
635
|
+
if (cached && tokenStillFresh && cached.balanceMicros > rebuyMinBalanceMicros) {
|
|
678
636
|
logger.info("token.cache.hit", "seller token cache hit", {
|
|
679
637
|
sellerKey,
|
|
680
638
|
model: modelId,
|
|
@@ -687,7 +645,8 @@ export class TokenbuddyDaemon {
|
|
|
687
645
|
sellerKey,
|
|
688
646
|
model: modelId,
|
|
689
647
|
balanceMicros: cached?.balanceMicros || 0,
|
|
690
|
-
rebuyMinBalanceMicros
|
|
648
|
+
rebuyMinBalanceMicros,
|
|
649
|
+
expired: Boolean(cached) && !tokenStillFresh
|
|
691
650
|
});
|
|
692
651
|
const purchaseKey = `${sellerKey}:${modelId}:${paymentMethod}`;
|
|
693
652
|
const purchasePromise = this.activePurchases.get(purchaseKey);
|
|
@@ -794,6 +753,9 @@ export class TokenbuddyDaemon {
|
|
|
794
753
|
paymentReference: completeData.paymentReference || completeData.payment_reference,
|
|
795
754
|
completedAt: new Date().toISOString()
|
|
796
755
|
});
|
|
756
|
+
// v1.1: feed the credit tracker so the route-failover controller
|
|
757
|
+
// knows the seller is inside the fresh-purchase window.
|
|
758
|
+
this.creditTracker.recordPurchase(sellerKey, creditMicros, creditMicros);
|
|
797
759
|
logger.info("purchase.token.succeeded", "seller token purchased", {
|
|
798
760
|
sellerKey,
|
|
799
761
|
model: modelId,
|
|
@@ -949,57 +911,139 @@ export class TokenbuddyDaemon {
|
|
|
949
911
|
for (let routeIndex = 0; routeIndex < routes.length; routeIndex += 1) {
|
|
950
912
|
const route = routes[routeIndex];
|
|
951
913
|
const sellerKey = route.seller.id;
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
914
|
+
logger.info("route.selected", "seller route selected", {
|
|
915
|
+
sellerKey,
|
|
916
|
+
model: modelId,
|
|
917
|
+
endpoint,
|
|
918
|
+
protocol: route.protocol,
|
|
919
|
+
paymentMethod: route.paymentMethod,
|
|
920
|
+
routeIndex,
|
|
921
|
+
backup: routeIndex > 0
|
|
922
|
+
});
|
|
923
|
+
let attempt = 0;
|
|
924
|
+
// Soft-failure retry budget; the route-failover controller decides
|
|
925
|
+
// whether the same seller should be retried or we move on. The
|
|
926
|
+
// v1 "1 retry for 4xx fallback" loop is replaced with a
|
|
927
|
+
// stateful decision per attempt.
|
|
928
|
+
// eslint-disable-next-line no-constant-condition
|
|
929
|
+
while (true) {
|
|
930
|
+
try {
|
|
931
|
+
logger.info("proxy.request.started", "proxy request started", {
|
|
932
|
+
requestId,
|
|
933
|
+
sellerKey,
|
|
934
|
+
model: modelId,
|
|
935
|
+
requestedModel: requestedModelId,
|
|
936
|
+
endpoint,
|
|
937
|
+
stream: Boolean(body.stream),
|
|
938
|
+
attempt
|
|
939
|
+
});
|
|
940
|
+
const sellerUrl = normalizeSellerUrl(route.seller);
|
|
941
|
+
const upstreamBody = this.applyResolvedModelToBody(endpoint, {
|
|
942
|
+
...body,
|
|
943
|
+
requestId
|
|
944
|
+
}, modelId);
|
|
945
|
+
logger.info("proxy.upstream_fetch.started", "proxy upstream fetch started", {
|
|
946
|
+
requestId,
|
|
947
|
+
sellerKey,
|
|
948
|
+
model: modelId,
|
|
949
|
+
endpoint,
|
|
950
|
+
stream: Boolean(body.stream),
|
|
951
|
+
upstreamBody
|
|
952
|
+
});
|
|
953
|
+
// v1.1 §17.5: refuse to auto-purchase once the session budget is
|
|
954
|
+
// exhausted. The seller is treated as "no auto-purchase available"
|
|
955
|
+
// and the request fails over to the next candidate.
|
|
956
|
+
if (!this.routeFailover.canAutoPurchase()) {
|
|
957
|
+
logger.warn("purchase.budget.exceeded", "session auto-purchase budget exhausted; failing over without buying", {
|
|
958
|
+
requestId,
|
|
959
|
+
sellerKey,
|
|
960
|
+
model: modelId,
|
|
961
|
+
endpoint,
|
|
962
|
+
routeIndex
|
|
963
|
+
});
|
|
964
|
+
lastError = new Error("auto-purchase budget exceeded for this session");
|
|
965
|
+
break;
|
|
966
|
+
}
|
|
967
|
+
// v1.1: a purchase failure means the seller is unreachable for
|
|
968
|
+
// payment, not "transiently flapping". Do not retry the same
|
|
969
|
+
// seller; transfer leftover to wasted and fail over immediately.
|
|
970
|
+
let token;
|
|
971
|
+
try {
|
|
972
|
+
token = await this.getOrPurchaseToken(route);
|
|
973
|
+
}
|
|
974
|
+
catch (purchaseError) {
|
|
975
|
+
logger.warn("purchase.failed", "seller auto-purchase failed; failing over without retry", {
|
|
976
|
+
requestId,
|
|
977
|
+
sellerKey,
|
|
978
|
+
model: modelId,
|
|
979
|
+
endpoint,
|
|
980
|
+
errorMessage: this.failoverErrorMessage(purchaseError)
|
|
981
|
+
});
|
|
982
|
+
this.routeFailover.decide({
|
|
983
|
+
sellerId: sellerKey,
|
|
984
|
+
errorKind: "deadline",
|
|
985
|
+
errorMessage: this.failoverErrorMessage(purchaseError),
|
|
986
|
+
attempt
|
|
987
|
+
}, routes.length - routeIndex);
|
|
988
|
+
lastError = purchaseError;
|
|
989
|
+
break;
|
|
990
|
+
}
|
|
991
|
+
// v1.2 §8: enforce a hard per-request deadline so a slow
|
|
992
|
+
// upstream cannot hang the buyer. The deadline is honored by
|
|
993
|
+
// the AbortController passed to `fetch`; sellers that observe
|
|
994
|
+
// the `X-TokenBuddy-Deadline-Ms` header (PR-6) can propagate
|
|
995
|
+
// it to their own upstream fetch via the same signal.
|
|
996
|
+
const deadlineMs = this.requestDeadlineMs();
|
|
997
|
+
const requestAc = new AbortController();
|
|
998
|
+
const requestTimer = setTimeout(() => requestAc.abort(new Error("buyer deadline exceeded")), deadlineMs);
|
|
999
|
+
const sendSellerRequest = async (token) => {
|
|
1000
|
+
const headers = {
|
|
1001
|
+
"Content-Type": "application/json",
|
|
1002
|
+
"Authorization": `Bearer ${token}`,
|
|
1003
|
+
"X-Request-Id": requestId,
|
|
1004
|
+
"Idempotency-Key": idempotencyKey
|
|
1005
|
+
};
|
|
1006
|
+
headers["X-TokenBuddy-Deadline-Ms"] = String(deadlineMs);
|
|
1007
|
+
return fetch(`${sellerUrl}${endpoint}`, {
|
|
1008
|
+
method: "POST",
|
|
1009
|
+
headers,
|
|
1010
|
+
body: JSON.stringify(upstreamBody),
|
|
1011
|
+
signal: requestAc.signal
|
|
1012
|
+
});
|
|
1013
|
+
};
|
|
1014
|
+
let upstreamResponse = await sendSellerRequest(token);
|
|
1015
|
+
if (!upstreamResponse.ok) {
|
|
1016
|
+
const errorBody = await upstreamResponse.text();
|
|
1017
|
+
if (this.isInsufficientFundsResponse(upstreamResponse.status, errorBody)) {
|
|
1018
|
+
token = await this.recoverFromInsufficientFunds(route, token);
|
|
1019
|
+
upstreamResponse = await sendSellerRequest(token);
|
|
1020
|
+
if (upstreamResponse.ok) {
|
|
1021
|
+
logger.info("proxy.retry_after_402.succeeded", "seller request succeeded after one-shot auto purchase retry", {
|
|
1022
|
+
requestId,
|
|
1023
|
+
sellerKey,
|
|
1024
|
+
model: modelId,
|
|
1025
|
+
endpoint,
|
|
1026
|
+
durationMs: Date.now() - startedAt
|
|
1027
|
+
});
|
|
1028
|
+
}
|
|
1029
|
+
else {
|
|
1030
|
+
const retryErrorBody = await upstreamResponse.text();
|
|
1031
|
+
logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
|
|
1032
|
+
requestId,
|
|
1033
|
+
sellerKey,
|
|
1034
|
+
model: modelId,
|
|
1035
|
+
endpoint,
|
|
1036
|
+
status: upstreamResponse.status,
|
|
1037
|
+
durationMs: Date.now() - startedAt
|
|
1038
|
+
});
|
|
1039
|
+
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1040
|
+
res.status(upstreamResponse.status);
|
|
1041
|
+
res.send(retryErrorBody);
|
|
1042
|
+
return;
|
|
1043
|
+
}
|
|
999
1044
|
}
|
|
1000
1045
|
else {
|
|
1001
|
-
|
|
1002
|
-
logger.warn("proxy.retry_after_402.failed", "seller request still failed after one-shot auto purchase retry", {
|
|
1046
|
+
logger.warn("proxy.upstream_fetch.failed", "proxy upstream fetch returned non-ok status", {
|
|
1003
1047
|
requestId,
|
|
1004
1048
|
sellerKey,
|
|
1005
1049
|
model: modelId,
|
|
@@ -1007,117 +1051,126 @@ export class TokenbuddyDaemon {
|
|
|
1007
1051
|
status: upstreamResponse.status,
|
|
1008
1052
|
durationMs: Date.now() - startedAt
|
|
1009
1053
|
});
|
|
1010
|
-
this.
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1054
|
+
const kind = this.classifyFailureStatus(upstreamResponse.status);
|
|
1055
|
+
const decision = this.routeFailover.decide({
|
|
1056
|
+
sellerId: sellerKey,
|
|
1057
|
+
status: upstreamResponse.status,
|
|
1058
|
+
errorKind: kind,
|
|
1059
|
+
errorMessage: errorBody,
|
|
1060
|
+
attempt
|
|
1061
|
+
}, routes.length - routeIndex);
|
|
1062
|
+
this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex });
|
|
1063
|
+
if (decision.action === "fail_fast" || decision.action === "abort") {
|
|
1064
|
+
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1065
|
+
res.status(upstreamResponse.status);
|
|
1066
|
+
res.send(errorBody);
|
|
1067
|
+
return;
|
|
1068
|
+
}
|
|
1069
|
+
if (decision.action === "retry_same_seller") {
|
|
1070
|
+
attempt += 1;
|
|
1071
|
+
if (decision.retryDelayMs) {
|
|
1072
|
+
await new Promise((resolve) => setTimeout(resolve, decision.retryDelayMs));
|
|
1073
|
+
}
|
|
1074
|
+
continue;
|
|
1075
|
+
}
|
|
1076
|
+
// failover_next
|
|
1026
1077
|
lastError = new Error(`seller ${sellerKey} returned ${upstreamResponse.status}`);
|
|
1027
|
-
this.logFailover(route, endpoint, routeIndex, "upstream_status", upstreamResponse.status);
|
|
1028
|
-
continue;
|
|
1029
|
-
}
|
|
1030
|
-
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1031
|
-
res.status(upstreamResponse.status);
|
|
1032
|
-
res.send(errorBody);
|
|
1033
|
-
return;
|
|
1034
|
-
}
|
|
1035
|
-
}
|
|
1036
|
-
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1037
|
-
res.status(upstreamResponse.status);
|
|
1038
|
-
logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
|
|
1039
|
-
requestId,
|
|
1040
|
-
sellerKey,
|
|
1041
|
-
model: modelId,
|
|
1042
|
-
endpoint,
|
|
1043
|
-
status: upstreamResponse.status,
|
|
1044
|
-
stream: Boolean(body.stream)
|
|
1045
|
-
});
|
|
1046
|
-
const contentType = upstreamResponse.headers.get("content-type") || "";
|
|
1047
|
-
if (contentType.includes("text/event-stream") || Boolean(body.stream)) {
|
|
1048
|
-
const reader = upstreamResponse.body?.getReader();
|
|
1049
|
-
if (!reader) {
|
|
1050
|
-
res.end();
|
|
1051
|
-
return;
|
|
1052
|
-
}
|
|
1053
|
-
let bytes = 0;
|
|
1054
|
-
const decoder = new TextDecoder();
|
|
1055
|
-
const responsesStreamNormalizer = new ResponsesStreamNormalizer();
|
|
1056
|
-
const settlementExtractor = new SellerSettlementStreamExtractor();
|
|
1057
|
-
while (true) {
|
|
1058
|
-
const { done, value } = await reader.read();
|
|
1059
|
-
if (done) {
|
|
1060
1078
|
break;
|
|
1061
1079
|
}
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1080
|
+
}
|
|
1081
|
+
// Successful response: stream or buffer.
|
|
1082
|
+
this.copyUpstreamHeaders(upstreamResponse, res);
|
|
1083
|
+
res.status(upstreamResponse.status);
|
|
1084
|
+
logger.info("proxy.upstream_fetch.succeeded", "proxy upstream fetch succeeded", {
|
|
1085
|
+
requestId,
|
|
1086
|
+
sellerKey,
|
|
1087
|
+
model: modelId,
|
|
1088
|
+
endpoint,
|
|
1089
|
+
status: upstreamResponse.status,
|
|
1090
|
+
stream: Boolean(body.stream)
|
|
1091
|
+
});
|
|
1092
|
+
const contentType = upstreamResponse.headers.get("content-type") || "";
|
|
1093
|
+
if (contentType.includes("text/event-stream") || Boolean(body.stream)) {
|
|
1094
|
+
const reader = upstreamResponse.body?.getReader();
|
|
1095
|
+
if (!reader) {
|
|
1096
|
+
res.end();
|
|
1097
|
+
return;
|
|
1067
1098
|
}
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1099
|
+
let bytes = 0;
|
|
1100
|
+
const decoder = new TextDecoder();
|
|
1101
|
+
const settlementExtractor = new SellerSettlementStreamExtractor();
|
|
1102
|
+
while (true) {
|
|
1103
|
+
const { done, value } = await reader.read();
|
|
1104
|
+
if (done) {
|
|
1105
|
+
break;
|
|
1106
|
+
}
|
|
1107
|
+
bytes += value.byteLength;
|
|
1108
|
+
const chunk = decoder.decode(value, { stream: true });
|
|
1109
|
+
// 透明代理:把 seller 的 SSE 字节原样转给客户端,只剥离我们注入的
|
|
1110
|
+
// tokenbuddy.settlement 事件(不让客户端看到内部记账字段)。除此之外
|
|
1111
|
+
// 不做任何协议转换——卖方格式 bug(如 chat.completion.chunk prefix、
|
|
1112
|
+
// 缺 event: 行)由卖方修,buyer 不兜底。
|
|
1113
|
+
const sellerChunk = settlementExtractor.push(chunk);
|
|
1114
|
+
if (sellerChunk.length > 0) {
|
|
1115
|
+
res.write(sellerChunk);
|
|
1072
1116
|
}
|
|
1073
1117
|
}
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
res.write(`${normalized}\n\n`);
|
|
1118
|
+
// flush TextDecoder 内部 buffer:stream:true 模式下最后可能留有几个字节的
|
|
1119
|
+
// 不完整 UTF-8 序列(多字节字符被切到下一 chunk 的场景),不调 stream:false
|
|
1120
|
+
// flush 就 break 会丢这批字节。上面的 stream 末尾事件(done / completed)
|
|
1121
|
+
// 之前被吞掉就是这个原因。
|
|
1122
|
+
const decoderTail = decoder.decode();
|
|
1123
|
+
if (decoderTail.length > 0) {
|
|
1124
|
+
const sellerTail = settlementExtractor.push(decoderTail);
|
|
1125
|
+
if (sellerTail.length > 0) {
|
|
1126
|
+
res.write(sellerTail);
|
|
1084
1127
|
}
|
|
1085
1128
|
}
|
|
1086
|
-
|
|
1129
|
+
const settlementTrailing = settlementExtractor.finish();
|
|
1130
|
+
if (settlementTrailing.downstream.length > 0) {
|
|
1087
1131
|
res.write(settlementTrailing.downstream);
|
|
1088
1132
|
}
|
|
1133
|
+
res.end();
|
|
1134
|
+
this.recordReconciledInference(route, endpoint, requestId, { promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) }, this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(), this.inferPromptForHash(body));
|
|
1135
|
+
return;
|
|
1089
1136
|
}
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
}
|
|
1095
|
-
}
|
|
1096
|
-
res.end();
|
|
1097
|
-
this.recordReconciledInference(route, endpoint, requestId, { promptTokens: 0, completionTokens: 0, billedMicros: Math.max(1, bytes) }, this.parseSellerSettlementSummary(upstreamResponse.headers) ?? settlementTrailing.settlement ?? settlementExtractor.current(), this.inferPromptForHash(body));
|
|
1137
|
+
const responseBody = await upstreamResponse.text();
|
|
1138
|
+
res.send(responseBody);
|
|
1139
|
+
const usage = this.readUsage(responseBody);
|
|
1140
|
+
this.recordReconciledInference(route, endpoint, requestId, usage, this.parseSellerSettlementSummary(upstreamResponse.headers), this.inferPromptForHash(body), responseBody);
|
|
1098
1141
|
return;
|
|
1099
1142
|
}
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1143
|
+
catch (routeError) {
|
|
1144
|
+
lastError = routeError;
|
|
1145
|
+
const kind = "deadline";
|
|
1146
|
+
const decision = this.routeFailover.decide({
|
|
1147
|
+
sellerId: sellerKey,
|
|
1148
|
+
errorKind: kind,
|
|
1149
|
+
errorMessage: this.failoverErrorMessage(routeError),
|
|
1150
|
+
attempt
|
|
1151
|
+
}, routes.length - routeIndex);
|
|
1152
|
+
this.handleFailoverDecision(decision, { sellerKey, endpoint, routeIndex, reason: "exception" });
|
|
1153
|
+
logger.warn("proxy.route.failed", "seller route failed before response", {
|
|
1154
|
+
requestId,
|
|
1155
|
+
sellerKey,
|
|
1156
|
+
model: modelId,
|
|
1157
|
+
endpoint,
|
|
1158
|
+
errorMessage: this.failoverErrorMessage(routeError),
|
|
1159
|
+
durationMs: Date.now() - startedAt
|
|
1160
|
+
});
|
|
1161
|
+
if (decision.action === "retry_same_seller") {
|
|
1162
|
+
attempt += 1;
|
|
1163
|
+
if (decision.retryDelayMs) {
|
|
1164
|
+
await new Promise((resolve) => setTimeout(resolve, decision.retryDelayMs));
|
|
1165
|
+
}
|
|
1166
|
+
continue;
|
|
1167
|
+
}
|
|
1168
|
+
if (decision.action === "fail_fast" || decision.action === "abort") {
|
|
1169
|
+
throw routeError;
|
|
1170
|
+
}
|
|
1171
|
+
// failover_next
|
|
1172
|
+
break;
|
|
1119
1173
|
}
|
|
1120
|
-
throw routeError;
|
|
1121
1174
|
}
|
|
1122
1175
|
}
|
|
1123
1176
|
throw lastError instanceof Error ? lastError : new Error("all seller routes failed");
|
|
@@ -1189,6 +1242,46 @@ export class TokenbuddyDaemon {
|
|
|
1189
1242
|
inferences: this.tokenStore.listInferenceLedger()
|
|
1190
1243
|
});
|
|
1191
1244
|
});
|
|
1245
|
+
// v1.2 §18.11: control-plane snapshot endpoint. Responds with one JSON
// document holding the prewarm cache entries, the seller pool entries, the
// credit tracker summary, the resolved focus set and the prewarm scheduler
// stats. `tb doctor` reads this to render the recovery / prewarm / credit
// summary block.
controlApp.get("/v1.2/prewarm", (req, res) => {
    // Summarize one prewarm cache entry; candidates are reported as a count only.
    const describePrewarmEntry = ({ modelId, protocol, paymentMethod, state, candidates, warmedAt, ttlMs, consecutiveWarmingFailures }) => ({
        modelId,
        protocol,
        paymentMethod,
        state,
        candidateCount: candidates.length,
        warmedAt,
        ttlMs,
        consecutiveWarmingFailures
    });
    // Summarize one seller pool entry down to the fields exposed on this endpoint.
    const describeSeller = ({ sellerId, url, circuit, consecutiveFailures, lastSuccessAt, lastFailAt, healthScore }) => ({
        sellerId,
        url,
        circuit,
        consecutiveFailures,
        lastSuccessAt,
        lastFailAt,
        healthScore
    });
    // Collect every section before touching the response so the status is
    // only set once all of the snapshot calls have returned.
    const prewarmEntries = this.prewarmCache.snapshot().map(describePrewarmEntry);
    const poolEntries = this.sellerPool.snapshot().map(describeSeller);
    const credit = this.creditTracker.summary();
    const focusSet = this.resolveFocusSet();
    const scheduler = this.prewarmScheduler.stats();
    const payload = {
        prewarm: {
            entries: prewarmEntries,
            size: prewarmEntries.length
        },
        pool: {
            size: poolEntries.length,
            entries: poolEntries
        },
        credit,
        focusSet,
        scheduler
    };
    res.status(200).json(payload);
});
|
|
1192
1285
|
controlApp.get("/sellers", async (req, res) => {
|
|
1193
1286
|
try {
|
|
1194
1287
|
const registry = await this.fetchRegistry();
|
|
@@ -1399,12 +1492,55 @@ export class TokenbuddyDaemon {
|
|
|
1399
1492
|
sellerRegistryUrl: this.config.sellerRegistryUrl,
|
|
1400
1493
|
selectionMode: this.selectionMode
|
|
1401
1494
|
});
|
|
1495
|
+
// v1.2 §18.5: kick off the on-demand prewarm pipeline. The startup
|
|
1496
|
+
// sweep runs after the configured jitter window (5-10s by default);
|
|
1497
|
+
// subsequent refreshes run on the `idleIntervalMs` cadence and the
|
|
1498
|
+
// `forwardProxyRequest` hot path can dispatch lazy prewarms on miss.
|
|
1499
|
+
this.prewarmScheduler.start();
|
|
1500
|
+
void this.runStartupPrewarmSweep();
|
|
1501
|
+
}
|
|
1502
|
+
/**
|
|
1503
|
+
* v1.2 §18.4: build the focus set from the explicit config, the env
|
|
1504
|
+
* override, and the historical usage in the buyer store. The order of
|
|
1505
|
+
* precedence: explicit config > env > historical > empty.
|
|
1506
|
+
*/
|
|
1507
|
+
resolveFocusSet() {
|
|
1508
|
+
const explicit = this.config.warmupModels ?? [];
|
|
1509
|
+
if (explicit.length > 0) {
|
|
1510
|
+
return explicit;
|
|
1511
|
+
}
|
|
1512
|
+
const envRaw = process.env.TB_BUYER_WARMUP_MODELS || "";
|
|
1513
|
+
const envModels = envRaw.split(",").map((s) => s.trim()).filter(Boolean);
|
|
1514
|
+
if (envModels.length > 0) {
|
|
1515
|
+
return envModels;
|
|
1516
|
+
}
|
|
1517
|
+
return this.tokenStore.recentModels(7, 5);
|
|
1518
|
+
}
|
|
1519
|
+
async runStartupPrewarmSweep() {
|
|
1520
|
+
const focusSet = this.resolveFocusSet();
|
|
1521
|
+
if (focusSet.length === 0) {
|
|
1522
|
+
logger.info("prewarm.startup.skipped", "no focus set configured; relying on lazy prewarms", {});
|
|
1523
|
+
return;
|
|
1524
|
+
}
|
|
1525
|
+
logger.info("prewarm.startup.scheduled", "startup prewarm sweep scheduled", {
|
|
1526
|
+
focusSetSize: focusSet.length,
|
|
1527
|
+
focusSet: focusSet.slice(0, 20)
|
|
1528
|
+
});
|
|
1529
|
+
try {
|
|
1530
|
+
await this.prewarmScheduler.runStartupPrewarm(focusSet);
|
|
1531
|
+
}
|
|
1532
|
+
catch (err) {
|
|
1533
|
+
logger.warn("prewarm.startup.failed", "startup prewarm sweep failed", {
|
|
1534
|
+
errorMessage: err instanceof Error ? err.message : String(err)
|
|
1535
|
+
});
|
|
1536
|
+
}
|
|
1402
1537
|
}
|
|
1403
1538
|
stop() {
|
|
1404
1539
|
if (this.controlServer)
|
|
1405
1540
|
this.controlServer.close();
|
|
1406
1541
|
if (this.proxyServer)
|
|
1407
1542
|
this.proxyServer.close();
|
|
1543
|
+
void this.prewarmScheduler.stop();
|
|
1408
1544
|
this.tokenStore.close();
|
|
1409
1545
|
}
|
|
1410
1546
|
}
|