@tokenbuddy/tokenbuddy 1.0.8 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +13 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +21 -2
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +54 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/credit-tracker.d.ts +118 -0
- package/dist/src/credit-tracker.d.ts.map +1 -0
- package/dist/src/credit-tracker.js +220 -0
- package/dist/src/credit-tracker.js.map +1 -0
- package/dist/src/daemon.d.ts +49 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +541 -405
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/model-index.d.ts +86 -0
- package/dist/src/model-index.d.ts.map +1 -0
- package/dist/src/model-index.js +214 -0
- package/dist/src/model-index.js.map +1 -0
- package/dist/src/prewarm-cache.d.ts +149 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -0
- package/dist/src/prewarm-cache.js +288 -0
- package/dist/src/prewarm-cache.js.map +1 -0
- package/dist/src/prewarm-scheduler.d.ts +150 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -0
- package/dist/src/prewarm-scheduler.js +484 -0
- package/dist/src/prewarm-scheduler.js.map +1 -0
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +9 -1
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/route-failover.d.ts +96 -0
- package/dist/src/route-failover.d.ts.map +1 -0
- package/dist/src/route-failover.js +177 -0
- package/dist/src/route-failover.js.map +1 -0
- package/dist/src/seller-catalog.d.ts +26 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js +40 -0
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +127 -0
- package/dist/src/seller-pool.d.ts.map +1 -0
- package/dist/src/seller-pool.js +243 -0
- package/dist/src/seller-pool.js.map +1 -0
- package/dist/src/stream-failover.d.ts +78 -0
- package/dist/src/stream-failover.d.ts.map +1 -0
- package/dist/src/stream-failover.js +93 -0
- package/dist/src/stream-failover.js.map +1 -0
- package/package.json +1 -1
- package/src/buyer-store.ts +32 -2
- package/src/cli.ts +61 -0
- package/src/credit-tracker.test.ts +165 -0
- package/src/credit-tracker.ts +269 -0
- package/src/daemon.ts +569 -445
- package/src/model-index.test.ts +184 -0
- package/src/model-index.ts +266 -0
- package/src/prewarm-cache.test.ts +281 -0
- package/src/prewarm-cache.ts +373 -0
- package/src/prewarm-scheduler.test.ts +367 -0
- package/src/prewarm-scheduler.ts +581 -0
- package/src/provider-install.ts +9 -1
- package/src/route-failover.test.ts +193 -0
- package/src/route-failover.ts +233 -0
- package/src/seller-catalog-413.test.ts +61 -0
- package/src/seller-catalog.ts +47 -0
- package/src/seller-pool.test.ts +231 -0
- package/src/seller-pool.ts +333 -0
- package/src/stream-failover.test.ts +52 -0
- package/src/stream-failover.ts +129 -0
- package/src/thousand-seller.test.ts +151 -0
- package/tests/daemon-413-fallback.test.ts +92 -0
- package/tests/e2e.test.ts +3 -2
- package/tests/tokenbuddy.test.ts +68 -11
|
@@ -0,0 +1,581 @@
|
|
|
1
|
+
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
2
|
+
import type { RegistrySeller } from "./seller-catalog.js";
|
|
3
|
+
import type { ModelIndex } from "./model-index.js";
|
|
4
|
+
import type { PrewarmCache, PrewarmCandidate } from "./prewarm-cache.js";
|
|
5
|
+
|
|
6
|
+
// Module-scoped structured logger; every scheduler log event in this file is emitted through it.
const logger = createModuleLogger("tb-proxyd:prewarm-scheduler");
|
|
7
|
+
|
|
8
|
+
/**
 * Why a prewarm task was enqueued. In this file `runStartupPrewarm` tags its
 * tasks "startup" and `tickIdle` tags its tasks "idle"; "lazy" and "explicit"
 * are supplied by callers. The value is recorded on the task and logged.
 */
export type PrewarmReason = "startup" | "lazy" | "idle" | "explicit";
|
|
9
|
+
|
|
10
|
+
/** Outcome of a single seller probe, as reported by a `SellerProber`. */
export interface ProbeResult {
  /** `true` when the seller answered acceptably; only such sellers become cache candidates. */
  ok: boolean;
  /** Measured probe latency in milliseconds; drives the seller's health score. */
  latencyMs: number;
  /** HTTP status returned by the seller, when one was received (logged only). */
  httpStatus?: number;
  /** Human-readable failure description, when the probe did not succeed (logged only). */
  errorMessage?: string;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Probe function injected into the scheduler. Keeping it separate from the
 * scheduler lets unit tests supply a deterministic stub instead of starting
 * HTTP servers.
 *
 * Per the original design note, the default implementation lives in
 * `health-probe.ts` (PR-2/PR-3) and issues `GET <seller.url>/healthz` with a
 * 3s `AbortSignal.timeout`.
 *
 * Contract: implementations must watch the supplied `AbortSignal` and reject
 * once it aborts, so that `stop()` can cut in-flight probes short.
 */
export type SellerProber = (
  seller: RegistrySeller,
  signal: AbortSignal
) => Promise<ProbeResult>;
|
|
26
|
+
|
|
27
|
+
/** Construction options for `PrewarmScheduler`. */
export interface PrewarmSchedulerOptions {
  /** Resolves the sellers that serve a given model. */
  modelIndex: ModelIndex;
  /** Cache that receives the outcome of every prewarm run. */
  cache: PrewarmCache;
  /** Function that performs the actual seller probe. */
  prober: SellerProber;

  // --- Limits (defaults match buyer-driven-fallback-design.md §18.6) ---

  /** Maximum number of tasks running at once. Default: 4. */
  concurrency?: number;
  /** Minimum gap between two probes of the same seller, in ms. Default: 30_000. */
  perSellerMinIntervalMs?: number;
  /** Global probe budget per rolling minute. Default: 30. */
  maxPrewarmPerMinute?: number;

  /**
   * Idle loop cadence in ms. Default: 60_000. The scheduler can also be driven
   * externally (PR-2.1 wires `tickIdle` into the existing registry-loop heartbeat).
   */
  idleIntervalMs?: number;

  // --- Startup jitter (5-10s by default per §18.5.1) ---

  /** Lower bound of the startup jitter window, in ms. Default: 5_000. */
  startupJitterMinMs?: number;
  /** Upper bound of the startup jitter window, in ms. Default: 10_000. */
  startupJitterMaxMs?: number;

  // --- Hooks for testing; defaults to Node's setTimeout / setImmediate ---

  /** Abortable delay used for jitter and the idle loop. */
  sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;
  /** Source of randomness in [0, 1). Default: `Math.random`. */
  random?: () => number;
  /** Clock in epoch milliseconds. Default: `Date.now`. */
  now?: () => number;

  // --- Optional filter applied to every probe (e.g. preferred protocol) ---

  /** Protocol used when a task does not specify one. */
  protocol?: string;
  /** Payment method used when a task does not specify one. */
  paymentMethod?: string;
}
|
|
49
|
+
|
|
50
|
+
/** Internal bookkeeping record for one scheduled prewarm of a (model, protocol, payment) triple. */
interface PrewarmTask {
  /** Monotonically increasing identifier assigned at enqueue time. */
  id: number;
  /** Model whose sellers are to be probed. */
  modelId: string;
  /** Why the task was scheduled. */
  reason: PrewarmReason;
  /** Protocol filter passed to the model index and cache. */
  protocol: string;
  /** Payment-method filter passed to the model index and cache. */
  paymentMethod: string;
  /** Scheduler clock reading when the task entered the queue. */
  enqueuedAt: number;
  /** Ids of the sellers resolved for this task; empty until the task is dispatched. */
  sellerIds: string[];
  /** Scheduler clock reading when the task began running. */
  startedAt?: number;
  /** Scheduler clock reading when the task finished. */
  completedAt?: number;
  /** Lifecycle state; everything other than "queued" and "running" is terminal. */
  status: "queued" | "running" | "succeeded" | "failed" | "canceled" | "rate_limited";
  /** Failure or cancellation detail, when available. */
  errorMessage?: string;
}
|
|
63
|
+
|
|
64
|
+
/** Point-in-time counters returned by `PrewarmScheduler.stats()`. */
export interface PrewarmSchedulerStats {
  /** Tasks waiting in the queue. */
  queueDepth: number;
  /** Tasks currently running. */
  inFlight: number;
  /** Tasks ever enqueued. */
  totalScheduled: number;
  /** Tasks that finished with status "succeeded". */
  totalSucceeded: number;
  /** Tasks that finished with status "failed". */
  totalFailed: number;
  /** Tasks rejected because the per-minute probe budget was exhausted. */
  totalRateLimited: number;
  /** Probe attempts recorded during the last 60 seconds. */
  recentProbesInLastMinute: number;
  /** Configured maximum number of concurrently running tasks. */
  concurrency: number;
  /** Configured per-minute probe budget. */
  maxPrewarmPerMinute: number;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Background scheduler that warms up sellers for a (model, protocol,
 * payment) triple on demand. The scheduler owns:
 *   - queue management with bounded concurrency (default 4)
 *   - per-seller rate limiting (default 30s between probes to the same
 *     seller, even across different models)
 *   - global rate limiting (default 30 probes/minute, checked when a task is
 *     dispatched)
 *   - jitter before the startup sweep to avoid thundering herds
 *
 * The scheduler does NOT own HTTP I/O; that lives in the injected
 * `prober` so tests can swap in a deterministic stub.
 */
export class PrewarmScheduler {
  private readonly modelIndex: ModelIndex;
  private readonly cache: PrewarmCache;
  private readonly prober: SellerProber;

  private readonly concurrency: number;
  private readonly perSellerMinIntervalMs: number;
  private readonly maxPrewarmPerMinute: number;
  private readonly idleIntervalMs: number;
  private readonly startupJitterMinMs: number;
  private readonly startupJitterMaxMs: number;
  private readonly sleep: (ms: number, signal?: AbortSignal) => Promise<void>;
  private readonly random: () => number;
  private readonly now: () => number;
  private readonly protocol: string | undefined;
  private readonly paymentMethod: string | undefined;

  private readonly queue: PrewarmTask[] = [];
  // Resolver of the promise returned by `schedulePrewarm`, keyed by task id.
  // An entry exists from enqueue until the task reaches a terminal status.
  private readonly waiters = new Map<number, (task: PrewarmTask) => void>();
  private inFlight = 0;
  // Timestamps of probe attempts, in ascending order (appended with `now()`).
  private recentProbes: number[] = [];
  private lastProbeAtBySeller = new Map<string, number>();
  private nextTaskId = 1;

  private totalScheduled = 0;
  private totalSucceeded = 0;
  private totalFailed = 0;
  private totalRateLimited = 0;

  private abortController: AbortController | null = null;
  private idleLoopPromise: Promise<void> | null = null;

  constructor(options: PrewarmSchedulerOptions) {
    this.modelIndex = options.modelIndex;
    this.cache = options.cache;
    this.prober = options.prober;
    this.concurrency = options.concurrency ?? 4;
    this.perSellerMinIntervalMs = options.perSellerMinIntervalMs ?? 30_000;
    this.maxPrewarmPerMinute = options.maxPrewarmPerMinute ?? 30;
    this.idleIntervalMs = options.idleIntervalMs ?? 60_000;
    this.startupJitterMinMs = options.startupJitterMinMs ?? 5_000;
    this.startupJitterMaxMs = options.startupJitterMaxMs ?? 10_000;
    this.sleep = options.sleep ?? defaultSleep;
    this.random = options.random ?? Math.random;
    this.now = options.now ?? Date.now;
    this.protocol = options.protocol;
    this.paymentMethod = options.paymentMethod;
  }

  /**
   * Start the background idle loop. Calling it again while the loop is
   * running is a no-op. Every `idleIntervalMs` the loop calls `tickIdle()`,
   * which re-probes cache entries that are about to expire.
   *
   * The guard is the idle-loop promise, not the abort controller: the
   * controller is also created lazily by `dispatch()`, so keying on it would
   * silently skip the idle loop whenever a task was scheduled before `start()`.
   */
  start(): void {
    if (this.idleLoopPromise) {
      return;
    }
    this.idleLoopPromise = this.runIdleLoop(this.ensureAbortController().signal);
  }

  /**
   * Cancel the idle loop, every queued task, and every in-flight task.
   * Queued tasks are settled as "canceled" immediately. In-flight tasks see
   * the abort through their probe signal, settle as "canceled", and do not
   * commit results to the cache. Resolves once the idle loop has exited.
   */
  async stop(): Promise<void> {
    const controller = this.abortController;
    const idleLoop = this.idleLoopPromise;
    this.abortController = null;
    this.idleLoopPromise = null;

    // Drain the queue before yielding. An aborted in-flight task re-enters
    // `dispatch()` as it unwinds; if the queue still held work at that point
    // it would be started under a fresh, un-aborted controller.
    for (const task of this.queue.splice(0)) {
      this.settle(task, "canceled");
    }

    controller?.abort();
    if (idleLoop) {
      await idleLoop.catch(() => undefined);
    }
  }

  /**
   * Enqueue a prewarm for a (model, protocol, payment) triple.
   *
   * @param input.modelId Model whose sellers should be probed.
   * @param input.reason Recorded on the task and logged; it does not change
   *   how the task is scheduled.
   * @param input.protocol Defaults to the scheduler-level protocol, then
   *   "chat_completions".
   * @param input.paymentMethod Defaults to the scheduler-level payment
   *   method, then "clawtip".
   * @param input.blockOnFirst Accepted for interface compatibility; not read
   *   by the current implementation.
   * @returns A promise that resolves (never rejects) with the task once it
   *   reaches a terminal status: "succeeded", "failed", "canceled" or
   *   "rate_limited".
   */
  schedulePrewarm(input: {
    modelId: string;
    reason: PrewarmReason;
    protocol?: string;
    paymentMethod?: string;
    blockOnFirst?: boolean;
  }): Promise<PrewarmTask> {
    const protocol = input.protocol ?? this.protocol ?? "chat_completions";
    const paymentMethod = input.paymentMethod ?? this.paymentMethod ?? "clawtip";
    const task: PrewarmTask = {
      id: this.nextTaskId++,
      modelId: input.modelId,
      reason: input.reason,
      protocol,
      paymentMethod,
      enqueuedAt: this.now(),
      sellerIds: [],
      status: "queued"
    };

    // Register the resolver before dispatching: `dispatch()` may settle the
    // task synchronously (rate limit, no sellers). The promise executor runs
    // synchronously, so the waiter is in place by the time we dispatch.
    // This replaces a `setImmediate` polling loop that kept the event loop
    // spinning for as long as any task was pending.
    const settled = new Promise<PrewarmTask>((resolve) => {
      this.waiters.set(task.id, resolve);
    });

    this.queue.push(task);
    this.totalScheduled += 1;
    logger.info("prewarm.scheduled", "prewarm task enqueued", {
      taskId: task.id,
      modelId: task.modelId,
      reason: task.reason,
      protocol,
      paymentMethod,
      queueDepth: this.queue.length
    });

    // Fire-and-forget dispatch; the caller observes completion through the
    // returned promise rather than blocking the queue.
    this.dispatch().catch((err) => {
      logger.error("prewarm.dispatch.unexpected", "dispatcher threw unexpectedly", {
        errorMessage: PrewarmScheduler.describeError(err)
      });
    });

    return settled;
  }

  /**
   * Run a one-shot sweep that probes every given model after the configured
   * startup jitter. Used by the `tb doctor --prewarm` explicit trigger and by
   * the startup hook. Resolves once every scheduled task has reached a
   * terminal state, or immediately (scheduling nothing) when the scheduler is
   * stopped during the jitter window.
   *
   * @param modelIds Models to prewarm.
   */
  async runStartupPrewarm(modelIds: string[]): Promise<void> {
    // Capture the signal up front: `stop()` clears `this.abortController`,
    // so re-reading the field after the sleep would miss the abort.
    const signal = this.ensureAbortController().signal;
    try {
      await this.sleep(this.jitterMs(), signal);
    } catch (err) {
      if (signal.aborted) {
        return;
      }
      throw err;
    }
    if (signal.aborted) {
      return;
    }
    await Promise.all(
      modelIds.map((modelId) => this.schedulePrewarm({ modelId, reason: "startup" }))
    );
  }

  /**
   * Enqueue an "idle" prewarm for every cache entry that
   * `cache.isExpiringSoon(..., 60_000)` reports as expiring. Intended to be
   * called from the registry loop's heartbeat (replaces the v1 "all sellers"
   * probe cycle with "only the ones we are about to forget").
   *
   * @returns The number of tasks that were enqueued.
   */
  tickIdle(): number {
    const expiring = this.cache
      .snapshot()
      .filter((entry) => this.cache.isExpiringSoon(entry.modelId, entry.protocol, entry.paymentMethod, 60_000));
    for (const entry of expiring) {
      // The returned promise never rejects, so it is safe to drop.
      void this.schedulePrewarm({
        modelId: entry.modelId,
        protocol: entry.protocol,
        paymentMethod: entry.paymentMethod,
        reason: "idle"
      });
    }
    return expiring.length;
  }

  /** Return a snapshot of the scheduler's counters and configured limits. */
  stats(): PrewarmSchedulerStats {
    return {
      queueDepth: this.queue.length,
      inFlight: this.inFlight,
      totalScheduled: this.totalScheduled,
      totalSucceeded: this.totalSucceeded,
      totalFailed: this.totalFailed,
      totalRateLimited: this.totalRateLimited,
      recentProbesInLastMinute: this.recentProbesInLastMinute(),
      concurrency: this.concurrency,
      maxPrewarmPerMinute: this.maxPrewarmPerMinute
    };
  }

  /** Format an unknown thrown value for logs and task error messages. */
  private static describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Return the current abort controller, creating one when none exists. */
  private ensureAbortController(): AbortController {
    if (!this.abortController) {
      this.abortController = new AbortController();
    }
    return this.abortController;
  }

  /**
   * Move a task into a terminal status, stamp `completedAt`, and resolve the
   * promise handed out by `schedulePrewarm`. Calling it on a task that is
   * already terminal is a no-op.
   */
  private settle(
    task: PrewarmTask,
    status: Exclude<PrewarmTask["status"], "queued" | "running">,
    errorMessage?: string
  ): void {
    if (task.status !== "queued" && task.status !== "running") {
      return;
    }
    task.status = status;
    if (errorMessage !== undefined) {
      task.errorMessage = errorMessage;
    }
    task.completedAt = this.now();
    const resolve = this.waiters.get(task.id);
    if (resolve) {
      this.waiters.delete(task.id);
      resolve(task);
    }
  }

  /** Pick a delay inside [startupJitterMinMs, startupJitterMaxMs). */
  private jitterMs(): number {
    const span = Math.max(0, this.startupJitterMaxMs - this.startupJitterMinMs);
    return this.startupJitterMinMs + Math.floor(this.random() * span);
  }

  /** Sleep/tick loop behind `start()`; exits when `signal` aborts or the sleep rejects. */
  private async runIdleLoop(signal: AbortSignal): Promise<void> {
    while (!signal.aborted) {
      try {
        await this.sleep(this.idleIntervalMs, signal);
      } catch {
        return;
      }
      if (signal.aborted) {
        return;
      }
      try {
        this.tickIdle();
      } catch (err) {
        logger.error("prewarm.idle.failed", "idle tick threw unexpectedly", {
          errorMessage: PrewarmScheduler.describeError(err)
        });
      }
    }
  }

  /**
   * Start queued tasks until the queue is empty or `concurrency` tasks are
   * running. Tasks that cannot run (budget exhausted, no sellers, seller
   * lookup threw) are settled here; the rest are handed to `runTask` without
   * awaiting it.
   */
  private async dispatch(): Promise<void> {
    while (this.queue.length > 0 && this.inFlight < this.concurrency) {
      const task = this.queue.shift();
      if (!task) {
        break;
      }
      if (task.status === "canceled") {
        continue;
      }
      if (this.isOverBudget()) {
        this.totalRateLimited += 1;
        this.settle(task, "rate_limited");
        logger.warn("prewarm.rate_limited", "global per-minute probe budget exhausted", {
          taskId: task.id,
          modelId: task.modelId,
          recentProbes: this.recentProbesInLastMinute()
        });
        continue;
      }

      let sellers: RegistrySeller[];
      try {
        sellers = this.modelIndex.sellersFor(task.modelId, {
          protocol: task.protocol,
          paymentMethod: task.paymentMethod
        });
      } catch (err) {
        // The task has already left the queue; settle it so its awaiter is
        // not left pending forever.
        const errorMessage = PrewarmScheduler.describeError(err);
        this.totalFailed += 1;
        this.settle(task, "failed", errorMessage);
        logger.error("prewarm.dispatch.unexpected", "dispatcher threw unexpectedly", {
          taskId: task.id,
          modelId: task.modelId,
          errorMessage
        });
        continue;
      }

      task.sellerIds = sellers.map((s) => s.id);
      if (sellers.length === 0) {
        this.totalFailed += 1;
        this.settle(task, "failed", "no sellers for model");
        logger.warn("prewarm.no_sellers", "no registry sellers match model", {
          taskId: task.id,
          modelId: task.modelId,
          protocol: task.protocol,
          paymentMethod: task.paymentMethod
        });
        continue;
      }

      // Capture the signal now so the running task still observes `stop()`
      // after the controller reference has been cleared. A controller is
      // created on demand so `stop()` works even if `start()` was never called.
      const signal = this.ensureAbortController().signal;
      this.inFlight += 1;
      task.status = "running";
      task.startedAt = this.now();
      // Run without awaiting; the loop keeps dispatching.
      void this.runTask(task, sellers, signal);
    }
  }

  /**
   * Run one task to completion. Whatever happens inside, the concurrency
   * slot is released, the task ends in a terminal status, and the dispatcher
   * is re-entered.
   */
  private async runTask(task: PrewarmTask, sellers: RegistrySeller[], signal: AbortSignal | undefined): Promise<void> {
    try {
      await this.probeSellers(task, sellers, signal);
    } catch (err) {
      const errorMessage = PrewarmScheduler.describeError(err);
      logger.error("prewarm.task.unexpected", "prewarm task threw unexpectedly", {
        taskId: task.id,
        modelId: task.modelId,
        errorMessage
      });
      this.totalFailed += 1;
      this.settle(task, "failed", errorMessage);
    } finally {
      // Free the slot and keep dispatching.
      this.inFlight -= 1;
      this.dispatch().catch(() => undefined);
    }
  }

  /**
   * Probe the task's sellers one after another, commit the reachable ones to
   * the cache, and settle the task. A scheduler abort settles it as
   * "canceled" without committing anything.
   */
  private async probeSellers(task: PrewarmTask, sellers: RegistrySeller[], signal: AbortSignal | undefined): Promise<void> {
    // NOTE(review): the cancel and all-rate-limited paths below return without
    // a matching commitWarm/recordFailure. Confirm PrewarmCache tolerates a
    // beginWarming call that is never completed.
    this.cache.beginWarming(task.modelId, task.protocol, task.paymentMethod);
    const viable: PrewarmCandidate[] = [];
    let probedAny = false;

    for (const seller of sellers) {
      if (signal?.aborted) {
        this.settle(task, "canceled");
        return;
      }
      if (this.isSellerRateLimited(seller.id)) {
        logger.debug("prewarm.seller_skipped", "seller probe skipped due to per-seller rate limit", {
          taskId: task.id,
          sellerId: seller.id
        });
        continue;
      }
      probedAny = true;
      this.recordProbeAttempt();
      const probeSignal = composeProbeSignal(signal);
      let result: ProbeResult;
      try {
        result = await this.prober(seller, probeSignal.signal);
      } catch (err) {
        const errorMessage = PrewarmScheduler.describeError(err);
        // A rejection while the scheduler is aborting is the prober honouring
        // `stop()`: treat it as a canceled run and exit early.
        if (signal?.aborted) {
          this.settle(task, "canceled", errorMessage);
          return;
        }
        // Any other rejection is a per-seller failure. It still counts as a
        // probe for the per-seller rate limit, so a seller whose prober
        // throws is not re-probed by the very next task.
        this.lastProbeAtBySeller.set(seller.id, this.now());
        logger.error("prewarm.probe.threw", "seller prober threw unexpectedly", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          errorMessage
        });
        continue;
      }

      this.lastProbeAtBySeller.set(seller.id, this.now());
      if (signal?.aborted) {
        this.settle(task, "canceled");
        return;
      }

      if (result.ok) {
        viable.push({
          sellerId: seller.id,
          url: seller.url,
          healthScore: scoreFromLatency(result.latencyMs),
          lastSuccessAt: this.now(),
          lastFailAt: 0,
          avgLatencyMs: result.latencyMs
        });
        logger.info("prewarm.succeeded", "seller probe succeeded", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          latencyMs: result.latencyMs,
          httpStatus: result.httpStatus
        });
      } else {
        // Unreachable sellers are logged but never stored in the cache.
        logger.warn("prewarm.failed", "seller probe failed", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          errorMessage: result.errorMessage,
          httpStatus: result.httpStatus
        });
      }
    }

    if (!probedAny) {
      // Every seller was rate-limited; the task is a no-op. Leave the prior
      // cache entry untouched and report success so callers do not see a
      // transient failure.
      this.totalSucceeded += 1;
      this.settle(task, "succeeded");
      return;
    }

    this.cache.commitWarm({
      modelId: task.modelId,
      protocol: task.protocol,
      paymentMethod: task.paymentMethod,
      candidates: viable
    });

    if (viable.length > 0) {
      this.totalSucceeded += 1;
      this.settle(task, "succeeded");
      return;
    }

    // No seller responded; record the failure against the (model, protocol,
    // payment) entry so the scheduler can back off.
    this.cache.recordFailure(task.modelId, task.protocol, task.paymentMethod, "all probes failed");
    this.totalFailed += 1;
    this.settle(task, "failed", "all probes failed");
  }

  /** True when the rolling one-minute probe budget is used up. */
  private isOverBudget(): boolean {
    return this.recentProbesInLastMinute() >= this.maxPrewarmPerMinute;
  }

  /** Drop probe timestamps older than 60s and return how many remain. */
  private recentProbesInLastMinute(): number {
    const cutoff = this.now() - 60_000;
    const firstFresh = this.recentProbes.findIndex((ts) => ts >= cutoff);
    if (firstFresh === -1) {
      this.recentProbes.length = 0;
    } else if (firstFresh > 0) {
      this.recentProbes.splice(0, firstFresh);
    }
    return this.recentProbes.length;
  }

  /** Record one probe attempt against the global per-minute budget. */
  private recordProbeAttempt(): void {
    this.recentProbes.push(this.now());
  }

  /** True when the seller was probed less than `perSellerMinIntervalMs` ago. */
  private isSellerRateLimited(sellerId: string): boolean {
    const last = this.lastProbeAtBySeller.get(sellerId);
    if (last === undefined) {
      return false;
    }
    return this.now() - last < this.perSellerMinIntervalMs;
  }
}
|
|
530
|
+
|
|
531
|
+
/**
 * Default abortable delay built on `setTimeout`.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional abort signal; when it is already aborted, or aborts
 *   before the delay elapses, the promise rejects with `Error("aborted")`.
 * @returns A promise that resolves once `ms` milliseconds have passed.
 */
function defaultSleep(ms: number, signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) {
    return Promise.reject(new Error("aborted"));
  }
  return new Promise<void>((resolve, reject) => {
    const timer = setTimeout(() => {
      // Timer fired first: detach the abort listener before resolving.
      signal?.removeEventListener("abort", handleAbort);
      resolve();
    }, ms);

    function handleAbort(): void {
      // Abort fired first: cancel the pending timer before rejecting.
      clearTimeout(timer);
      signal?.removeEventListener("abort", handleAbort);
      reject(new Error("aborted"));
    }

    signal?.addEventListener("abort", handleAbort, { once: true });
  });
}
|
|
552
|
+
|
|
553
|
+
/**
 * Build a per-probe abort handle that mirrors the scheduler's overall abort
 * signal. Probers receive the scoped signal so aborting the scheduler
 * propagates into any in-flight HTTP request (typically wired through
 * `fetch(..., { signal })`).
 *
 * @param parent Scheduler-level signal to follow, if any.
 * @returns An object whose `signal` aborts when `parent` aborts (carrying
 *   `parent.reason`) or when its own `abort()` is called. Calling `abort()`
 *   never aborts `parent`.
 */
function composeProbeSignal(parent: AbortSignal | undefined): { signal: AbortSignal; abort(reason?: unknown): void } {
  const controller = new AbortController();
  if (!parent) {
    return controller;
  }
  if (parent.aborted) {
    controller.abort(parent.reason);
    return controller;
  }

  // Prefer `AbortSignal.any` when the runtime provides it. The scheduler's
  // signal lives for the whole daemon lifetime, and registering one `abort`
  // listener on it per probe (the fallback below) never releases those
  // listeners, so they pile up. `AbortSignal.any` follows the parent without
  // adding an event listener to it. It is looked up dynamically because older
  // runtimes and older lib typings do not have it.
  const anyOf: unknown = Reflect.get(AbortSignal, "any");
  if (typeof anyOf === "function") {
    const composed: unknown = anyOf.call(AbortSignal, [parent, controller.signal]);
    if (composed instanceof AbortSignal) {
      return {
        signal: composed,
        abort: (reason?: unknown) => controller.abort(reason)
      };
    }
  }

  // Fallback for runtimes without `AbortSignal.any`.
  parent.addEventListener("abort", () => controller.abort(parent.reason), { once: true });
  return controller;
}
|
|
570
|
+
|
|
571
|
+
/**
 * Map a probe latency to a coarse health score.
 *
 * @param latencyMs Probe latency in milliseconds.
 * @returns 100/90/75/60/40 for latencies up to 100/300/800/1500/3000 ms,
 *   20 for anything slower, and 30 when the latency is negative or not a
 *   finite number.
 */
function scoreFromLatency(latencyMs: number): number {
  const measurable = Number.isFinite(latencyMs) && latencyMs >= 0;
  if (!measurable) {
    return 30;
  }
  // [inclusive upper bound in ms, score], ordered from fastest tier to slowest.
  const tiers: ReadonlyArray<readonly [number, number]> = [
    [100, 100],
    [300, 90],
    [800, 75],
    [1500, 60],
    [3000, 40]
  ];
  for (const [ceilingMs, score] of tiers) {
    if (latencyMs <= ceilingMs) {
      return score;
    }
  }
  return 20;
}
|
package/src/provider-install.ts
CHANGED
|
@@ -469,7 +469,12 @@ function opencodeConfig(home: string, proxyUrl: string, config: ProviderRuntimeC
|
|
|
469
469
|
: {};
|
|
470
470
|
providers.tokenbuddy = {
|
|
471
471
|
name: "TokenBuddy",
|
|
472
|
-
|
|
472
|
+
// v1.0.10 起默认走 OpenAI Responses API 协议(/v1/responses)。
|
|
473
|
+
// 验证:之前默认 @ai-sdk/openai(chat completions)也能 work,但 Responses API
|
|
474
|
+
// 才是上游(code.shoestravel.xin 等)原生支持的 SSE 事件链,type 字段更标准
|
|
475
|
+
// (response.created / response.output_text.delta / response.completed),
|
|
476
|
+
// 让 buyer 端 SseUsageExtractor 能稳定 parse usage 字段。
|
|
477
|
+
npm: "@ai-sdk/openai-responses",
|
|
473
478
|
options: {
|
|
474
479
|
apiKey: PROXY_ACCESS_TOKEN_PLACEHOLDER,
|
|
475
480
|
baseURL: openAiBaseUrl(proxyUrl),
|
|
@@ -483,6 +488,9 @@ function opencodeConfig(home: string, proxyUrl: string, config: ProviderRuntimeC
|
|
|
483
488
|
},
|
|
484
489
|
};
|
|
485
490
|
current.provider = providers;
|
|
491
|
+
// 写顶层 model / small_model,让 opencode 默认走 tokenbuddy 而不是残留的 openai/qwen-plus 死链
|
|
492
|
+
current.model = `tokenbuddy/${model}`;
|
|
493
|
+
current.small_model = `tokenbuddy/${model}`;
|
|
486
494
|
return [makeChange("opencode", configPath, "configure OpenCode provider for TokenBuddy proxy", jsonContent(current))];
|
|
487
495
|
}
|
|
488
496
|
|