@tokenbuddy/tokenbuddy 1.0.9 → 1.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/dist/src/buyer-store.d.ts +13 -0
  2. package/dist/src/buyer-store.d.ts.map +1 -1
  3. package/dist/src/buyer-store.js +21 -2
  4. package/dist/src/buyer-store.js.map +1 -1
  5. package/dist/src/cli.d.ts.map +1 -1
  6. package/dist/src/cli.js +54 -0
  7. package/dist/src/cli.js.map +1 -1
  8. package/dist/src/credit-tracker.d.ts +118 -0
  9. package/dist/src/credit-tracker.d.ts.map +1 -0
  10. package/dist/src/credit-tracker.js +220 -0
  11. package/dist/src/credit-tracker.js.map +1 -0
  12. package/dist/src/daemon.d.ts +49 -4
  13. package/dist/src/daemon.d.ts.map +1 -1
  14. package/dist/src/daemon.js +541 -405
  15. package/dist/src/daemon.js.map +1 -1
  16. package/dist/src/model-index.d.ts +86 -0
  17. package/dist/src/model-index.d.ts.map +1 -0
  18. package/dist/src/model-index.js +214 -0
  19. package/dist/src/model-index.js.map +1 -0
  20. package/dist/src/prewarm-cache.d.ts +149 -0
  21. package/dist/src/prewarm-cache.d.ts.map +1 -0
  22. package/dist/src/prewarm-cache.js +288 -0
  23. package/dist/src/prewarm-cache.js.map +1 -0
  24. package/dist/src/prewarm-scheduler.d.ts +150 -0
  25. package/dist/src/prewarm-scheduler.d.ts.map +1 -0
  26. package/dist/src/prewarm-scheduler.js +484 -0
  27. package/dist/src/prewarm-scheduler.js.map +1 -0
  28. package/dist/src/provider-install.d.ts.map +1 -1
  29. package/dist/src/provider-install.js +9 -1
  30. package/dist/src/provider-install.js.map +1 -1
  31. package/dist/src/route-failover.d.ts +96 -0
  32. package/dist/src/route-failover.d.ts.map +1 -0
  33. package/dist/src/route-failover.js +177 -0
  34. package/dist/src/route-failover.js.map +1 -0
  35. package/dist/src/seller-catalog.d.ts +26 -0
  36. package/dist/src/seller-catalog.d.ts.map +1 -1
  37. package/dist/src/seller-catalog.js +40 -0
  38. package/dist/src/seller-catalog.js.map +1 -1
  39. package/dist/src/seller-pool.d.ts +127 -0
  40. package/dist/src/seller-pool.d.ts.map +1 -0
  41. package/dist/src/seller-pool.js +243 -0
  42. package/dist/src/seller-pool.js.map +1 -0
  43. package/dist/src/stream-failover.d.ts +78 -0
  44. package/dist/src/stream-failover.d.ts.map +1 -0
  45. package/dist/src/stream-failover.js +93 -0
  46. package/dist/src/stream-failover.js.map +1 -0
  47. package/package.json +1 -1
  48. package/src/buyer-store.ts +32 -2
  49. package/src/cli.ts +61 -0
  50. package/src/credit-tracker.test.ts +165 -0
  51. package/src/credit-tracker.ts +269 -0
  52. package/src/daemon.ts +569 -445
  53. package/src/model-index.test.ts +184 -0
  54. package/src/model-index.ts +266 -0
  55. package/src/prewarm-cache.test.ts +281 -0
  56. package/src/prewarm-cache.ts +373 -0
  57. package/src/prewarm-scheduler.test.ts +367 -0
  58. package/src/prewarm-scheduler.ts +581 -0
  59. package/src/provider-install.ts +9 -1
  60. package/src/route-failover.test.ts +193 -0
  61. package/src/route-failover.ts +233 -0
  62. package/src/seller-catalog-413.test.ts +61 -0
  63. package/src/seller-catalog.ts +47 -0
  64. package/src/seller-pool.test.ts +231 -0
  65. package/src/seller-pool.ts +333 -0
  66. package/src/stream-failover.test.ts +52 -0
  67. package/src/stream-failover.ts +129 -0
  68. package/src/thousand-seller.test.ts +151 -0
  69. package/tests/daemon-413-fallback.test.ts +92 -0
  70. package/tests/e2e.test.ts +3 -2
  71. package/tests/tokenbuddy.test.ts +68 -11
@@ -0,0 +1,581 @@
1
+ import { createModuleLogger } from "@tokenbuddy/logging";
2
+ import type { RegistrySeller } from "./seller-catalog.js";
3
+ import type { ModelIndex } from "./model-index.js";
4
+ import type { PrewarmCache, PrewarmCandidate } from "./prewarm-cache.js";
5
+
6
+ const logger = createModuleLogger("tb-proxyd:prewarm-scheduler");
7
+
8
/**
 * Why a prewarm task was enqueued. The scheduler stores the value on the
 * task and includes it in the `prewarm.scheduled` log entry.
 */
export type PrewarmReason =
  | "startup"
  | "lazy"
  | "idle"
  | "explicit";
9
+
10
/** Outcome of a single seller probe, as reported by a `SellerProber`. */
export interface ProbeResult {
  /** `true` when the seller counts as reachable; only such sellers are kept as cache candidates. */
  ok: boolean;
  /** Measured probe latency in milliseconds; used for the health score and stored as `avgLatencyMs`. */
  latencyMs: number;
  /** HTTP status observed by the prober, if any. Only logged by the scheduler. */
  httpStatus?: number;
  /** Failure description, if any. Only logged by the scheduler. */
  errorMessage?: string;
}
16
+
17
/**
 * Probe function injected into the scheduler. Keeping it separate from the
 * scheduler lets unit tests run without spinning up HTTP servers. The default
 * implementation in `health-probe.ts` (PR-2/PR-3) calls
 * `GET <seller.url>/healthz` with a 3s `AbortSignal.timeout`.
 *
 * Contract: a prober must observe the supplied `AbortSignal` and reject once
 * it aborts, so the scheduler can short-circuit in-flight probes on `stop()`.
 */
export type SellerProber = (
  seller: RegistrySeller,
  signal: AbortSignal
) => Promise<ProbeResult>;
26
+
27
/**
 * Construction options for `PrewarmScheduler`. Limit defaults match
 * buyer-driven-fallback-design.md §18.6.
 */
export interface PrewarmSchedulerOptions {
  /** Resolves the sellers that serve a model for a given protocol / payment method. */
  modelIndex: ModelIndex;
  /** Cache that receives the warm-up results. */
  cache: PrewarmCache;
  /** Performs the actual seller probe (HTTP I/O lives here, not in the scheduler). */
  prober: SellerProber;

  /** Maximum number of tasks running at once. Default: 4. */
  concurrency?: number;
  /** Minimum spacing between two probes of the same seller, in ms. Default: 30_000. */
  perSellerMinIntervalMs?: number;
  /** Global probe budget per rolling minute. Default: 30. */
  maxPrewarmPerMinute?: number;

  /**
   * Idle loop cadence in ms. Default: 60_000. The scheduler can also be driven
   * externally (PR-2.1 wires `tickIdle` into the existing registry-loop
   * heartbeat).
   */
  idleIntervalMs?: number;

  /** Lower bound of the startup jitter in ms (5-10s by default per §18.5.1). Default: 5_000. */
  startupJitterMinMs?: number;
  /** Upper bound of the startup jitter in ms. Default: 10_000. */
  startupJitterMaxMs?: number;

  /** Test hook: abortable delay. Defaults to a `setTimeout`-based implementation. */
  sleep?: (ms: number, signal?: AbortSignal) => Promise<void>;
  /** Test hook: random source in `[0, 1)`. Defaults to `Math.random`. */
  random?: () => number;
  /** Test hook: clock in epoch milliseconds. Defaults to `Date.now`. */
  now?: () => number;

  /** Optional protocol applied to tasks that do not specify one (e.g. preferred protocol). */
  protocol?: string;
  /** Optional payment method applied to tasks that do not specify one. */
  paymentMethod?: string;
}
49
+
50
/** Bookkeeping record for one enqueued prewarm of a (model, protocol, payment) triple. */
interface PrewarmTask {
  /** Sequential identifier assigned by the scheduler. */
  id: number;
  /** Model whose sellers are being warmed. */
  modelId: string;
  /** Why the task was enqueued. */
  reason: PrewarmReason;
  /** Protocol used to look up sellers. */
  protocol: string;
  /** Payment method used to look up sellers. */
  paymentMethod: string;
  /** Scheduler clock reading taken when the task was enqueued. */
  enqueuedAt: number;
  /** Ids of the sellers resolved for the task; empty until the task is dispatched. */
  sellerIds: string[];
  /** Scheduler clock reading taken when the task started running. */
  startedAt?: number;
  /** Scheduler clock reading taken when the task finished, when recorded. */
  completedAt?: number;
  /** Lifecycle state; everything except `queued` and `running` is terminal. */
  status:
    | "queued"
    | "running"
    | "succeeded"
    | "failed"
    | "canceled"
    | "rate_limited";
  /** Human-readable reason attached to a failed or canceled task. */
  errorMessage?: string;
}
63
+
64
/** Point-in-time counters returned by `PrewarmScheduler.stats()`. */
export interface PrewarmSchedulerStats {
  /** Tasks waiting in the queue. */
  queueDepth: number;
  /** Tasks currently running. */
  inFlight: number;
  /** Tasks enqueued since construction. */
  totalScheduled: number;
  /** Tasks that finished as `succeeded`. */
  totalSucceeded: number;
  /** Tasks that finished as `failed`. */
  totalFailed: number;
  /** Tasks rejected because the per-minute probe budget was exhausted. */
  totalRateLimited: number;
  /** Probe attempts recorded within the last 60 seconds. */
  recentProbesInLastMinute: number;
  /** Configured concurrency limit. */
  concurrency: number;
  /** Configured per-minute probe budget. */
  maxPrewarmPerMinute: number;
}
75
+
76
/**
 * Background scheduler that warms up sellers for a (model, protocol,
 * payment) triple on demand. The scheduler owns:
 *   - queue management with bounded concurrency (default 4)
 *   - per-seller rate limiting (default 30s between probes to the same
 *     seller, even across different models)
 *   - global rate limiting (default 30 probes/minute)
 *   - jitter before the startup sweep to avoid thundering herds
 *
 * The scheduler does NOT own HTTP I/O; that lives in the injected
 * `prober` so tests can swap in a deterministic stub.
 */
export class PrewarmScheduler {
  private readonly modelIndex: ModelIndex;
  private readonly cache: PrewarmCache;
  private readonly prober: SellerProber;

  private readonly concurrency: number;
  private readonly perSellerMinIntervalMs: number;
  private readonly maxPrewarmPerMinute: number;
  private readonly idleIntervalMs: number;
  private readonly startupJitterMinMs: number;
  private readonly startupJitterMaxMs: number;
  private readonly sleep: (ms: number, signal?: AbortSignal) => Promise<void>;
  private readonly random: () => number;
  private readonly now: () => number;
  private readonly protocol: string | undefined;
  private readonly paymentMethod: string | undefined;

  private readonly queue: PrewarmTask[] = [];
  /** Resolvers of the promises returned by `schedulePrewarm`, keyed by task id. */
  private readonly waiters = new Map<number, (task: PrewarmTask) => void>();
  private inFlight = 0;
  /** Timestamps of probe attempts, oldest first; pruned to the last minute on read. */
  private recentProbes: number[] = [];
  private readonly lastProbeAtBySeller = new Map<string, number>();
  private nextTaskId = 1;

  private totalScheduled = 0;
  private totalSucceeded = 0;
  private totalFailed = 0;
  private totalRateLimited = 0;

  private abortController: AbortController | null = null;
  private idleLoopPromise: Promise<void> | null = null;

  constructor(options: PrewarmSchedulerOptions) {
    this.modelIndex = options.modelIndex;
    this.cache = options.cache;
    this.prober = options.prober;
    this.concurrency = options.concurrency ?? 4;
    this.perSellerMinIntervalMs = options.perSellerMinIntervalMs ?? 30_000;
    this.maxPrewarmPerMinute = options.maxPrewarmPerMinute ?? 30;
    this.idleIntervalMs = options.idleIntervalMs ?? 60_000;
    this.startupJitterMinMs = options.startupJitterMinMs ?? 5_000;
    this.startupJitterMaxMs = options.startupJitterMaxMs ?? 10_000;
    this.sleep = options.sleep ?? defaultSleep;
    this.random = options.random ?? Math.random;
    this.now = options.now ?? Date.now;
    this.protocol = options.protocol;
    this.paymentMethod = options.paymentMethod;
  }

  /**
   * Start the background idle loop. Calling it again while the loop is
   * running is a no-op. The loop wakes every `idleIntervalMs` and calls
   * `tickIdle()`.
   */
  start(): void {
    // Gate on the loop itself, not on the controller: `dispatch()` creates a
    // controller lazily, so a `schedulePrewarm()` issued before `start()`
    // must not prevent the idle loop from starting.
    if (this.idleLoopPromise) {
      return;
    }
    if (!this.abortController) {
      this.abortController = new AbortController();
    }
    this.idleLoopPromise = this.runIdleLoop(this.abortController.signal);
  }

  /**
   * Stop the idle loop, cancel every queued task and abort the signal handed
   * to in-flight probes. A running task settles as `canceled` once its
   * prober returns or rejects; its results are not committed to the cache.
   * Resolves after the idle loop has exited.
   */
  async stop(): Promise<void> {
    const controller = this.abortController;
    const idleLoop = this.idleLoopPromise;
    this.abortController = null;
    this.idleLoopPromise = null;

    // Cancel queued work BEFORE yielding to the event loop. Otherwise an
    // in-flight task that observes the abort while we await the idle loop
    // would call `dispatch()` and start queued tasks on a fresh controller
    // that nobody can abort any more.
    const pending = this.queue.splice(0, this.queue.length);
    for (const task of pending) {
      this.settle(task, "canceled");
    }

    controller?.abort();
    if (idleLoop) {
      await idleLoop.catch(() => undefined);
    }
  }

  /**
   * Enqueue a prewarm for a (model, protocol, payment) triple.
   *
   * @param input.modelId Model whose sellers should be probed.
   * @param input.reason Why the prewarm was requested; recorded on the task.
   * @param input.protocol Overrides the scheduler-level protocol; falls back
   *   to `"chat_completions"` when neither is set.
   * @param input.paymentMethod Overrides the scheduler-level payment method;
   *   falls back to `"clawtip"` when neither is set.
   * @param input.blockOnFirst Accepted for interface compatibility; not used
   *   by this class.
   * @returns A promise that resolves (it never rejects) with the task once it
   *   reaches a terminal status: `succeeded`, `failed`, `canceled` or
   *   `rate_limited`.
   */
  schedulePrewarm(input: {
    modelId: string;
    reason: PrewarmReason;
    protocol?: string;
    paymentMethod?: string;
    blockOnFirst?: boolean;
  }): Promise<PrewarmTask> {
    const protocol = input.protocol ?? this.protocol ?? "chat_completions";
    const paymentMethod = input.paymentMethod ?? this.paymentMethod ?? "clawtip";
    const task: PrewarmTask = {
      id: this.nextTaskId++,
      modelId: input.modelId,
      reason: input.reason,
      protocol,
      paymentMethod,
      enqueuedAt: this.now(),
      sellerIds: [],
      status: "queued"
    };
    this.queue.push(task);
    this.totalScheduled += 1;
    logger.info("prewarm.scheduled", "prewarm task enqueued", {
      taskId: task.id,
      modelId: task.modelId,
      reason: task.reason,
      protocol,
      paymentMethod,
      queueDepth: this.queue.length
    });

    // Register the waiter before dispatching: `dispatch()` runs synchronously
    // and may settle the task right away (no sellers, budget exhausted).
    // Settling resolves the promise directly, so nothing has to poll the
    // task status on every event-loop turn.
    const settled = new Promise<PrewarmTask>((resolve) => {
      this.waiters.set(task.id, resolve);
    });
    this.requestDispatch();
    return settled;
  }

  /**
   * Run a one-shot sweep that probes every focus-set model. Used by the
   * `tb doctor --prewarm` explicit trigger and by the startup hook after the
   * configured jitter window. Resolves once every scheduled task has reached
   * a terminal state, or without scheduling anything when the scheduler is
   * stopped during the jitter sleep.
   */
  async runStartupPrewarm(modelIds: string[]): Promise<void> {
    // Capture the signal up front: `stop()` clears `this.abortController`,
    // so re-reading it after the sleep would miss the abort.
    const signal = this.abortController?.signal;
    try {
      await this.sleep(this.jitterMs(), signal);
    } catch (err) {
      if (signal?.aborted) {
        // The sleep was interrupted by `stop()`; that is a normal shutdown.
        return;
      }
      throw err;
    }
    if (signal?.aborted) {
      return;
    }
    await Promise.all(
      modelIds.map((modelId) => this.schedulePrewarm({ modelId, reason: "startup" }))
    );
  }

  /**
   * Enqueue a prewarm for every cache entry that the cache reports as
   * expiring soon (`isExpiringSoon(..., 60_000)`). Intended to be called from
   * the registry loop's heartbeat (replaces the v1 "all sellers" probe cycle
   * with "only the ones we are about to forget").
   *
   * @returns The number of tasks that were enqueued.
   */
  tickIdle(): number {
    const expiring = this.cache
      .snapshot()
      .filter((entry) =>
        this.cache.isExpiringSoon(entry.modelId, entry.protocol, entry.paymentMethod, 60_000)
      );
    for (const entry of expiring) {
      // The returned promise never rejects, so it is safe to drop.
      void this.schedulePrewarm({
        modelId: entry.modelId,
        protocol: entry.protocol,
        paymentMethod: entry.paymentMethod,
        reason: "idle"
      });
    }
    return expiring.length;
  }

  /** Snapshot of queue depth, in-flight count, lifetime counters and limits. */
  stats(): PrewarmSchedulerStats {
    const cutoff = this.now() - 60_000;
    const recentProbesInLastMinute = this.recentProbes.filter((ts) => ts >= cutoff).length;
    return {
      queueDepth: this.queue.length,
      inFlight: this.inFlight,
      totalScheduled: this.totalScheduled,
      totalSucceeded: this.totalSucceeded,
      totalFailed: this.totalFailed,
      totalRateLimited: this.totalRateLimited,
      recentProbesInLastMinute,
      concurrency: this.concurrency,
      maxPrewarmPerMinute: this.maxPrewarmPerMinute
    };
  }

  /** Random delay in `[startupJitterMinMs, startupJitterMaxMs)`; the span is clamped at zero. */
  private jitterMs(): number {
    const span = Math.max(0, this.startupJitterMaxMs - this.startupJitterMinMs);
    return this.startupJitterMinMs + Math.floor(this.random() * span);
  }

  /** Sleep / `tickIdle()` cycle that runs until `signal` aborts. */
  private async runIdleLoop(signal: AbortSignal): Promise<void> {
    while (!signal.aborted) {
      try {
        await this.sleep(this.idleIntervalMs, signal);
      } catch {
        // A rejecting sleep means the wait was interrupted; leave the loop.
        return;
      }
      if (signal.aborted) {
        return;
      }
      try {
        this.tickIdle();
      } catch (err) {
        logger.error("prewarm.idle.failed", "idle tick threw unexpectedly", {
          errorMessage: err instanceof Error ? err.message : String(err)
        });
      }
    }
  }

  /** Fire-and-forget `dispatch()` that logs, rather than drops, an unexpected failure. */
  private requestDispatch(): void {
    this.dispatch().catch((err: unknown) => {
      logger.error("prewarm.dispatch.unexpected", "dispatcher threw unexpectedly", {
        errorMessage: err instanceof Error ? err.message : String(err)
      });
    });
  }

  /**
   * Start queued tasks until the queue is empty or the concurrency limit is
   * reached. Tasks that cannot run (budget exhausted, no sellers, seller
   * lookup failure) are settled immediately. The body contains no `await`,
   * so it runs to completion synchronously.
   */
  private async dispatch(): Promise<void> {
    while (this.queue.length > 0 && this.inFlight < this.concurrency) {
      const task = this.queue.shift();
      if (!task) {
        break;
      }
      if (task.status === "canceled") {
        continue;
      }
      if (this.isOverBudget()) {
        this.settle(task, "rate_limited");
        logger.warn("prewarm.rate_limited", "global per-minute probe budget exhausted", {
          taskId: task.id,
          modelId: task.modelId,
          recentProbes: this.recentProbesInLastMinute()
        });
        continue;
      }

      let sellers: RegistrySeller[];
      try {
        sellers = this.modelIndex.sellersFor(task.modelId, {
          protocol: task.protocol,
          paymentMethod: task.paymentMethod
        });
      } catch (err) {
        // The task has already left the queue; settle it so its awaiter is
        // not left hanging, then keep draining the queue.
        const errorMessage = err instanceof Error ? err.message : String(err);
        logger.error("prewarm.lookup.failed", "seller lookup threw unexpectedly", {
          taskId: task.id,
          modelId: task.modelId,
          errorMessage
        });
        this.settle(task, "failed", errorMessage);
        continue;
      }

      task.sellerIds = sellers.map((s) => s.id);
      if (sellers.length === 0) {
        this.settle(task, "failed", "no sellers for model");
        logger.warn("prewarm.no_sellers", "no registry sellers match model", {
          taskId: task.id,
          modelId: task.modelId,
          protocol: task.protocol,
          paymentMethod: task.paymentMethod
        });
        continue;
      }

      // Ensure an abort controller exists so `stop()` works even when the
      // caller never invoked `start()`.
      if (!this.abortController) {
        this.abortController = new AbortController();
      }
      this.inFlight += 1;
      task.status = "running";
      task.startedAt = this.now();
      // Capture the signal so an in-flight task can still observe `stop()`
      // after the controller reference is cleared.
      const signal = this.abortController.signal;
      // Run without awaiting so the loop can keep dispatching.
      void this.runTask(task, sellers, signal);
    }
  }

  /**
   * Run one task to completion. Whatever happens, the task is settled, its
   * concurrency slot is released and the dispatcher is kicked again.
   */
  private async runTask(task: PrewarmTask, sellers: RegistrySeller[], signal: AbortSignal | undefined): Promise<void> {
    try {
      const outcome = await this.probeSellers(task, sellers, signal);
      this.settle(task, outcome.status, outcome.errorMessage);
    } catch (err) {
      // E.g. a cache call threw. Without this the task would stay `running`
      // forever and the slot would never be returned.
      const errorMessage = err instanceof Error ? err.message : String(err);
      logger.error("prewarm.task.unexpected", "prewarm task threw unexpectedly", {
        taskId: task.id,
        modelId: task.modelId,
        errorMessage
      });
      this.settle(task, "failed", errorMessage);
    } finally {
      this.inFlight -= 1;
      this.requestDispatch();
    }
  }

  /**
   * Probe the task's sellers one after another and commit the reachable ones
   * to the cache.
   *
   * @returns The terminal status for the task, plus an error message where
   *   one applies.
   */
  private async probeSellers(
    task: PrewarmTask,
    sellers: RegistrySeller[],
    signal: AbortSignal | undefined
  ): Promise<{ status: "succeeded" | "failed" | "canceled"; errorMessage?: string }> {
    this.cache.beginWarming(task.modelId, task.protocol, task.paymentMethod);
    const candidates: PrewarmCandidate[] = [];
    let anyOk = false;
    let probedAny = false;

    for (const seller of sellers) {
      if (signal?.aborted) {
        return { status: "canceled" };
      }
      if (this.isSellerRateLimited(seller.id)) {
        logger.debug("prewarm.seller_skipped", "seller probe skipped due to per-seller rate limit", {
          taskId: task.id,
          sellerId: seller.id
        });
        continue;
      }
      probedAny = true;
      this.recordProbeAttempt();
      const probeSignal = composeProbeSignal(signal);
      let result: ProbeResult;
      try {
        result = await this.prober(seller, probeSignal.signal);
      } catch (err) {
        const errorMessage = err instanceof Error ? err.message : String(err);
        // A rejection while the scheduler is stopping is a canceled run.
        if (signal?.aborted) {
          return { status: "canceled", errorMessage };
        }
        // Any other rejection is a per-seller failure; continue with the
        // next seller.
        logger.error("prewarm.probe.threw", "seller prober threw unexpectedly", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          errorMessage
        });
        candidates.push({
          sellerId: seller.id,
          url: seller.url,
          healthScore: 0,
          lastSuccessAt: 0,
          lastFailAt: this.now(),
          avgLatencyMs: 0
        });
        continue;
      }

      // The seller answered, so the per-seller interval starts now, even if
      // the scheduler was stopped in the meantime.
      this.lastProbeAtBySeller.set(seller.id, this.now());
      if (signal?.aborted) {
        return { status: "canceled" };
      }

      if (result.ok) {
        anyOk = true;
        candidates.push({
          sellerId: seller.id,
          url: seller.url,
          healthScore: scoreFromLatency(result.latencyMs),
          lastSuccessAt: this.now(),
          lastFailAt: 0,
          avgLatencyMs: result.latencyMs
        });
        logger.info("prewarm.succeeded", "seller probe succeeded", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          latencyMs: result.latencyMs,
          httpStatus: result.httpStatus
        });
      } else {
        candidates.push({
          sellerId: seller.id,
          url: seller.url,
          healthScore: 0,
          lastSuccessAt: 0,
          lastFailAt: this.now(),
          avgLatencyMs: result.latencyMs
        });
        logger.warn("prewarm.failed", "seller probe failed", {
          taskId: task.id,
          sellerId: seller.id,
          modelId: task.modelId,
          errorMessage: result.errorMessage,
          httpStatus: result.httpStatus
        });
      }
    }

    if (!probedAny) {
      // Every seller was rate-limited: nothing is committed, and the task is
      // reported as a no-op success so callers do not see a transient
      // failure.
      return { status: "succeeded" };
    }

    // Drop hard failures so the cache only stores reachable sellers.
    const viable = candidates.filter((c) => c.healthScore > 0);
    this.cache.commitWarm({
      modelId: task.modelId,
      protocol: task.protocol,
      paymentMethod: task.paymentMethod,
      candidates: viable
    });

    if (!anyOk) {
      // No seller responded; record the failure on the cache entry so the
      // scheduler can back off.
      this.cache.recordFailure(task.modelId, task.protocol, task.paymentMethod, "all probes failed");
      return { status: "failed", errorMessage: "all probes failed" };
    }
    return { status: "succeeded" };
  }

  /**
   * Move a task into a terminal state exactly once: stamp it, update the
   * lifetime counters and resolve the promise handed out by
   * `schedulePrewarm`.
   */
  private settle(
    task: PrewarmTask,
    status: Exclude<PrewarmTask["status"], "queued" | "running">,
    errorMessage?: string
  ): void {
    if (task.status !== "queued" && task.status !== "running") {
      return; // already terminal
    }
    task.status = status;
    if (errorMessage !== undefined) {
      task.errorMessage = errorMessage;
    }
    task.completedAt = this.now();
    if (status === "succeeded") {
      this.totalSucceeded += 1;
    } else if (status === "failed") {
      this.totalFailed += 1;
    } else if (status === "rate_limited") {
      this.totalRateLimited += 1;
    }
    const resolve = this.waiters.get(task.id);
    if (resolve) {
      this.waiters.delete(task.id);
      resolve(task);
    }
  }

  /** Whether the global per-minute probe budget is used up. */
  private isOverBudget(): boolean {
    return this.recentProbesInLastMinute() >= this.maxPrewarmPerMinute;
  }

  /** Prune probe timestamps older than 60s and return how many remain. */
  private recentProbesInLastMinute(): number {
    const cutoff = this.now() - 60_000;
    const firstFresh = this.recentProbes.findIndex((ts) => ts >= cutoff);
    if (firstFresh === -1) {
      this.recentProbes.length = 0;
    } else if (firstFresh > 0) {
      this.recentProbes.splice(0, firstFresh);
    }
    return this.recentProbes.length;
  }

  /** Count one probe attempt against the global budget. */
  private recordProbeAttempt(): void {
    this.recentProbes.push(this.now());
  }

  /** Whether `sellerId` was probed less than `perSellerMinIntervalMs` ago. */
  private isSellerRateLimited(sellerId: string): boolean {
    const last = this.lastProbeAtBySeller.get(sellerId);
    if (last === undefined) {
      return false;
    }
    return this.now() - last < this.perSellerMinIntervalMs;
  }
}
530
+
531
/**
 * Default abortable delay built on `setTimeout`.
 *
 * @param ms Delay in milliseconds.
 * @param signal Optional signal that interrupts the wait.
 * @returns A promise that resolves after `ms`, or rejects with
 *   `Error("aborted")` if `signal` is already aborted or aborts first.
 */
function defaultSleep(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    if (signal?.aborted) {
      reject(new Error("aborted"));
      return;
    }

    // Declared up front so both callbacks can release each other's resources.
    let timer: ReturnType<typeof setTimeout> | undefined;

    function handleAbort(): void {
      clearTimeout(timer);
      signal?.removeEventListener("abort", handleAbort);
      reject(new Error("aborted"));
    }

    timer = setTimeout(() => {
      clearTimeout(timer);
      signal?.removeEventListener("abort", handleAbort);
      resolve();
    }, ms);

    signal?.addEventListener("abort", handleAbort, { once: true });
  });
}
552
+
553
/**
 * Create a per-probe `AbortController` that follows the scheduler-wide
 * signal: when `parent` aborts (or is already aborted) the returned
 * controller is aborted with the same reason. Probers receive the scoped
 * signal, so stopping the scheduler propagates into any in-flight HTTP
 * request (typically wired through `fetch(..., { signal })`).
 *
 * Note: the relay listener stays registered on `parent` until `parent`
 * aborts; it is not removed when the probe finishes.
 *
 * @param parent Scheduler-level signal, or `undefined` for a standalone controller.
 * @returns The new controller (its `signal` and `abort` members).
 */
function composeProbeSignal(parent: AbortSignal | undefined): { signal: AbortSignal; abort(reason?: unknown): void } {
  const scoped = new AbortController();
  if (!parent) {
    return scoped;
  }
  if (parent.aborted) {
    scoped.abort(parent.reason);
    return scoped;
  }
  const relayAbort = (): void => {
    scoped.abort(parent.reason);
  };
  parent.addEventListener("abort", relayAbort, { once: true });
  return scoped;
}
570
+
571
/**
 * Map a probe latency to a health score.
 *
 * @param latencyMs Measured latency in milliseconds.
 * @returns 100 / 90 / 75 / 60 / 40 for latencies up to 100 / 300 / 800 /
 *   1500 / 3000 ms respectively, 20 for anything slower, and 30 when the
 *   latency is negative or not a finite number.
 */
function scoreFromLatency(latencyMs: number): number {
  const measurable = Number.isFinite(latencyMs) && latencyMs >= 0;
  if (!measurable) {
    return 30;
  }
  // [inclusive upper bound in ms, score], fastest tier first.
  const tiers: ReadonlyArray<readonly [number, number]> = [
    [100, 100],
    [300, 90],
    [800, 75],
    [1500, 60],
    [3000, 40]
  ];
  for (const [ceilingMs, score] of tiers) {
    if (latencyMs <= ceilingMs) {
      return score;
    }
  }
  return 20;
}
@@ -469,7 +469,12 @@ function opencodeConfig(home: string, proxyUrl: string, config: ProviderRuntimeC
469
469
  : {};
470
470
  providers.tokenbuddy = {
471
471
  name: "TokenBuddy",
472
- npm: "@ai-sdk/openai",
472
+ // v1.0.10 起默认走 OpenAI Responses API 协议(/v1/responses)。
473
+ // 验证:之前默认 @ai-sdk/openai(chat completions)也能 work,但 Responses API
474
+ // 才是上游(code.shoestravel.xin 等)原生支持的 SSE 事件链,type 字段更标准
475
+ // (response.created / response.output_text.delta / response.completed),
476
+ // 让 buyer 端 SseUsageExtractor 能稳定 parse usage 字段。
477
+ npm: "@ai-sdk/openai-responses",
473
478
  options: {
474
479
  apiKey: PROXY_ACCESS_TOKEN_PLACEHOLDER,
475
480
  baseURL: openAiBaseUrl(proxyUrl),
@@ -483,6 +488,9 @@ function opencodeConfig(home: string, proxyUrl: string, config: ProviderRuntimeC
483
488
  },
484
489
  };
485
490
  current.provider = providers;
491
+ // 写顶层 model / small_model,让 opencode 默认走 tokenbuddy 而不是残留的 openai/qwen-plus 死链
492
+ current.model = `tokenbuddy/${model}`;
493
+ current.small_model = `tokenbuddy/${model}`;
486
494
  return [makeChange("opencode", configPath, "configure OpenCode provider for TokenBuddy proxy", jsonContent(current))];
487
495
  }
488
496