@tokenbuddy/tokenbuddy 1.0.9 → 1.0.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71):
  1. package/dist/src/buyer-store.d.ts +13 -0
  2. package/dist/src/buyer-store.d.ts.map +1 -1
  3. package/dist/src/buyer-store.js +21 -2
  4. package/dist/src/buyer-store.js.map +1 -1
  5. package/dist/src/cli.d.ts.map +1 -1
  6. package/dist/src/cli.js +54 -0
  7. package/dist/src/cli.js.map +1 -1
  8. package/dist/src/credit-tracker.d.ts +118 -0
  9. package/dist/src/credit-tracker.d.ts.map +1 -0
  10. package/dist/src/credit-tracker.js +220 -0
  11. package/dist/src/credit-tracker.js.map +1 -0
  12. package/dist/src/daemon.d.ts +49 -4
  13. package/dist/src/daemon.d.ts.map +1 -1
  14. package/dist/src/daemon.js +541 -405
  15. package/dist/src/daemon.js.map +1 -1
  16. package/dist/src/model-index.d.ts +86 -0
  17. package/dist/src/model-index.d.ts.map +1 -0
  18. package/dist/src/model-index.js +214 -0
  19. package/dist/src/model-index.js.map +1 -0
  20. package/dist/src/prewarm-cache.d.ts +149 -0
  21. package/dist/src/prewarm-cache.d.ts.map +1 -0
  22. package/dist/src/prewarm-cache.js +288 -0
  23. package/dist/src/prewarm-cache.js.map +1 -0
  24. package/dist/src/prewarm-scheduler.d.ts +150 -0
  25. package/dist/src/prewarm-scheduler.d.ts.map +1 -0
  26. package/dist/src/prewarm-scheduler.js +484 -0
  27. package/dist/src/prewarm-scheduler.js.map +1 -0
  28. package/dist/src/provider-install.d.ts.map +1 -1
  29. package/dist/src/provider-install.js +10 -0
  30. package/dist/src/provider-install.js.map +1 -1
  31. package/dist/src/route-failover.d.ts +96 -0
  32. package/dist/src/route-failover.d.ts.map +1 -0
  33. package/dist/src/route-failover.js +177 -0
  34. package/dist/src/route-failover.js.map +1 -0
  35. package/dist/src/seller-catalog.d.ts +26 -0
  36. package/dist/src/seller-catalog.d.ts.map +1 -1
  37. package/dist/src/seller-catalog.js +40 -0
  38. package/dist/src/seller-catalog.js.map +1 -1
  39. package/dist/src/seller-pool.d.ts +127 -0
  40. package/dist/src/seller-pool.d.ts.map +1 -0
  41. package/dist/src/seller-pool.js +243 -0
  42. package/dist/src/seller-pool.js.map +1 -0
  43. package/dist/src/stream-failover.d.ts +78 -0
  44. package/dist/src/stream-failover.d.ts.map +1 -0
  45. package/dist/src/stream-failover.js +93 -0
  46. package/dist/src/stream-failover.js.map +1 -0
  47. package/package.json +1 -1
  48. package/src/buyer-store.ts +32 -2
  49. package/src/cli.ts +61 -0
  50. package/src/credit-tracker.test.ts +165 -0
  51. package/src/credit-tracker.ts +269 -0
  52. package/src/daemon.ts +569 -445
  53. package/src/model-index.test.ts +184 -0
  54. package/src/model-index.ts +266 -0
  55. package/src/prewarm-cache.test.ts +281 -0
  56. package/src/prewarm-cache.ts +373 -0
  57. package/src/prewarm-scheduler.test.ts +367 -0
  58. package/src/prewarm-scheduler.ts +581 -0
  59. package/src/provider-install.ts +10 -0
  60. package/src/route-failover.test.ts +193 -0
  61. package/src/route-failover.ts +233 -0
  62. package/src/seller-catalog-413.test.ts +61 -0
  63. package/src/seller-catalog.ts +47 -0
  64. package/src/seller-pool.test.ts +231 -0
  65. package/src/seller-pool.ts +333 -0
  66. package/src/stream-failover.test.ts +52 -0
  67. package/src/stream-failover.ts +129 -0
  68. package/src/thousand-seller.test.ts +151 -0
  69. package/tests/daemon-413-fallback.test.ts +92 -0
  70. package/tests/e2e.test.ts +3 -2
  71. package/tests/tokenbuddy.test.ts +70 -11
@@ -0,0 +1,231 @@
1
+ import { CreditTracker } from "../src/credit-tracker.js";
2
+ import { ModelIndex } from "../src/model-index.js";
3
+ import { PrewarmCache } from "../src/prewarm-cache.js";
4
+ import { SellerPool, type FailureKind } from "../src/seller-pool.js";
5
+ import type { RegistrySeller } from "../src/seller-catalog.js";
6
+
7
/**
 * Test fixture: builds a `RegistrySeller` from a seller id plus optional
 * overrides. Unspecified fields fall back to values derived from the id
 * (name, example.com URL) or to the suite defaults (`chat_completions`
 * protocol, `clawtip` payment). `models` is passed through untouched.
 */
function makeSeller(overrides: Partial<RegistrySeller> & { id: string; models?: string[] }): RegistrySeller {
  const { id, models } = overrides;
  const seller: RegistrySeller = {
    id,
    name: overrides.name ?? id,
    url: overrides.url ?? `https://${id}.example.com`,
    supportedProtocols: overrides.supportedProtocols ?? ["chat_completions"],
    paymentMethods: overrides.paymentMethods ?? ["clawtip"],
    models
  };
  return seller;
}
17
+
18
/**
 * Test fixture: a manually driven clock. `now` holds the current fake
 * timestamp (ms) and `advance(ms)` moves it forward by the given amount.
 */
function makeClock(start = 1_000_000): { now: number; advance: (ms: number) => void } {
  const clock = {
    now: start,
    advance(ms: number): void {
      clock.now = clock.now + ms;
    }
  };
  return clock;
}
22
+
23
/**
 * Test fixture: assembles the collaborators a `SellerPool` needs.
 *
 * Every seeded seller advertises the single model "gpt-4o". The model index
 * is rebuilt at registry version 1 with the first seeded seller as default,
 * and one warm entry for (gpt-4o, chat_completions, clawtip) is committed to
 * the prewarm cache listing every seeded seller (health score 80 unless the
 * seed overrides it).
 */
function build(seed: { id: string; healthScore?: number }[] = []): { index: ModelIndex; cache: PrewarmCache; credit: CreditTracker; sellers: RegistrySeller[] } {
  const sellers: RegistrySeller[] = [];
  for (const { id } of seed) {
    sellers.push(makeSeller({ id, models: ["gpt-4o"] }));
  }

  const index = new ModelIndex();
  index.rebuild(sellers, { registryVersion: 1, defaultSellerId: seed[0]?.id });

  const warmCandidates = seed.map(({ id, healthScore }) => ({
    sellerId: id,
    url: `https://${id}.example.com`,
    healthScore: healthScore ?? 80
  }));
  const cache = new PrewarmCache();
  cache.commitWarm({
    modelId: "gpt-4o",
    protocol: "chat_completions",
    paymentMethod: "clawtip",
    candidates: warmCandidates
  });

  const credit = new CreditTracker();
  return { index, cache, credit, sellers };
}
43
+
44
describe("SellerPool", () => {
  type Fixture = ReturnType<typeof build>;
  type PoolTuning = Partial<ConstructorParameters<typeof SellerPool>[0]>;

  /** Wires a pool to the fixture's index, cache, and credit tracker. */
  const poolFor = (ctx: Fixture, tuning: PoolTuning = {}): SellerPool =>
    new SellerPool({ modelIndex: ctx.index, cache: ctx.cache, creditTracker: ctx.credit, ...tuning });

  /** Fresh lookup triple used by every pick() call in this suite. */
  const gpt4o = () => ({ modelId: "gpt-4o", protocol: "chat_completions", paymentMethod: "clawtip" });

  /** Seller ids returned by a pick for the suite's standard triple, in order. */
  const pickedIds = (pool: SellerPool): string[] => pool.pick(gpt4o()).candidates.map((c) => c.entry.sellerId);

  /** Circuit state of the first (usually only) pool entry. */
  const firstCircuit = (pool: SellerPool) => pool.snapshot()[0].circuit;

  test("sync rebuilds entries from the prewarm cache", () => {
    const pool = poolFor(build([{ id: "s1" }, { id: "s2" }]));

    expect(pool.sync()).toBe(2);

    const ids = pool.snapshot().map((e) => e.sellerId);
    ids.sort();
    expect(ids).toEqual(["s1", "s2"]);
  });

  test("sync drops entries whose seller disappeared from the registry index", () => {
    const ctx = build([{ id: "s1" }, { id: "s2" }]);
    const pool = poolFor(ctx);
    pool.sync();
    expect(pool.size()).toBe(2);

    // Shrink the registry so that s1 is the only seller left.
    const survivors = [makeSeller({ id: "s1", models: ["gpt-4o"] })];
    ctx.index.rebuild(survivors, { registryVersion: 2 });

    expect(pool.sync()).toBe(1);
    expect(pool.snapshot().map((e) => e.sellerId)).toEqual(["s1"]);
  });

  test("pick returns candidates sorted by health score and skips open circuit entries", () => {
    const pool = poolFor(
      build([
        { id: "s1", healthScore: 90 },
        { id: "s2", healthScore: 50 },
        { id: "s3", healthScore: 70 }
      ])
    );
    pool.sync();

    expect(pickedIds(pool)).toEqual(["s1", "s3", "s2"]);

    // With s1 forced open, only the remaining two may be offered.
    pool.markOpen("s1", "registry_gone");
    expect(pickedIds(pool)).toEqual(["s3", "s2"]);
  });

  test("half_open recycle happens after openStateMs has elapsed", () => {
    const clock = makeClock();
    const pool = poolFor(build([{ id: "s1" }]), {
      failureThreshold: 1, // a single failure opens the circuit, keeping the test focused
      openStateMs: 1000,
      now: () => clock.now
    });
    pool.sync();
    pool.recordFailure("s1", "soft_5xx");
    expect(firstCircuit(pool)).toBe("open");

    // 500ms in: the open window has not elapsed, so nothing is offered.
    clock.advance(500);
    expect(pool.pick(gpt4o()).candidates).toEqual([]);

    // 1100ms in: the window has elapsed, so s1 is recycled to half_open and offered.
    clock.advance(600);
    expect(pickedIds(pool)).toEqual(["s1"]);
    expect(firstCircuit(pool)).toBe("half_open");
  });

  test("recordFailure escalates to open after the configured threshold", () => {
    const clock = makeClock();
    const pool = poolFor(build([{ id: "s1" }]), { failureThreshold: 3, now: () => clock.now });
    pool.sync();

    // The first two failures stay below the threshold; the third trips it.
    const expectedAfterEachFailure = ["closed", "closed", "open"];
    for (const expected of expectedAfterEachFailure) {
      pool.recordFailure("s1", "soft_5xx");
      expect(firstCircuit(pool)).toBe(expected);
    }
  });

  test("recordSuccess closes the circuit and reports to the credit tracker", () => {
    const clock = makeClock();
    const ctx = build([{ id: "s1" }]);
    const pool = poolFor(ctx, {
      failureThreshold: 1, // a single failure opens the circuit
      now: () => clock.now
    });
    pool.sync();

    ctx.credit.recordPurchase("s1", 1_000_000, 1_000_000);
    pool.recordFailure("s1", "soft_5xx");
    expect(firstCircuit(pool)).toBe("open");

    clock.advance(31_000); // move past the open window
    pool.recordSuccess("s1", 250_000);

    const [entry] = pool.snapshot();
    expect(entry.circuit).toBe("closed");
    expect(entry.consecutiveFailures).toBe(0);
    expect(ctx.credit.getEntry("s1")?.currentBalanceMicros).toBe(250_000);
  });

  test("hard failure kinds (hard_4xx, auth_invalid) immediately open the circuit and transfer leftover", () => {
    const ctx = build([{ id: "s1" }]);
    const pool = poolFor(ctx);
    pool.sync();

    ctx.credit.recordPurchase("s1", 1_000_000, 500_000);
    const kinds: FailureKind[] = ["hard_4xx", "auth_invalid"];
    kinds.forEach((kind) => {
      ctx.credit.recordPurchase("s1", 1_000_000, 500_000);
      const entry = pool.recordFailure("s1", kind, { reason: "test" });
      expect(entry?.circuit).toBe("open");
    });

    expect(ctx.credit.summary().totalWastedMicros).toBeGreaterThan(0);
  });

  test("inspect surfaces freshPurchase and autoPurchaseAvailable flags", () => {
    const ctx = build([{ id: "s1" }]);
    const pool = poolFor(ctx);
    pool.sync();

    ctx.credit.recordPurchase("s1", 1_000_000, 1_000_000);

    const { entry, freshPurchase, autoPurchaseAvailable } = pool.inspect("s1");
    expect(entry?.sellerId).toBe("s1");
    expect(freshPurchase).toBe(true);
    expect(autoPurchaseAvailable).toBe(true);
  });

  test("markOpen force-opens a circuit without changing other state", () => {
    const pool = poolFor(build([{ id: "s1" }]));
    pool.sync();

    pool.markOpen("s1", "registry_disappeared");

    expect(firstCircuit(pool)).toBe("open");
  });

  test("pick returns an empty result when the prewarm cache has no entry for the model", () => {
    const index = new ModelIndex();
    index.rebuild([makeSeller({ id: "s1", models: ["gpt-4o"] })], { registryVersion: 1 });
    const pool = new SellerPool({
      modelIndex: index,
      cache: new PrewarmCache(),
      creditTracker: new CreditTracker()
    });
    pool.sync();

    const { candidates, reason } = pool.pick(gpt4o());
    expect(candidates).toEqual([]);
    // Nothing has been prewarmed for this model, so the pool reports
    // "no_prewarm_candidates" rather than "prewarm_cache_empty". Callers
    // rely on that distinction when deciding whether to schedule a lazy
    // prewarm.
    expect(reason).toBe("no_prewarm_candidates");
  });

  test("pick falls back to the registry index when the cache has no entry yet", () => {
    const registry = ["s1", "s2"].map((id) => makeSeller({ id, models: ["gpt-4o"] }));
    const index = new ModelIndex();
    index.rebuild(registry, { registryVersion: 1 });
    const pool = new SellerPool({
      modelIndex: index,
      cache: new PrewarmCache(),
      creditTracker: new CreditTracker()
    });
    pool.sync();

    // No probe has run, so the pool itself is empty. pick must report no
    // candidates while still exposing the registry-level resolution, which
    // lets the caller decide whether to schedule a lazy prewarm.
    const { candidates, resolved } = pool.pick(gpt4o());
    expect(candidates).toEqual([]);
    expect(resolved.matched).toBe(true);
    expect(resolved.candidates).toHaveLength(2);
  });
});
@@ -0,0 +1,333 @@
1
+ import { createModuleLogger } from "@tokenbuddy/logging";
2
+ import type { RegistrySeller } from "./seller-catalog.js";
3
+ import type { ModelIndex } from "./model-index.js";
4
+ import type { PrewarmCache, PrewarmCandidate, PrewarmEntry } from "./prewarm-cache.js";
5
+ import type { CreditTracker } from "./credit-tracker.js";
6
+
7
/** Logger for this module, created under the "tb-proxyd:seller-pool" name. */
const logger = createModuleLogger("tb-proxyd:seller-pool");

/**
 * Circuit-breaker state of a pool entry. `SellerPool.pick` skips `open`
 * entries and offers `closed` and `half_open` ones.
 */
export type CircuitState = "closed" | "half_open" | "open";

/** Classification of a failed attempt, as passed to `SellerPool.recordFailure`. */
export type FailureKind =
  /** 400/404/422 — the seller is wrong for this request. */
  | "hard_4xx"
  /** 401/403 — token invalid. */
  | "auth_invalid"
  /** 402. */
  | "insufficient_funds"
  /** 429/5xx/timeout/network. */
  | "soft_5xx"
  /** Buyer deadline exceeded. */
  | "deadline"
  /** Upstream stream broken after the first chunk. */
  | "stream_aborted"
  /** Pool had no candidates for the request. */
  | "no_compatible";
19
+
20
/**
 * Per-seller state held by `SellerPool`: identity, circuit-breaker
 * bookkeeping, and a copy of the seller's latest prewarm results.
 */
export interface PoolEntry {
  sellerId: string;
  url: string;
  /** Registry record the model index returned for this seller. */
  registrySeller: RegistrySeller;

  /** Current circuit-breaker state. */
  circuit: CircuitState;
  /** Failures recorded since the last recorded success. */
  consecutiveFailures: number;
  /** Failure timestamps (ms) that still fall inside the sliding window. */
  recentFailures: number[];
  lastSuccessAt: number;
  lastFailAt: number;
  /** `warmedAt` of the prewarm cache entry this pool entry was built from. */
  lastProbeAt: number;

  // The prewarm cache is the source of truth for the two fields below. The
  // pool keeps its own copy so the hot path can answer health questions
  // without consulting the cache map on every request.
  healthScore: number;
  avgLatencyMs: number;
}
36
+
37
/** Arguments to `SellerPool.pick`: the lookup triple plus optional tuning. */
export interface PickOptions {
  modelId: string;
  protocol: string;
  paymentMethod: string;
  /** Maximum number of candidates to return; `pick` uses 4 when omitted. */
  limit?: number;
  /** Timestamp (ms) to evaluate circuits at; `pick` uses the pool clock when omitted. */
  now?: number;
}
44
+
45
/** Outcome of `SellerPool.pick`. */
export interface PickResult {
  /** Selected pool entries, each paired with its registry record. */
  candidates: { entry: PoolEntry; registrySeller: RegistrySeller }[];
  /**
   * Why the candidate list looks the way it does. `pick` emits
   * "prewarm_cache", "no_prewarm_candidates", or "prewarm_cache_empty".
   */
  reason: string;
  /** Registry-level resolution of the requested model. */
  resolved: ModelIndexResolution;
}
50
+
51
/**
 * Pool-facing view of a model-index lookup. Identical to the shape that
 * `asResolution` receives, except that the seller list is exposed as
 * `candidates`.
 */
export interface ModelIndexResolution {
  modelId: string;
  matched: boolean;
  /** Registry sellers the index resolved for the model. */
  candidates: RegistrySeller[];
  // NOTE(review): passed through from the model index unchanged; its exact
  // meaning is defined there — confirm before relying on it.
  missingModelsFlag: number;
}
57
+
58
/** Constructor options for `SellerPool`. */
export interface SellerPoolOptions {
  modelIndex: ModelIndex;
  cache: PrewarmCache;
  creditTracker: CreditTracker;

  // Circuit breaker thresholds (v1.2 §13).
  /** Consecutive failures that open the circuit. Default 3. */
  failureThreshold?: number;
  /** Sliding-window length in ms. Default 60_000 (1 minute). */
  windowMs?: number;
  /** Windowed failure rate at or above which the circuit opens. Default 0.5. */
  windowFailureRate?: number;
  /** How long (ms) an open circuit stays excluded before a half-open trial. Default 30_000. */
  openStateMs?: number;

  /** Clock override in ms; the pool uses `Date.now` when omitted. */
  now?: () => number;

  // PoolEntry -> CircuitState transition hooks for tests.
  // NOTE(review): the SellerPool constructor in this file never reads
  // `applyRegistry`, so supplying it currently has no effect.
  applyRegistry?: (entries: PoolEntry[], registry: RegistrySeller[]) => PoolEntry[];
}
71
+
72
/** Circuit-breaker settings used when the matching `SellerPoolOptions` field is omitted. */
const DEFAULTS = {
  // Consecutive failures before the circuit opens.
  failureThreshold: 3,
  // Sliding-window length in milliseconds (one minute).
  windowMs: 60_000,
  // Windowed failure rate that opens the circuit.
  windowFailureRate: 0.5,
  // Milliseconds an open circuit waits before a half-open trial.
  openStateMs: 30_000
};
78
+
79
/**
 * v2 SellerPool: combines `ModelIndex` (registry index), `PrewarmCache`
 * (probe results), and `CreditTracker` (balance protection) into a single
 * source of truth used by the route-failover controller. The pool is
 * process-local and rebuilds its entry list from the cache on every
 * `sync()`; sellers not yet present in the cache are not in the pool.
 */
export class SellerPool {
  private readonly modelIndex: ModelIndex;
  private readonly cache: PrewarmCache;
  private readonly creditTracker: CreditTracker;
  private readonly failureThreshold: number;
  private readonly windowMs: number;
  private readonly windowFailureRate: number;
  private readonly openStateMs: number;
  private readonly now: () => number;

  /** Pool entries keyed by seller id. Entries are replaced, never mutated in place. */
  private entries = new Map<string, PoolEntry>();

  /**
   * @param options Collaborators plus optional circuit-breaker tuning. Any
   *   omitted threshold falls back to `DEFAULTS`; an omitted clock falls
   *   back to `Date.now`.
   */
  constructor(options: SellerPoolOptions) {
    this.modelIndex = options.modelIndex;
    this.cache = options.cache;
    this.creditTracker = options.creditTracker;
    this.failureThreshold = options.failureThreshold ?? DEFAULTS.failureThreshold;
    this.windowMs = options.windowMs ?? DEFAULTS.windowMs;
    this.windowFailureRate = options.windowFailureRate ?? DEFAULTS.windowFailureRate;
    this.openStateMs = options.openStateMs ?? DEFAULTS.openStateMs;
    this.now = options.now ?? Date.now;
  }

  /**
   * Rebuild entries from the current prewarm cache. Called by
   * `route-failover` whenever the cache is mutated (commit, invalidate,
   * etc.) so the pool always reflects the latest probe results.
   *
   * Circuit state and failure counters of sellers that were already in the
   * pool are carried over. Sellers the model index no longer knows are
   * dropped.
   *
   * @returns The number of entries in the pool after the rebuild.
   */
  sync(): number {
    const fresh = new Map<string, PoolEntry>();
    for (const entry of this.cache.snapshot()) {
      for (const candidate of entry.candidates) {
        const registry = this.modelIndex.getSeller(candidate.sellerId);
        if (!registry) {
          // Seller disappeared from the registry since the probe; skip.
          continue;
        }
        const previous = this.entries.get(candidate.sellerId);
        fresh.set(candidate.sellerId, {
          sellerId: candidate.sellerId,
          url: candidate.url,
          registrySeller: registry,
          circuit: previous?.circuit ?? "closed",
          consecutiveFailures: previous?.consecutiveFailures ?? 0,
          recentFailures: previous?.recentFailures ?? [],
          // Keep whichever timestamp is more recent. The carried-over circuit
          // state is timed from the pool's own `lastFailAt`, so letting an
          // older probe timestamp overwrite it would rewind the open window
          // and recycle the seller to half_open too early.
          lastSuccessAt: Math.max(candidate.lastSuccessAt || 0, previous?.lastSuccessAt || 0),
          lastFailAt: Math.max(candidate.lastFailAt || 0, previous?.lastFailAt || 0),
          lastProbeAt: entry.warmedAt,
          healthScore: candidate.healthScore,
          avgLatencyMs: candidate.avgLatencyMs
        });
      }
    }
    this.entries = fresh;
    return this.entries.size;
  }

  /**
   * Pick up to `limit` candidates for a (model, protocol, payment) triple.
   * Sellers in the `open` circuit are skipped unless their open state has
   * expired (they are flipped to `half_open` and included). Candidates are
   * sorted by health score (descending) so the strongest seller goes first.
   *
   * @param options Lookup triple plus optional `limit` (default 4; negative
   *   values are treated as 0) and `now` override.
   * @returns The selected candidates, a `reason` string
   *   ("prewarm_cache", "no_prewarm_candidates", or "prewarm_cache_empty"
   *   when the cache holds an entry with zero candidates), and the
   *   registry-level resolution of the model.
   */
  pick(options: PickOptions): PickResult {
    const now = options.now ?? this.now();
    // A negative limit would turn `slice(0, limit)` into "drop the last |limit|
    // rows"; clamp so it means "no candidates" instead.
    const limit = Math.max(0, options.limit ?? 4);
    const freshness = this.cache.freshness(options.modelId, options.protocol, options.paymentMethod);
    const resolved = this.modelIndex.resolve(options.modelId, {
      protocol: options.protocol,
      paymentMethod: options.paymentMethod
    });

    if (freshness.entry && freshness.entry.candidates.length === 0) {
      return {
        candidates: [],
        reason: "prewarm_cache_empty",
        resolved: asResolution(resolved)
      };
    }

    const candidates = (freshness.entry?.candidates ?? [])
      .map((candidate) => {
        // Cache candidates that never made it into the pool (e.g. dropped by
        // sync) are ignored.
        const entry = this.entries.get(candidate.sellerId);
        if (!entry) {
          return null;
        }
        return { entry, registrySeller: entry.registrySeller, candidate };
      })
      .filter((row): row is { entry: PoolEntry; registrySeller: RegistrySeller; candidate: PrewarmCandidate } => row !== null)
      .map((row) => {
        // May flip an expired `open` entry to `half_open` (and persist it).
        const entry = this.maybeRecycleFromOpen(row.entry, now);
        return { entry, registrySeller: row.registrySeller };
      })
      .filter((row) => row.entry.circuit !== "open")
      .sort((a, b) => b.entry.healthScore - a.entry.healthScore)
      .slice(0, limit);

    return {
      candidates,
      reason: candidates.length > 0 ? "prewarm_cache" : "no_prewarm_candidates",
      resolved: asResolution(resolved)
    };
  }

  /**
   * Record a successful inference against `sellerId`. The circuit is set to
   * `closed` regardless of its previous state, the failure counters are
   * cleared, the health score is raised to at least 60 (capped at 100), and
   * the credit tracker observes the latest balance via `recordSpend`.
   *
   * @returns The updated entry, or `undefined` when the seller is not in the pool.
   */
  recordSuccess(sellerId: string, balanceMicros: number, now: number = this.now()): PoolEntry | undefined {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return undefined;
    }
    const next: PoolEntry = {
      ...entry,
      circuit: "closed",
      consecutiveFailures: 0,
      recentFailures: [],
      lastSuccessAt: now,
      healthScore: Math.min(100, Math.max(entry.healthScore, 60))
    };
    this.entries.set(sellerId, next);
    this.creditTracker.recordSpend(sellerId, balanceMicros);
    logger.info("pool.success.recorded", "seller pool entry marked successful", {
      sellerId,
      balanceMicros,
      healthScore: next.healthScore
    });
    return next;
  }

  /**
   * Record a failure against `sellerId`. Returns the new PoolEntry. The
   * caller (route-failover) uses the returned `entry.circuit` and the
   * entry's `lastFailAt` to decide whether to fail over, retry, or stop.
   *
   * The circuit opens when any of the following holds:
   *  - the kind is hard (`hard_4xx`, `auth_invalid`, `no_compatible`);
   *  - consecutive failures reach `failureThreshold`;
   *  - the windowed failure rate reaches `windowFailureRate`;
   *  - the entry was `half_open`, i.e. the trial request itself failed.
   *
   * On a hard failure, or when `options.transferLeftover` is set, the
   * seller's remaining credit is transferred to the wasted bucket so the
   * wasted-micros counter stays accurate.
   *
   * NOTE(review): earlier documentation listed `insufficient_funds` as a
   * non-recoverable kind, but the code treats `no_compatible` as hard and
   * `insufficient_funds` as soft — confirm which is intended.
   *
   * @returns The updated entry, or `undefined` when the seller is not in the pool.
   */
  recordFailure(
    sellerId: string,
    kind: FailureKind,
    options: { transferLeftover?: boolean; reason?: string; now?: number } = {}
  ): PoolEntry | undefined {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return undefined;
    }
    const now = options.now ?? this.now();
    // Append this failure and drop timestamps that fell out of the window.
    const recentFailures = [...entry.recentFailures, now].filter((ts) => ts >= now - this.windowMs);
    const consecutiveFailures = entry.consecutiveFailures + 1;
    // NOTE(review): this is failures per second of window length (with a
    // one-second floor), not a failed/total ratio — confirm that comparing it
    // against `windowFailureRate` is intended.
    const failureRate = recentFailures.length / Math.max(1, this.windowMs / 1000);
    const overThreshold = consecutiveFailures >= this.failureThreshold;
    const overRate = failureRate >= this.windowFailureRate;
    const isHard = kind === "hard_4xx" || kind === "auth_invalid" || kind === "no_compatible";
    // A half-open entry is on trial: one failure must send it back to open.
    // Without this, an entry force-opened via markOpen (whose failure counter
    // is below the threshold) would stay half_open and keep being picked.
    const trialFailed = entry.circuit === "half_open";
    const circuit: CircuitState = isHard || overThreshold || overRate || trialFailed ? "open" : entry.circuit;
    const next: PoolEntry = {
      ...entry,
      circuit,
      consecutiveFailures,
      recentFailures,
      lastFailAt: now
    };
    this.entries.set(sellerId, next);
    if (options.transferLeftover || isHard) {
      this.creditTracker.transferLeftoverToWasted(sellerId, options.reason ?? kind);
    }
    if (circuit === "open") {
      logger.warn("pool.circuit_opened", "seller pool entry transitioned to circuit_open", {
        sellerId,
        kind,
        consecutiveFailures,
        recentFailureRate: failureRate,
        threshold: this.failureThreshold
      });
    }
    return next;
  }

  /**
   * Expose a per-seller credit / circuit snapshot to the route-failover.
   * Used to decide whether a soft failure should retry on the same seller
   * (fresh-purchase window protection) or fail over immediately.
   *
   * @returns The pool entry (if any), whether the seller is inside the
   *   credit tracker's fresh-purchase window, and whether the credit tracker
   *   currently allows an automatic purchase.
   */
  inspect(sellerId: string): { entry?: PoolEntry; freshPurchase: boolean; autoPurchaseAvailable: boolean } {
    const entry = this.entries.get(sellerId);
    const freshPurchase = this.creditTracker.isInFreshPurchaseWindow(sellerId, this.now());
    const autoPurchaseAvailable = this.creditTracker.canAutoPurchase(this.now());
    return { entry, freshPurchase, autoPurchaseAvailable };
  }

  /**
   * Manually mark an entry as `open`. Used by the registry loop when a
   * seller is removed from the registry: the entry lingers for a grace
   * period but is unreachable, so opening the circuit prevents any
   * further selection. `lastFailAt` is set to `now`, which restarts the
   * open window; failure counters are left untouched. Unknown sellers are
   * ignored.
   */
  markOpen(sellerId: string, reason: string, now: number = this.now()): void {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return;
    }
    this.entries.set(sellerId, { ...entry, circuit: "open", lastFailAt: now });
    logger.warn("pool.circuit_force_opened", "seller pool entry forced to circuit_open", {
      sellerId,
      reason
    });
  }

  /**
   * List all known pool entries. Used by `tb doctor` and tests. Each entry
   * is a shallow copy with its own `recentFailures` array, so callers cannot
   * alter pool state through the result.
   */
  snapshot(): PoolEntry[] {
    return Array.from(this.entries.values()).map((entry) => ({ ...entry, recentFailures: [...entry.recentFailures] }));
  }

  /** Number of entries currently in the pool. */
  size(): number {
    return this.entries.size;
  }

  /**
   * Flip an `open` entry to `half_open` once `openStateMs` has elapsed since
   * its `lastFailAt`, persisting the change. Any other entry is returned
   * unchanged.
   */
  private maybeRecycleFromOpen(entry: PoolEntry, now: number): PoolEntry {
    if (entry.circuit !== "open") {
      return entry;
    }
    if (now - entry.lastFailAt < this.openStateMs) {
      return entry;
    }
    const recycled: PoolEntry = { ...entry, circuit: "half_open" };
    this.entries.set(entry.sellerId, recycled);
    logger.info("pool.circuit_half_opened", "seller pool entry recycled to half_open", {
      sellerId: entry.sellerId,
      openStateMs: this.openStateMs
    });
    return recycled;
  }
}
323
+
324
/**
 * Adapt a model-index lookup result to the pool's `ModelIndexResolution`
 * shape. Every field is copied as-is; only `sellers` is renamed to
 * `candidates` (the array itself is passed through, not cloned).
 */
function asResolution(resolved: { modelId: string; matched: boolean; sellers: RegistrySeller[]; missingModelsFlag: number }): ModelIndexResolution {
  const { modelId, matched, sellers, missingModelsFlag } = resolved;
  return { modelId, matched, candidates: sellers, missingModelsFlag };
}
332
+
333
+ export type { PrewarmEntry };
@@ -0,0 +1,52 @@
1
+ import { STREAM_FAILOVER_RETRY_HINT, StreamFailover } from "../src/stream-failover.js";
2
+
3
describe("StreamFailover", () => {
  /** Snapshot a tracker reports before anything has been committed or flushed. */
  const pristine = () => ({ firstChunkCommitted: false, bytesFlushed: 0 });

  test("fresh state reports no chunks committed", () => {
    expect(new StreamFailover().snapshot()).toEqual(pristine());
  });

  test("markFirstChunkCommitted transitions once and only once", () => {
    const sf = new StreamFailover();

    // The second call must be a no-op.
    for (let call = 0; call < 2; call += 1) {
      sf.markFirstChunkCommitted();
    }

    expect(sf.snapshot().firstChunkCommitted).toBe(true);
  });

  test("recordBytesWritten accumulates flushed bytes", () => {
    const sf = new StreamFailover();

    [128, 64].forEach((bytes) => sf.recordBytesWritten(bytes));

    expect(sf.snapshot().bytesFlushed).toBe(192);
  });

  test("decideOnStreamAbort before first chunk defers to the controller", () => {
    const { action, retryHintValue } = new StreamFailover().decideOnStreamAbort("upstream_reset");

    expect(action).toBe("let_stream_complete");
    expect(retryHintValue).toBe("0");
  });

  test("decideOnStreamAbort after first chunk aborts and surfaces retry hint", () => {
    const sf = new StreamFailover();
    sf.markFirstChunkCommitted();
    sf.recordBytesWritten(2048);

    const { action, retryHintValue, bytesFlushed } = sf.decideOnStreamAbort("upstream_reset");

    expect(action).toBe("abort_with_retry_hint");
    expect(retryHintValue).toBe(STREAM_FAILOVER_RETRY_HINT);
    expect(bytesFlushed).toBe(2048);
  });

  test("reset clears the chunk and byte counters", () => {
    const sf = new StreamFailover();
    sf.markFirstChunkCommitted();
    sf.recordBytesWritten(999);

    sf.reset();

    expect(sf.snapshot()).toEqual(pristine());
  });

  test("default header name is X-TokenBuddy-Retry-Hint and is overridable", () => {
    const byDefault = new StreamFailover();
    const overridden = new StreamFailover({ retryHintHeader: "X-Custom-Retry" });

    expect(byDefault.headerName).toBe("X-TokenBuddy-Retry-Hint");
    expect(overridden.headerName).toBe("X-Custom-Retry");
  });
});