@tokenbuddy/tokenbuddy 1.0.9 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +13 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +21 -2
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +54 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/credit-tracker.d.ts +118 -0
- package/dist/src/credit-tracker.d.ts.map +1 -0
- package/dist/src/credit-tracker.js +220 -0
- package/dist/src/credit-tracker.js.map +1 -0
- package/dist/src/daemon.d.ts +49 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +541 -405
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/model-index.d.ts +86 -0
- package/dist/src/model-index.d.ts.map +1 -0
- package/dist/src/model-index.js +214 -0
- package/dist/src/model-index.js.map +1 -0
- package/dist/src/prewarm-cache.d.ts +149 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -0
- package/dist/src/prewarm-cache.js +288 -0
- package/dist/src/prewarm-cache.js.map +1 -0
- package/dist/src/prewarm-scheduler.d.ts +150 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -0
- package/dist/src/prewarm-scheduler.js +484 -0
- package/dist/src/prewarm-scheduler.js.map +1 -0
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +9 -1
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/route-failover.d.ts +96 -0
- package/dist/src/route-failover.d.ts.map +1 -0
- package/dist/src/route-failover.js +177 -0
- package/dist/src/route-failover.js.map +1 -0
- package/dist/src/seller-catalog.d.ts +26 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js +40 -0
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +127 -0
- package/dist/src/seller-pool.d.ts.map +1 -0
- package/dist/src/seller-pool.js +243 -0
- package/dist/src/seller-pool.js.map +1 -0
- package/dist/src/stream-failover.d.ts +78 -0
- package/dist/src/stream-failover.d.ts.map +1 -0
- package/dist/src/stream-failover.js +93 -0
- package/dist/src/stream-failover.js.map +1 -0
- package/package.json +1 -1
- package/src/buyer-store.ts +32 -2
- package/src/cli.ts +61 -0
- package/src/credit-tracker.test.ts +165 -0
- package/src/credit-tracker.ts +269 -0
- package/src/daemon.ts +569 -445
- package/src/model-index.test.ts +184 -0
- package/src/model-index.ts +266 -0
- package/src/prewarm-cache.test.ts +281 -0
- package/src/prewarm-cache.ts +373 -0
- package/src/prewarm-scheduler.test.ts +367 -0
- package/src/prewarm-scheduler.ts +581 -0
- package/src/provider-install.ts +9 -1
- package/src/route-failover.test.ts +193 -0
- package/src/route-failover.ts +233 -0
- package/src/seller-catalog-413.test.ts +61 -0
- package/src/seller-catalog.ts +47 -0
- package/src/seller-pool.test.ts +231 -0
- package/src/seller-pool.ts +333 -0
- package/src/stream-failover.test.ts +52 -0
- package/src/stream-failover.ts +129 -0
- package/src/thousand-seller.test.ts +151 -0
- package/tests/daemon-413-fallback.test.ts +92 -0
- package/tests/e2e.test.ts +3 -2
- package/tests/tokenbuddy.test.ts +68 -11
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
import { CreditTracker } from "../src/credit-tracker.js";
|
|
2
|
+
import { ModelIndex } from "../src/model-index.js";
|
|
3
|
+
import { PrewarmCache } from "../src/prewarm-cache.js";
|
|
4
|
+
import { SellerPool, type FailureKind } from "../src/seller-pool.js";
|
|
5
|
+
import type { RegistrySeller } from "../src/seller-catalog.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Build a `RegistrySeller` fixture from a mandatory `id`.
 *
 * Every field missing from `overrides` falls back to a value derived from the
 * id (`name`, `url`) or to the single protocol / payment method used across
 * this test file. `models` is passed through untouched, so it stays
 * `undefined` when the caller omits it.
 */
function makeSeller(overrides: Partial<RegistrySeller> & { id: string; models?: string[] }): RegistrySeller {
  const { id } = overrides;
  const seller: RegistrySeller = {
    id,
    name: overrides.name ?? id,
    url: overrides.url ?? `https://${id}.example.com`,
    supportedProtocols: overrides.supportedProtocols ?? ["chat_completions"],
    paymentMethods: overrides.paymentMethods ?? ["clawtip"],
    models: overrides.models
  };
  return seller;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Create a manually driven clock for deterministic time-based tests.
 *
 * `now` holds the current timestamp in milliseconds and `advance(ms)` moves
 * it forward. `advance` closes over the clock object rather than `this`, so
 * it keeps working when detached from the object.
 */
function makeClock(start = 1_000_000): { now: number; advance: (ms: number) => void } {
  const clock = {
    now: start,
    advance(ms: number): void {
      clock.now += ms;
    }
  };
  return clock;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Assemble the collaborators a `SellerPool` needs, pre-populated from `seed`.
 *
 * - Every seed becomes a registry seller that serves `gpt-4o`.
 * - The model index is rebuilt at registry version 1, with the first seed (if
 *   any) passed as `defaultSellerId`.
 * - The prewarm cache receives one warm commit for
 *   `gpt-4o` / `chat_completions` / `clawtip` containing one candidate per
 *   seed; a seed without `healthScore` gets 80.
 * - The credit tracker is returned freshly constructed.
 */
function build(seed: { id: string; healthScore?: number }[] = []): { index: ModelIndex; cache: PrewarmCache; credit: CreditTracker; sellers: RegistrySeller[] } {
  const sellers: RegistrySeller[] = [];
  for (const { id } of seed) {
    sellers.push(makeSeller({ id, models: ["gpt-4o"] }));
  }

  const index = new ModelIndex();
  index.rebuild(sellers, { registryVersion: 1, defaultSellerId: seed[0]?.id });

  const cache = new PrewarmCache();
  const candidates = seed.map(({ id, healthScore }) => ({
    sellerId: id,
    url: `https://${id}.example.com`,
    healthScore: healthScore ?? 80
  }));
  cache.commitWarm({
    modelId: "gpt-4o",
    protocol: "chat_completions",
    paymentMethod: "clawtip",
    candidates
  });

  const credit = new CreditTracker();
  return { index, cache, credit, sellers };
}
|
|
43
|
+
|
|
44
|
+
describe("SellerPool", () => {
  type PoolDeps = Pick<ReturnType<typeof build>, "index" | "cache" | "credit">;
  type PoolTuning = Omit<ConstructorParameters<typeof SellerPool>[0], "modelIndex" | "cache" | "creditTracker">;

  /** Wire a pool to the given fixtures, optionally overriding breaker tuning or the clock. */
  const poolFor = (deps: PoolDeps, tuning: PoolTuning = {}): SellerPool =>
    new SellerPool({ modelIndex: deps.index, cache: deps.cache, creditTracker: deps.credit, ...tuning });

  /** The one pick every test issues: gpt-4o over chat_completions, paid with clawtip. */
  const pickGpt4o = (pool: SellerPool) =>
    pool.pick({ modelId: "gpt-4o", protocol: "chat_completions", paymentMethod: "clawtip" });

  /** Seller ids of the picked candidates, in pick order. */
  const pickedIds = (result: ReturnType<typeof pickGpt4o>): string[] =>
    result.candidates.map((c) => c.entry.sellerId);

  test("sync rebuilds entries from the prewarm cache", () => {
    const pool = poolFor(build([{ id: "s1" }, { id: "s2" }]));

    const size = pool.sync();

    expect(size).toBe(2);
    expect(pool.snapshot().map((e) => e.sellerId).sort()).toEqual(["s1", "s2"]);
  });

  test("sync drops entries whose seller disappeared from the registry index", () => {
    const ctx = build([{ id: "s1" }, { id: "s2" }]);
    const pool = poolFor(ctx);
    pool.sync();
    expect(pool.size()).toBe(2);

    // Mutate the index so only s1 remains.
    ctx.index.rebuild([makeSeller({ id: "s1", models: ["gpt-4o"] })], { registryVersion: 2 });
    const size = pool.sync();

    expect(size).toBe(1);
    expect(pool.snapshot().map((e) => e.sellerId)).toEqual(["s1"]);
  });

  test("pick returns candidates sorted by health score and skips open circuit entries", () => {
    const pool = poolFor(
      build([{ id: "s1", healthScore: 90 }, { id: "s2", healthScore: 50 }, { id: "s3", healthScore: 70 }])
    );
    pool.sync();

    expect(pickedIds(pickGpt4o(pool))).toEqual(["s1", "s3", "s2"]);

    // Force s1 into the open circuit and verify it is skipped.
    pool.markOpen("s1", "registry_gone");
    expect(pickedIds(pickGpt4o(pool))).toEqual(["s3", "s2"]);
  });

  test("half_open recycle happens after openStateMs has elapsed", () => {
    const clock = makeClock();
    const pool = poolFor(build([{ id: "s1" }]), {
      failureThreshold: 1, // open after the very first failure to keep the test focused
      openStateMs: 1000,
      now: () => clock.now
    });
    pool.sync();
    pool.recordFailure("s1", "soft_5xx");
    expect(pool.snapshot()[0].circuit).toBe("open");

    // Within the open window: still skipped.
    clock.advance(500);
    expect(pickGpt4o(pool).candidates).toEqual([]);

    // After the open window: recycled to half_open and re-included.
    clock.advance(600);
    expect(pickedIds(pickGpt4o(pool))).toEqual(["s1"]);
    expect(pool.snapshot()[0].circuit).toBe("half_open");
  });

  test("recordFailure escalates to open after the configured threshold", () => {
    const clock = makeClock();
    const pool = poolFor(build([{ id: "s1" }]), { failureThreshold: 3, now: () => clock.now });
    pool.sync();

    // The first two soft failures stay below the threshold; the third trips it.
    for (const expected of ["closed", "closed", "open"]) {
      pool.recordFailure("s1", "soft_5xx");
      expect(pool.snapshot()[0].circuit).toBe(expected);
    }
  });

  test("recordSuccess closes the circuit and reports to the credit tracker", () => {
    const clock = makeClock();
    const ctx = build([{ id: "s1" }]);
    const pool = poolFor(ctx, {
      failureThreshold: 1, // open after the first failure
      now: () => clock.now
    });
    pool.sync();

    ctx.credit.recordPurchase("s1", 1_000_000, 1_000_000);
    pool.recordFailure("s1", "soft_5xx");
    expect(pool.snapshot()[0].circuit).toBe("open");

    clock.advance(31_000); // wait past the open window
    pool.recordSuccess("s1", 250_000);

    const entry = pool.snapshot()[0];
    expect(entry.circuit).toBe("closed");
    expect(entry.consecutiveFailures).toBe(0);
    expect(ctx.credit.getEntry("s1")?.currentBalanceMicros).toBe(250_000);
  });

  test("hard failure kinds (hard_4xx, auth_invalid) immediately open the circuit and transfer leftover", () => {
    const ctx = build([{ id: "s1" }]);
    const pool = poolFor(ctx);
    pool.sync();

    ctx.credit.recordPurchase("s1", 1_000_000, 500_000);
    const kinds: FailureKind[] = ["hard_4xx", "auth_invalid"];
    for (const kind of kinds) {
      ctx.credit.recordPurchase("s1", 1_000_000, 500_000);
      const entry = pool.recordFailure("s1", kind, { reason: "test" });
      expect(entry?.circuit).toBe("open");
    }

    expect(ctx.credit.summary().totalWastedMicros).toBeGreaterThan(0);
  });

  test("inspect surfaces freshPurchase and autoPurchaseAvailable flags", () => {
    const ctx = build([{ id: "s1" }]);
    const pool = poolFor(ctx);
    pool.sync();

    ctx.credit.recordPurchase("s1", 1_000_000, 1_000_000);
    const info = pool.inspect("s1");

    expect(info.entry?.sellerId).toBe("s1");
    expect(info.freshPurchase).toBe(true);
    expect(info.autoPurchaseAvailable).toBe(true);
  });

  test("markOpen force-opens a circuit without changing other state", () => {
    const pool = poolFor(build([{ id: "s1" }]));
    pool.sync();
    const [before] = pool.snapshot();

    pool.markOpen("s1", "registry_disappeared");

    const [after] = pool.snapshot();
    expect(after.circuit).toBe("open");
    // markOpen stamps `lastFailAt` (the open window is measured from it), so
    // both `circuit` and `lastFailAt` are masked out; every other field must
    // be exactly what it was before the call.
    expect({ ...after, circuit: before.circuit, lastFailAt: before.lastFailAt }).toEqual(before);
  });

  test("pick returns an empty result when the prewarm cache has no entry for the model", () => {
    const index = new ModelIndex();
    index.rebuild([makeSeller({ id: "s1", models: ["gpt-4o"] })], { registryVersion: 1 });
    const pool = poolFor({ index, cache: new PrewarmCache(), credit: new CreditTracker() });
    pool.sync();

    const result = pickGpt4o(pool);

    expect(result.candidates).toEqual([]);
    // No prewarm has been committed for this model yet, so the pool
    // surfaces "no_prewarm_candidates" (not "prewarm_cache_empty"); the
    // distinction matters for the caller deciding whether to schedule a
    // lazy prewarm.
    expect(result.reason).toBe("no_prewarm_candidates");
  });

  test("pick falls back to the registry index when the cache has no entry yet", () => {
    const index = new ModelIndex();
    index.rebuild(
      [makeSeller({ id: "s1", models: ["gpt-4o"] }), makeSeller({ id: "s2", models: ["gpt-4o"] })],
      { registryVersion: 1 }
    );
    const pool = poolFor({ index, cache: new PrewarmCache(), credit: new CreditTracker() });
    pool.sync();

    // No probe has run yet, so the pool is empty. pick should report "no candidates"
    // and also surface the registry-level model resolution so the caller can decide
    // whether to schedule a lazy prewarm.
    const result = pickGpt4o(pool);

    expect(result.candidates).toEqual([]);
    expect(result.resolved.matched).toBe(true);
    expect(result.resolved.candidates).toHaveLength(2);
  });
});
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
2
|
+
import type { RegistrySeller } from "./seller-catalog.js";
|
|
3
|
+
import type { ModelIndex } from "./model-index.js";
|
|
4
|
+
import type { PrewarmCache, PrewarmCandidate, PrewarmEntry } from "./prewarm-cache.js";
|
|
5
|
+
import type { CreditTracker } from "./credit-tracker.js";
|
|
6
|
+
|
|
7
|
+
const logger = createModuleLogger("tb-proxyd:seller-pool");
|
|
8
|
+
|
|
9
|
+
/** Circuit-breaker state tracked for each seller in the pool. */
export type CircuitState = "closed" | "half_open" | "open";
|
|
10
|
+
|
|
11
|
+
/** Classification of a failed attempt against a seller. */
export type FailureKind =
  // 400/404/422 — the seller is wrong for this request
  | "hard_4xx"
  // 401/403 token invalid
  | "auth_invalid"
  // 402
  | "insufficient_funds"
  // 429/5xx/timeout/network
  | "soft_5xx"
  // buyer deadline exceeded
  | "deadline"
  // upstream stream broken after first chunk
  | "stream_aborted"
  // pool had no candidates for the request
  | "no_compatible";
|
|
19
|
+
|
|
20
|
+
/** Everything the pool tracks about one seller. */
export interface PoolEntry {
  /** Registry id of the seller; also the key of the pool's entry map. */
  sellerId: string;
  /** URL taken from the prewarm candidate this entry was built from. */
  url: string;
  /** Registry record for this seller as returned by the model index. */
  registrySeller: RegistrySeller;
  /** Current circuit-breaker state. */
  circuit: CircuitState;
  /** Failures recorded since the last recorded success. */
  consecutiveFailures: number;
  /** Failure timestamps (ms) that fall inside the sliding window. */
  recentFailures: number[];
  /** Timestamp (ms) of the last known success; 0 when none is known. */
  lastSuccessAt: number;
  /** Timestamp (ms) of the last failure or forced open; 0 when none is known. */
  lastFailAt: number;
  /** `warmedAt` of the prewarm cache entry this pool entry was built from. */
  lastProbeAt: number;

  // Source-of-truth prewarm state; the pool keeps a copy so the hot path
  // can answer health questions without touching the cache map on every
  // request.
  /** Health score copied from the prewarm candidate; `pick` sorts on it, highest first. */
  healthScore: number;
  /** Average latency (ms) copied from the prewarm candidate. */
  avgLatencyMs: number;
}
|
|
36
|
+
|
|
37
|
+
/** Arguments accepted by `SellerPool.pick`. */
export interface PickOptions {
  /** Model being requested. */
  modelId: string;
  /** Protocol the request uses (e.g. `chat_completions`). */
  protocol: string;
  /** Payment method the buyer will use. */
  paymentMethod: string;
  /** Maximum number of candidates to return; `pick` falls back to 4 when omitted. */
  limit?: number;
  /** Timestamp (ms) to evaluate circuits against; the pool's clock is used when omitted. */
  now?: number;
}
|
|
44
|
+
|
|
45
|
+
/** Outcome of `SellerPool.pick`. */
export interface PickResult {
  /** Selectable sellers, ordered by health score (highest first). */
  candidates: { entry: PoolEntry; registrySeller: RegistrySeller }[];
  /**
   * Why the candidate list looks the way it does. `pick` emits
   * `"prewarm_cache"`, `"prewarm_cache_empty"` or `"no_prewarm_candidates"`.
   */
  reason: string;
  /** Registry-level resolution of the requested model, independent of the prewarm cache. */
  resolved: ModelIndexResolution;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Pool-facing view of a model-index lookup: the index's `sellers` list is
 * exposed here under the name `candidates`.
 */
export interface ModelIndexResolution {
  /** Model id carried over from the index resolution. */
  modelId: string;
  /** `matched` flag carried over from the index resolution. */
  matched: boolean;
  /** Registry sellers the index returned for the model. */
  candidates: RegistrySeller[];
  /** Passed through from the index unchanged; its meaning is defined by `ModelIndex`. */
  missingModelsFlag: number;
}
|
|
57
|
+
|
|
58
|
+
/** Constructor options for `SellerPool`. */
export interface SellerPoolOptions {
  /** Registry index used to look sellers up and to resolve models. */
  modelIndex: ModelIndex;
  /** Prewarm cache whose entries the pool mirrors. */
  cache: PrewarmCache;
  /** Credit tracker notified of spend and of leftover-to-wasted transfers. */
  creditTracker: CreditTracker;

  // Circuit breaker thresholds (v1.2 §13).
  /** Consecutive failures that open the circuit. Default 3. */
  failureThreshold?: number;
  /** Length of the sliding failure window in ms. Default 60_000 (1 minute). */
  windowMs?: number;
  /** Windowed failure rate at or above which the circuit opens. Default 0.5. */
  windowFailureRate?: number;
  /** How long a circuit stays open before it may be recycled to half-open, in ms. Default 30_000. */
  openStateMs?: number;
  /** Clock override; the pool falls back to `Date.now`. */
  now?: () => number;

  // PoolEntry -> CircuitState transition hooks for tests.
  // NOTE(review): the SellerPool class in this file never reads this option.
  applyRegistry?: (entries: PoolEntry[], registry: RegistrySeller[]) => PoolEntry[];
}
|
|
71
|
+
|
|
72
|
+
/** Fallbacks for the optional circuit-breaker settings in `SellerPoolOptions`. */
const DEFAULTS = {
  // Consecutive failures before the circuit opens.
  failureThreshold: 3,
  // Sliding failure window: one minute, in ms.
  windowMs: 60 * 1_000,
  // Windowed failure rate that opens the circuit.
  windowFailureRate: 0.5,
  // Time a circuit stays open before it may go half-open: thirty seconds, in ms.
  openStateMs: 30 * 1_000
};
|
|
78
|
+
|
|
79
|
+
/**
 * v2 SellerPool: combines `ModelIndex` (registry index), `PrewarmCache`
 * (probe results), and `CreditTracker` (balance protection) into a single
 * source of truth used by the route-failover controller. The pool is
 * process-local and rebuilds its entry list from the cache on `sync()`;
 * sellers not yet present in the cache are not in the pool.
 *
 * Entries are treated as immutable values: every state change stores a new
 * `PoolEntry` object in the map instead of mutating the existing one.
 */
export class SellerPool {
  private readonly modelIndex: ModelIndex;
  private readonly cache: PrewarmCache;
  private readonly creditTracker: CreditTracker;
  private readonly failureThreshold: number;
  private readonly windowMs: number;
  private readonly windowFailureRate: number;
  private readonly openStateMs: number;
  private readonly now: () => number;

  private entries = new Map<string, PoolEntry>();

  /**
   * @param options Collaborators plus optional circuit-breaker tuning; any
   *   omitted tuning value falls back to `DEFAULTS`, and the clock falls back
   *   to `Date.now`.
   */
  constructor(options: SellerPoolOptions) {
    this.modelIndex = options.modelIndex;
    this.cache = options.cache;
    this.creditTracker = options.creditTracker;
    this.failureThreshold = options.failureThreshold ?? DEFAULTS.failureThreshold;
    this.windowMs = options.windowMs ?? DEFAULTS.windowMs;
    this.windowFailureRate = options.windowFailureRate ?? DEFAULTS.windowFailureRate;
    this.openStateMs = options.openStateMs ?? DEFAULTS.openStateMs;
    this.now = options.now ?? Date.now;
  }

  /**
   * Rebuild entries from the current prewarm cache. Intended to be called by
   * `route-failover` whenever the cache is mutated (commit, invalidate,
   * etc.) so the pool reflects the latest probe results.
   *
   * Circuit state and failure history of sellers that were already in the
   * pool are carried over; sellers the model index no longer knows are
   * dropped.
   *
   * @returns The number of entries in the pool after the rebuild.
   */
  sync(): number {
    const fresh = new Map<string, PoolEntry>();
    for (const entry of this.cache.snapshot()) {
      for (const candidate of entry.candidates) {
        const registry = this.modelIndex.getSeller(candidate.sellerId);
        if (!registry) {
          // Seller disappeared from the registry since the probe; skip.
          continue;
        }
        const previous = this.entries.get(candidate.sellerId);
        fresh.set(candidate.sellerId, {
          sellerId: candidate.sellerId,
          url: candidate.url,
          registrySeller: registry,
          circuit: previous?.circuit ?? "closed",
          consecutiveFailures: previous?.consecutiveFailures ?? 0,
          recentFailures: previous?.recentFailures ?? [],
          // Prefer the probe's timestamps; fall back to what the pool already
          // knew, then to 0 ("never").
          lastSuccessAt: candidate.lastSuccessAt || previous?.lastSuccessAt || 0,
          lastFailAt: candidate.lastFailAt || previous?.lastFailAt || 0,
          lastProbeAt: entry.warmedAt,
          healthScore: candidate.healthScore,
          avgLatencyMs: candidate.avgLatencyMs
        });
      }
    }
    this.entries = fresh;
    return this.entries.size;
  }

  /**
   * Pick up to `limit` candidates for a (model, protocol, payment) triple.
   * Sellers in the `open` circuit are skipped unless their open state has
   * expired (they are flipped to `half_open` and included). Candidates are
   * sorted by health score (descending) so the strongest seller goes first.
   *
   * `reason` in the result is `"prewarm_cache_empty"` when the cache holds an
   * entry with zero candidates, `"prewarm_cache"` when at least one candidate
   * is returned, and `"no_prewarm_candidates"` otherwise. `resolved` always
   * carries the registry-level resolution of the model.
   *
   * @param options Request triple plus optional `limit` (default 4; negative
   *   values are treated as 0) and `now` override.
   */
  pick(options: PickOptions): PickResult {
    const now = options.now ?? this.now();
    // Clamp: a negative limit would make `slice(0, limit)` drop candidates
    // from the end instead of returning none.
    const limit = Math.max(0, options.limit ?? 4);
    const freshness = this.cache.freshness(options.modelId, options.protocol, options.paymentMethod);
    const resolved = asResolution(
      this.modelIndex.resolve(options.modelId, {
        protocol: options.protocol,
        paymentMethod: options.paymentMethod
      })
    );

    if (freshness.entry && freshness.entry.candidates.length === 0) {
      return { candidates: [], reason: "prewarm_cache_empty", resolved };
    }

    const selectable: PickResult["candidates"] = [];
    for (const candidate of freshness.entry?.candidates ?? []) {
      const known = this.entries.get(candidate.sellerId);
      if (!known) {
        // Cached candidate that sync() has not (or no longer) admitted to the pool.
        continue;
      }
      const entry = this.maybeRecycleFromOpen(known, now);
      if (entry.circuit === "open") {
        continue;
      }
      selectable.push({ entry, registrySeller: entry.registrySeller });
    }

    const candidates = selectable
      .sort((a, b) => b.entry.healthScore - a.entry.healthScore)
      .slice(0, limit);

    return {
      candidates,
      reason: candidates.length > 0 ? "prewarm_cache" : "no_prewarm_candidates",
      resolved
    };
  }

  /**
   * Record a successful inference against `sellerId`. The circuit is closed
   * whatever state it was in, the failure counters are cleared, the health
   * score is raised to at least 60 (capped at 100), and the credit tracker
   * observes the latest balance via `recordSpend`.
   *
   * @returns The updated entry, or `undefined` when the seller is not in the pool.
   */
  recordSuccess(sellerId: string, balanceMicros: number, now: number = this.now()): PoolEntry | undefined {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return undefined;
    }
    const next: PoolEntry = {
      ...entry,
      circuit: "closed",
      consecutiveFailures: 0,
      recentFailures: [],
      lastSuccessAt: now,
      healthScore: Math.min(100, Math.max(entry.healthScore, 60))
    };
    this.entries.set(sellerId, next);
    this.creditTracker.recordSpend(sellerId, balanceMicros);
    logger.info("pool.success.recorded", "seller pool entry marked successful", {
      sellerId,
      balanceMicros,
      healthScore: next.healthScore
    });
    return next;
  }

  /**
   * Record a failure against `sellerId`. Returns the new PoolEntry. The
   * caller (route-failover) uses the returned `entry.circuit` and the
   * entry's `lastFailAt` to decide whether to fail over, retry, or stop.
   *
   * The circuit opens when any of the following holds:
   * - the kind is hard (`hard_4xx`, `auth_invalid`, `no_compatible`);
   * - consecutive failures reach `failureThreshold`;
   * - the windowed failure rate reaches `windowFailureRate`;
   * - the entry was `half_open`, i.e. the trial attempt after an open
   *   period failed.
   *
   * On a hard kind, or when `options.transferLeftover` is set, the seller's
   * leftover credit is moved to the wasted bucket through the credit tracker
   * so the wasted-micros counter stays accurate.
   *
   * NOTE(review): `insufficient_funds` is not treated as a hard kind here, so
   * it neither opens the circuit by itself nor transfers leftover credit
   * unless the caller passes `transferLeftover` — confirm this is intended.
   *
   * @returns The updated entry, or `undefined` when the seller is not in the pool.
   */
  recordFailure(
    sellerId: string,
    kind: FailureKind,
    options: { transferLeftover?: boolean; reason?: string; now?: number } = {}
  ): PoolEntry | undefined {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return undefined;
    }
    const now = options.now ?? this.now();
    const recentFailures = [...entry.recentFailures, now].filter((ts) => ts >= now - this.windowMs);
    const consecutiveFailures = entry.consecutiveFailures + 1;
    // NOTE(review): this is failures per second of window length (never
    // dividing by less than 1), not failures divided by attempts. With the
    // defaults, 30 failures within one minute are needed to reach 0.5.
    const failureRate = recentFailures.length / Math.max(1, this.windowMs / 1000);
    const overThreshold = consecutiveFailures >= this.failureThreshold;
    const overRate = failureRate >= this.windowFailureRate;
    const isHard = kind === "hard_4xx" || kind === "auth_invalid" || kind === "no_compatible";
    // A half-open entry is on probation: its trial attempt failing must
    // re-open the circuit even when no threshold is reached. Otherwise a
    // seller that was force-opened (or opened by one hard failure) stays
    // selectable while it keeps failing.
    const probeFailed = entry.circuit === "half_open";
    const circuit: CircuitState = isHard || overThreshold || overRate || probeFailed ? "open" : entry.circuit;
    const next: PoolEntry = {
      ...entry,
      circuit,
      consecutiveFailures,
      recentFailures,
      lastFailAt: now
    };
    this.entries.set(sellerId, next);
    if (options.transferLeftover || isHard) {
      this.creditTracker.transferLeftoverToWasted(sellerId, options.reason ?? kind);
    }
    if (circuit === "open") {
      logger.warn("pool.circuit_opened", "seller pool entry transitioned to circuit_open", {
        sellerId,
        kind,
        consecutiveFailures,
        recentFailureRate: failureRate,
        threshold: this.failureThreshold
      });
    }
    return next;
  }

  /**
   * Expose a per-seller credit / circuit snapshot to the route-failover.
   * Used to decide whether a soft failure should retry on the same seller
   * (fresh-purchase window protection) or fail over immediately.
   *
   * Both flags are evaluated against a single clock reading so they describe
   * the same instant. `entry` is `undefined` when the seller is not in the
   * pool.
   */
  inspect(sellerId: string): { entry?: PoolEntry; freshPurchase: boolean; autoPurchaseAvailable: boolean } {
    const now = this.now();
    const entry = this.entries.get(sellerId);
    const freshPurchase = this.creditTracker.isInFreshPurchaseWindow(sellerId, now);
    const autoPurchaseAvailable = this.creditTracker.canAutoPurchase(now);
    return { entry, freshPurchase, autoPurchaseAvailable };
  }

  /**
   * Manually mark an entry as `open`. Used by the registry loop when a
   * seller is removed from the registry: the entry lingers for a grace
   * period but is unreachable, so opening the circuit prevents any
   * further selection.
   *
   * `lastFailAt` is set to `now` because the open window is measured from
   * it; all other fields are left unchanged. Unknown sellers are ignored.
   */
  markOpen(sellerId: string, reason: string, now: number = this.now()): void {
    const entry = this.entries.get(sellerId);
    if (!entry) {
      return;
    }
    this.entries.set(sellerId, { ...entry, circuit: "open", lastFailAt: now });
    logger.warn("pool.circuit_force_opened", "seller pool entry forced to circuit_open", {
      sellerId,
      reason
    });
  }

  /**
   * List all known pool entries. Used by `tb doctor` and tests. Each entry
   * (and its `recentFailures` array) is copied, so callers cannot alter pool
   * state through the result.
   */
  snapshot(): PoolEntry[] {
    return Array.from(this.entries.values()).map((entry) => ({ ...entry, recentFailures: [...entry.recentFailures] }));
  }

  /** Number of sellers currently in the pool. */
  size(): number {
    return this.entries.size;
  }

  /**
   * Flip an `open` entry to `half_open` once `openStateMs` has elapsed since
   * its `lastFailAt`, storing and returning the recycled entry. Any other
   * entry is returned unchanged.
   */
  private maybeRecycleFromOpen(entry: PoolEntry, now: number): PoolEntry {
    if (entry.circuit !== "open") {
      return entry;
    }
    if (now - entry.lastFailAt < this.openStateMs) {
      return entry;
    }
    const recycled: PoolEntry = { ...entry, circuit: "half_open" };
    this.entries.set(entry.sellerId, recycled);
    logger.info("pool.circuit_half_opened", "seller pool entry recycled to half_open", {
      sellerId: entry.sellerId,
      openStateMs: this.openStateMs
    });
    return recycled;
  }
}
|
|
323
|
+
|
|
324
|
+
/**
 * Convert a model-index resolution into the shape exposed on `PickResult`:
 * identical fields, except that the `sellers` list is published as
 * `candidates`. The list itself is passed through by reference, not copied.
 */
function asResolution(resolved: { modelId: string; matched: boolean; sellers: RegistrySeller[]; missingModelsFlag: number }): ModelIndexResolution {
  const { modelId, matched, sellers, missingModelsFlag } = resolved;
  return { modelId, matched, candidates: sellers, missingModelsFlag };
}
|
|
332
|
+
|
|
333
|
+
export type { PrewarmEntry };
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { STREAM_FAILOVER_RETRY_HINT, StreamFailover } from "../src/stream-failover.js";
|
|
2
|
+
|
|
3
|
+
describe("StreamFailover", () => {
  // Snapshot of a tracker that has neither committed a chunk nor flushed a byte.
  const PRISTINE = { firstChunkCommitted: false, bytesFlushed: 0 };

  test("fresh state reports no chunks committed", () => {
    expect(new StreamFailover().snapshot()).toEqual(PRISTINE);
  });

  test("markFirstChunkCommitted transitions once and only once", () => {
    const tracker = new StreamFailover();

    // Calling it twice must be harmless.
    tracker.markFirstChunkCommitted();
    tracker.markFirstChunkCommitted();

    expect(tracker.snapshot().firstChunkCommitted).toBe(true);
  });

  test("recordBytesWritten accumulates flushed bytes", () => {
    const tracker = new StreamFailover();

    tracker.recordBytesWritten(128);
    tracker.recordBytesWritten(64);

    expect(tracker.snapshot().bytesFlushed).toBe(192);
  });

  test("decideOnStreamAbort before first chunk defers to the controller", () => {
    const { action, retryHintValue } = new StreamFailover().decideOnStreamAbort("upstream_reset");

    expect(action).toBe("let_stream_complete");
    expect(retryHintValue).toBe("0");
  });

  test("decideOnStreamAbort after first chunk aborts and surfaces retry hint", () => {
    const tracker = new StreamFailover();
    tracker.markFirstChunkCommitted();
    tracker.recordBytesWritten(2048);

    const { action, retryHintValue, bytesFlushed } = tracker.decideOnStreamAbort("upstream_reset");

    expect(action).toBe("abort_with_retry_hint");
    expect(retryHintValue).toBe(STREAM_FAILOVER_RETRY_HINT);
    expect(bytesFlushed).toBe(2048);
  });

  test("reset clears the chunk and byte counters", () => {
    const tracker = new StreamFailover();
    tracker.markFirstChunkCommitted();
    tracker.recordBytesWritten(999);

    tracker.reset();

    expect(tracker.snapshot()).toEqual(PRISTINE);
  });

  test("default header name is X-TokenBuddy-Retry-Hint and is overridable", () => {
    const byDefault = new StreamFailover();
    const overridden = new StreamFailover({ retryHintHeader: "X-Custom-Retry" });

    expect(byDefault.headerName).toBe("X-TokenBuddy-Retry-Hint");
    expect(overridden.headerName).toBe("X-Custom-Retry");
  });
});
|