@tokenbuddy/tokenbuddy 1.0.8 → 1.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/buyer-store.d.ts +13 -0
- package/dist/src/buyer-store.d.ts.map +1 -1
- package/dist/src/buyer-store.js +21 -2
- package/dist/src/buyer-store.js.map +1 -1
- package/dist/src/cli.d.ts.map +1 -1
- package/dist/src/cli.js +54 -0
- package/dist/src/cli.js.map +1 -1
- package/dist/src/credit-tracker.d.ts +118 -0
- package/dist/src/credit-tracker.d.ts.map +1 -0
- package/dist/src/credit-tracker.js +220 -0
- package/dist/src/credit-tracker.js.map +1 -0
- package/dist/src/daemon.d.ts +49 -4
- package/dist/src/daemon.d.ts.map +1 -1
- package/dist/src/daemon.js +541 -405
- package/dist/src/daemon.js.map +1 -1
- package/dist/src/model-index.d.ts +86 -0
- package/dist/src/model-index.d.ts.map +1 -0
- package/dist/src/model-index.js +214 -0
- package/dist/src/model-index.js.map +1 -0
- package/dist/src/prewarm-cache.d.ts +149 -0
- package/dist/src/prewarm-cache.d.ts.map +1 -0
- package/dist/src/prewarm-cache.js +288 -0
- package/dist/src/prewarm-cache.js.map +1 -0
- package/dist/src/prewarm-scheduler.d.ts +150 -0
- package/dist/src/prewarm-scheduler.d.ts.map +1 -0
- package/dist/src/prewarm-scheduler.js +484 -0
- package/dist/src/prewarm-scheduler.js.map +1 -0
- package/dist/src/provider-install.d.ts.map +1 -1
- package/dist/src/provider-install.js +9 -1
- package/dist/src/provider-install.js.map +1 -1
- package/dist/src/route-failover.d.ts +96 -0
- package/dist/src/route-failover.d.ts.map +1 -0
- package/dist/src/route-failover.js +177 -0
- package/dist/src/route-failover.js.map +1 -0
- package/dist/src/seller-catalog.d.ts +26 -0
- package/dist/src/seller-catalog.d.ts.map +1 -1
- package/dist/src/seller-catalog.js +40 -0
- package/dist/src/seller-catalog.js.map +1 -1
- package/dist/src/seller-pool.d.ts +127 -0
- package/dist/src/seller-pool.d.ts.map +1 -0
- package/dist/src/seller-pool.js +243 -0
- package/dist/src/seller-pool.js.map +1 -0
- package/dist/src/stream-failover.d.ts +78 -0
- package/dist/src/stream-failover.d.ts.map +1 -0
- package/dist/src/stream-failover.js +93 -0
- package/dist/src/stream-failover.js.map +1 -0
- package/package.json +1 -1
- package/src/buyer-store.ts +32 -2
- package/src/cli.ts +61 -0
- package/src/credit-tracker.test.ts +165 -0
- package/src/credit-tracker.ts +269 -0
- package/src/daemon.ts +569 -445
- package/src/model-index.test.ts +184 -0
- package/src/model-index.ts +266 -0
- package/src/prewarm-cache.test.ts +281 -0
- package/src/prewarm-cache.ts +373 -0
- package/src/prewarm-scheduler.test.ts +367 -0
- package/src/prewarm-scheduler.ts +581 -0
- package/src/provider-install.ts +9 -1
- package/src/route-failover.test.ts +193 -0
- package/src/route-failover.ts +233 -0
- package/src/seller-catalog-413.test.ts +61 -0
- package/src/seller-catalog.ts +47 -0
- package/src/seller-pool.test.ts +231 -0
- package/src/seller-pool.ts +333 -0
- package/src/stream-failover.test.ts +52 -0
- package/src/stream-failover.ts +129 -0
- package/src/thousand-seller.test.ts +151 -0
- package/tests/daemon-413-fallback.test.ts +92 -0
- package/tests/e2e.test.ts +3 -2
- package/tests/tokenbuddy.test.ts +68 -11
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_PREWARM_TTL_MS,
|
|
3
|
+
PrewarmCache,
|
|
4
|
+
prewarmKey
|
|
5
|
+
} from "../src/prewarm-cache.js";
|
|
6
|
+
|
|
7
|
+
describe("PrewarmCache", () => {
  // Every test in this suite uses the same protocol/payment pair unless it
  // explicitly overrides the protocol.
  const PROTOCOL = "chat_completions";
  const PAYMENT = "clawtip";

  /** Build a minimal candidate input; the url defaults to a per-seller example host. */
  function makeCandidate(overrides: { sellerId: string; healthScore?: number; url?: string }) {
    return {
      sellerId: overrides.sellerId,
      url: overrides.url ?? `https://${overrides.sellerId}.example.com`,
      healthScore: overrides.healthScore ?? 80
    };
  }

  /** Commit a warm result for `modelId` with one default candidate per seller id. */
  function commit(cache: PrewarmCache, modelId: string, sellerIds: string[], protocol: string = PROTOCOL) {
    return cache.commitWarm({
      modelId,
      protocol,
      paymentMethod: PAYMENT,
      candidates: sellerIds.map((sellerId) => makeCandidate({ sellerId }))
    });
  }

  test("key encoding is collision-free and case-insensitive", () => {
    const a = prewarmKey("gpt-4o", "chat_completions", "clawtip");
    const b = prewarmKey("GPT-4O", "Chat_Completions", "ClawTip");
    expect(a).toBe(b);
    // Moving text across a component boundary must yield a different key.
    expect(prewarmKey("a-b", "c", "d")).not.toBe(prewarmKey("a", "b-c", "d"));
  });

  test("get returns undefined for unknown keys; freshness reports empty", () => {
    const cache = new PrewarmCache();
    expect(cache.get("m1", PROTOCOL, PAYMENT)).toBeUndefined();
    const f = cache.freshness("m1", PROTOCOL, PAYMENT);
    expect(f.present).toBe(false);
    expect(f.expired).toBe(true);
    expect(f.expiringSoon).toBe(true);
    expect(f.state).toBe("empty");
  });

  test("beginWarming creates a new warming entry without mutating any prior candidates", () => {
    const cache = new PrewarmCache();
    commit(cache, "gpt-4o", ["s1"]);

    const begin = cache.beginWarming("gpt-4o", PROTOCOL, PAYMENT);
    expect(begin.hadPrevious).toBe(true);
    expect(begin.entry.state).toBe("warming");
    // Prior warm candidates are preserved while a new probe is in flight;
    // a re-probe that finds nothing must not silently wipe the cache.
    expect(begin.entry.candidates).toHaveLength(1);
    expect(begin.entry.candidates[0].sellerId).toBe("s1");
  });

  test("commitWarm resets warmedAt and replaces candidates", () => {
    // One cache with one controllable clock, so the second commit really
    // happens later than the first and the warmedAt reset is observable.
    let now = 1_000_000;
    const cache = new PrewarmCache({ now: () => now });
    commit(cache, "gpt-4o", ["s1"]);
    expect(cache.get("gpt-4o", PROTOCOL, PAYMENT)?.warmedAt).toBe(1_000_000);

    now += 9 * 60 * 1000; // 9 minutes later, still inside the default TTL
    const second = commit(cache, "gpt-4o", ["s2", "s3"]);
    expect(second.replacedSellers).toEqual(["s1"]);

    const entry = cache.get("gpt-4o", PROTOCOL, PAYMENT);
    expect(entry?.warmedAt).toBe(now);
    expect(entry?.state).toBe("warm");
    expect(entry?.candidates.map((c) => c.sellerId).sort()).toEqual(["s2", "s3"]);
  });

  test("commitWarm with zero candidates marks the entry as empty", () => {
    const cache = new PrewarmCache();
    const result = commit(cache, "gpt-4o", []);
    expect(result.entry.state).toBe("empty");
    expect(result.entry.candidates).toEqual([]);
  });

  test("freshness reports expiringSoon when within the last 10% of TTL", () => {
    // Use a controllable `now` so we can advance time deterministically.
    let now = 0;
    const cache = new PrewarmCache({ now: () => now, defaultTtlMs: 1000 });
    commit(cache, "gpt-4o", ["s1"]);

    // At t=0: warmedAt=0, age=0, far from expiry.
    const fresh = cache.freshness("gpt-4o", PROTOCOL, PAYMENT);
    expect(fresh.expired).toBe(false);
    expect(fresh.expiringSoon).toBe(false);
    expect(fresh.remainingMs).toBe(1000);

    // At t=850: 150ms left, not in the last 10%.
    now = 850;
    const midLife = cache.freshness("gpt-4o", PROTOCOL, PAYMENT);
    expect(midLife.expired).toBe(false);
    expect(midLife.expiringSoon).toBe(false);
    expect(midLife.remainingMs).toBe(150);

    // At t=950: 50ms left, in the last 10%.
    now = 950;
    const nearEnd = cache.freshness("gpt-4o", PROTOCOL, PAYMENT);
    expect(nearEnd.expired).toBe(false);
    expect(nearEnd.expiringSoon).toBe(true);
    expect(nearEnd.remainingMs).toBe(50);

    // At t=1100: past TTL, expired.
    now = 1100;
    const past = cache.freshness("gpt-4o", PROTOCOL, PAYMENT);
    expect(past.expired).toBe(true);
    expect(past.state).toBe("stale");
  });

  test("recordFailure increments consecutive failures and marks entry stale", () => {
    const cache = new PrewarmCache();
    commit(cache, "gpt-4o", ["s1"]);
    const first = cache.recordFailure("gpt-4o", PROTOCOL, PAYMENT, "503");
    expect(first?.consecutiveWarmingFailures).toBe(1);
    expect(first?.state).toBe("stale");
    const second = cache.recordFailure("gpt-4o", PROTOCOL, PAYMENT);
    expect(second?.consecutiveWarmingFailures).toBe(2);
  });

  test("recordFailure is a no-op when the key is unknown", () => {
    const cache = new PrewarmCache();
    expect(cache.recordFailure("missing", PROTOCOL, PAYMENT)).toBeUndefined();
  });

  test("invalidateSeller drops the seller from every entry", () => {
    const cache = new PrewarmCache();
    commit(cache, "gpt-4o", ["s1", "s2"]);
    commit(cache, "claude-sonnet-4-5", ["s2"]);

    const affected = cache.invalidateSeller("s2");
    expect(affected).toBe(2);
    const gpt = cache.get("gpt-4o", PROTOCOL, PAYMENT);
    expect(gpt?.candidates.map((c) => c.sellerId)).toEqual(["s1"]);
    // Losing the only candidate downgrades the entry to "empty".
    const claude = cache.get("claude-sonnet-4-5", PROTOCOL, PAYMENT);
    expect(claude?.state).toBe("empty");
    expect(claude?.candidates).toEqual([]);
  });

  test("invalidateKey removes a single entry entirely", () => {
    const cache = new PrewarmCache();
    commit(cache, "gpt-4o", ["s1"]);
    expect(cache.invalidateKey("gpt-4o", PROTOCOL, PAYMENT)).toBe(true);
    expect(cache.invalidateKey("gpt-4o", PROTOCOL, PAYMENT)).toBe(false);
    expect(cache.size()).toBe(0);
  });

  test("evictExpired removes only entries past their TTL", () => {
    // Use a single controllable clock so commit and evict see consistent time.
    let now = 0;
    const cache = new PrewarmCache({ now: () => now, defaultTtlMs: 1000 });
    commit(cache, "gpt-4o", ["s1"]);
    commit(cache, "claude-sonnet-4-5", ["s2"]);
    // Clock at 0; both fresh.
    expect(cache.evictExpired()).toBe(0);

    // Advance to t=500; still fresh.
    now = 500;
    expect(cache.evictExpired()).toBe(0);

    // Advance to t=1500; both entries now older than TTL=1000.
    now = 1500;
    expect(cache.evictExpired()).toBe(2);
    expect(cache.size()).toBe(0);
  });

  test("isExpiringSoon returns true only inside the expiry window", () => {
    // The cache clock is pinned at 0 (so warmedAt == 0); each check passes
    // its own explicit `now` instead.
    const cache = new PrewarmCache({ now: () => 0, defaultTtlMs: 1000 });
    commit(cache, "gpt-4o", ["s1"]);
    // At t=200 (800ms left): well outside the 100ms window.
    expect(cache.isExpiringSoon("gpt-4o", PROTOCOL, PAYMENT, 100, 200)).toBe(false);
    // At t=850 (150ms left): still outside the 100ms window.
    expect(cache.isExpiringSoon("gpt-4o", PROTOCOL, PAYMENT, 100, 850)).toBe(false);
    // At t=950 (50ms left): inside the 100ms window.
    expect(cache.isExpiringSoon("gpt-4o", PROTOCOL, PAYMENT, 100, 950)).toBe(true);
    // At t=1000 (expired; not in the "soon" window anymore).
    expect(cache.isExpiringSoon("gpt-4o", PROTOCOL, PAYMENT, 100, 1000)).toBe(false);
  });

  test("snapshot returns deep copies that do not mutate cache state", () => {
    const cache = new PrewarmCache();
    commit(cache, "gpt-4o", ["s1"]);
    const snap = cache.snapshot();
    snap[0].candidates[0].healthScore = -1;
    expect(cache.get("gpt-4o", PROTOCOL, PAYMENT)?.candidates[0].healthScore).not.toBe(-1);
  });

  test("keys() decodes the cache key back into the model/protocol/payment triple", () => {
    const cache = new PrewarmCache();
    commit(cache, "gpt-4o", ["s1"]);
    commit(cache, "claude-sonnet-4-5", ["s2"], "messages");
    const keys = cache.keys().sort((a, b) => a.modelId.localeCompare(b.modelId));
    expect(keys).toEqual([
      { modelId: "claude-sonnet-4-5", protocol: "messages", paymentMethod: "clawtip" },
      { modelId: "gpt-4o", protocol: "chat_completions", paymentMethod: "clawtip" }
    ]);
  });

  test("default TTL is 10 minutes", () => {
    expect(DEFAULT_PREWARM_TTL_MS).toBe(600_000);
  });
});
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
import { createModuleLogger } from "@tokenbuddy/logging";
|
|
2
|
+
|
|
3
|
+
const logger = createModuleLogger("tb-proxyd:prewarm-cache");
|
|
4
|
+
|
|
5
|
+
/**
 * Fallback time-to-live, in milliseconds, for a warmed cache entry: ten
 * minutes. This is the v1.2 starting point; the trade-off is discussed in
 * buyer-driven-fallback-design.md §18.13. `PrewarmCache` takes a
 * `defaultTtlMs` constructor option, so tests and the future PR-E config
 * loader can substitute another value without re-architecting.
 */
export const DEFAULT_PREWARM_TTL_MS = 600_000;
|
|
12
|
+
|
|
13
|
+
/**
 * Lifecycle label carried by a cache entry:
 * - `"warming"`: a probe has been started via `beginWarming`.
 * - `"warm"`: the last commit produced at least one candidate.
 * - `"stale"`: a failure was recorded, or the TTL has elapsed (as reported
 *   by `freshness`).
 * - `"empty"`: no candidates are available, or the key is unknown.
 */
export type PrewarmState =
  | "warming"
  | "warm"
  | "stale"
  | "empty";
|
|
14
|
+
|
|
15
|
+
/** A seller as stored in the cache, with every field populated. */
export interface PrewarmCandidate {
  /** Seller identifier; `invalidateSeller` matches on this value. */
  sellerId: string;
  /** Seller endpoint URL. */
  url: string;
  /** Health score in the range 0-100. */
  healthScore: number;
  /** Time of the last success; 0 when not supplied (presumably epoch ms — TODO confirm). */
  lastSuccessAt: number;
  /** Time of the last failure; 0 when not supplied (presumably epoch ms — TODO confirm). */
  lastFailAt: number;
  /** Average latency in milliseconds. */
  avgLatencyMs: number;
}
|
|
23
|
+
|
|
24
|
+
/** One cache record for a (model, protocol, payment method) triple. */
export interface PrewarmEntry {
  /** Model id exactly as passed by the caller (the map key is normalized separately). */
  modelId: string;
  /** Protocol exactly as passed by the caller. */
  protocol: string;
  /** Payment method exactly as passed by the caller. */
  paymentMethod: string;
  /** Current lifecycle state of the entry. */
  state: PrewarmState;
  /** Sellers currently held for this triple. */
  candidates: PrewarmCandidate[];
  /** Clock reading at which the TTL window started. */
  warmedAt: number;
  /** Lifetime of the entry in milliseconds, measured from `warmedAt`. */
  ttlMs: number;
  /** Failures recorded since the last successful commit. */
  consecutiveWarmingFailures: number;
  /** Clock reading of the most recent begin, commit, or failure on this entry. */
  lastInFlightAt?: number;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Caller-supplied shape of a candidate. Only the seller id and url are
 * required; the remaining fields are defaulted when the candidate is stored.
 */
export interface PrewarmCandidateInput {
  /** Seller identifier. */
  sellerId: string;
  /** Seller endpoint URL. */
  url: string;
  /** Optional health score; stored clamped to 0-100, defaulting to 50. */
  healthScore?: number;
  /** Optional time of last success; stored as 0 when omitted. */
  lastSuccessAt?: number;
  /** Optional time of last failure; stored as 0 when omitted. */
  lastFailAt?: number;
  /** Optional average latency in milliseconds; stored as 0 when omitted. */
  avgLatencyMs?: number;
}
|
|
44
|
+
|
|
45
|
+
/**
 * Build the cache key for a (model, protocol, payment) triple.
 *
 * Each component is trimmed and lower-cased, so lookups are insensitive to
 * case and to surrounding whitespace. The components are then joined with
 * the U+0001 control character. The key is collision-free as long as no
 * component contains U+0001 itself; this function does not check for that.
 *
 * @param modelId - Model identifier.
 * @param protocol - Protocol name, e.g. `chat_completions`.
 * @param paymentMethod - Payment method name.
 * @returns The normalized, U+0001-delimited cache key.
 */
export function prewarmKey(modelId: string, protocol: string, paymentMethod: string): string {
  return [modelId, protocol, paymentMethod]
    .map((component) => component.trim().toLowerCase())
    .join("\u0001");
}
|
|
54
|
+
|
|
55
|
+
/**
 * Split a cache key into its three components.
 *
 * @param key - A string of three parts delimited by U+0001.
 * @returns The decoded triple, or `undefined` when the key does not have
 *   exactly three parts or any part is empty.
 */
function parseKey(key: string): { modelId: string; protocol: string; paymentMethod: string } | undefined {
  const segments = key.split("\u0001");
  const [modelId, protocol, paymentMethod] = segments;
  // Require exactly three segments, each non-empty.
  if (segments.length === 3 && modelId && protocol && paymentMethod) {
    return { modelId, protocol, paymentMethod };
  }
  return undefined;
}
|
|
66
|
+
|
|
67
|
+
/** Constructor options for `PrewarmCache`; every field is optional. */
interface PrewarmCacheOptions {
  /** TTL in milliseconds used when neither the call nor a previous entry supplies one. */
  defaultTtlMs?: number;
  /** Clock source returning the current time as a number; falls back to `Date.now`. */
  now?: () => number;
}
|
|
71
|
+
|
|
72
|
+
/**
 * In-memory store of prewarm results keyed by a normalized
 * (model, protocol, payment method) triple. Entries are replaced rather than
 * mutated in place: every write stores a new `PrewarmEntry` object.
 */
export class PrewarmCache {
  private readonly entries = new Map<string, PrewarmEntry>();
  private readonly defaultTtlMs: number;
  private readonly now: () => number;

  /**
   * @param options - Optional default TTL and clock; missing values fall
   *   back to `DEFAULT_PREWARM_TTL_MS` and `Date.now`.
   */
  constructor(options: PrewarmCacheOptions = {}) {
    this.now = options.now ?? Date.now;
    this.defaultTtlMs = options.defaultTtlMs ?? DEFAULT_PREWARM_TTL_MS;
  }

  /**
   * Fetch the stored entry for a triple without changing anything. The
   * returned object is the stored entry itself, not a copy. `undefined`
   * means the key has never been stored (or was removed); interpreting that
   * as a miss or as a known-empty model is left to the caller.
   */
  get(modelId: string, protocol: string, paymentMethod: string): PrewarmEntry | undefined {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    return this.entries.get(key);
  }

  /**
   * Describe how fresh the entry for a triple is, based on the cache clock.
   *
   * Unknown keys are reported as not present, expired, expiring soon, and
   * in state `"empty"`. For known keys, the entry is expired once its age
   * reaches `ttlMs`, and "expiring soon" once the remaining time is at most
   * 10% of `ttlMs`. An expired entry is reported as `"stale"` regardless of
   * its stored state.
   */
  freshness(modelId: string, protocol: string, paymentMethod: string): PrewarmFreshness {
    const found = this.get(modelId, protocol, paymentMethod);
    if (found === undefined) {
      return { present: false, expired: true, expiringSoon: true, state: "empty" };
    }

    const elapsedMs = this.now() - found.warmedAt;
    const isExpired = elapsedMs >= found.ttlMs;
    const remainingMs = Math.max(0, found.ttlMs - elapsedMs);
    const inFinalTenth = remainingMs <= found.ttlMs * 0.1;

    return {
      present: true,
      expired: isExpired,
      expiringSoon: !isExpired && inFinalTenth,
      remainingMs,
      state: isExpired ? "stale" : found.state,
      entry: found
    };
  }

  /**
   * Record that a probe for the triple has started. A new entry in state
   * `"warming"` is stored under the key. When an entry already existed, its
   * candidates, `warmedAt` and failure count are carried over, so the old
   * results remain available until a commit supersedes them.
   *
   * @param ttlMs - Optional TTL; otherwise the previous entry's TTL, then
   *   the cache default, is used.
   * @returns The key, the stored warming entry, and whether an entry
   *   existed beforehand.
   */
  beginWarming(modelId: string, protocol: string, paymentMethod: string, ttlMs?: number): PrewarmBeginResult {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const prior = this.entries.get(key);
    const startedAt = this.now();

    const warming: PrewarmEntry = {
      modelId,
      protocol,
      paymentMethod,
      state: "warming",
      candidates: prior?.candidates ?? [],
      warmedAt: prior?.warmedAt ?? startedAt,
      ttlMs: ttlMs ?? prior?.ttlMs ?? this.defaultTtlMs,
      consecutiveWarmingFailures: prior?.consecutiveWarmingFailures ?? 0,
      lastInFlightAt: startedAt
    };
    this.entries.set(key, warming);

    logger.debug("prewarm.cache.warming_started", "prewarm probe in flight", {
      modelId,
      protocol,
      paymentMethod,
      ttlMs: warming.ttlMs,
      previousState: prior?.state
    });

    return { key, entry: warming, hadPrevious: Boolean(prior) };
  }

  /**
   * Store the outcome of a finished probe. The TTL window restarts at the
   * current clock reading, the failure counter is zeroed, and the candidate
   * list is replaced wholesale. An empty candidate list yields state
   * `"empty"`; otherwise the state is `"warm"`.
   *
   * @returns The key, the stored entry, and the seller ids that were held
   *   before this commit (useful for churn telemetry).
   */
  commitWarm(input: {
    modelId: string;
    protocol: string;
    paymentMethod: string;
    candidates: PrewarmCandidateInput[];
    ttlMs?: number;
  }): PrewarmCommitResult {
    const { modelId, protocol, paymentMethod } = input;
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const prior = this.entries.get(key);
    const committedAt = this.now();
    const hasCandidates = input.candidates.length > 0;

    const committed: PrewarmEntry = {
      modelId,
      protocol,
      paymentMethod,
      state: hasCandidates ? "warm" : "empty",
      candidates: input.candidates.map(toCandidate),
      warmedAt: committedAt,
      ttlMs: input.ttlMs ?? prior?.ttlMs ?? this.defaultTtlMs,
      consecutiveWarmingFailures: 0,
      lastInFlightAt: committedAt
    };
    this.entries.set(key, committed);

    if (hasCandidates) {
      logger.info("prewarm.cache.committed", "prewarm commit updated candidates", {
        modelId,
        protocol,
        paymentMethod,
        candidateCount: committed.candidates.length,
        ttlMs: committed.ttlMs
      });
    } else {
      logger.warn("prewarm.cache.commit_empty", "prewarm commit returned no candidates", {
        modelId,
        protocol,
        paymentMethod
      });
    }

    const replacedSellers = prior?.candidates.map((candidate) => candidate.sellerId) ?? [];
    return { key, entry: committed, replacedSellers };
  }

  /**
   * Register a failed warm attempt for an existing entry: the entry becomes
   * `"stale"` and its consecutive-failure counter goes up by one. Candidates
   * and `warmedAt` are left as they were.
   *
   * @returns The updated entry, or `undefined` when no entry exists for the
   *   triple (nothing is stored in that case).
   */
  recordFailure(modelId: string, protocol: string, paymentMethod: string, errorMessage?: string): PrewarmEntry | undefined {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    const prior = this.entries.get(key);
    if (prior === undefined) {
      return undefined;
    }

    const failed: PrewarmEntry = {
      ...prior,
      state: "stale",
      consecutiveWarmingFailures: prior.consecutiveWarmingFailures + 1,
      lastInFlightAt: this.now()
    };
    this.entries.set(key, failed);

    logger.warn("prewarm.cache.failure_recorded", "prewarm commit failed; entry marked stale", {
      modelId,
      protocol,
      paymentMethod,
      consecutiveFailures: failed.consecutiveWarmingFailures,
      errorMessage
    });
    return failed;
  }

  /**
   * Remove one seller from every entry that lists it. An entry left with no
   * candidates is switched to state `"empty"`; other entries keep their
   * state.
   *
   * @returns How many entries contained the seller.
   */
  invalidateSeller(sellerId: string): number {
    let affected = 0;
    for (const [key, entry] of this.entries) {
      const survivors = entry.candidates.filter((candidate) => candidate.sellerId !== sellerId);
      if (survivors.length === entry.candidates.length) {
        continue; // seller not referenced by this entry
      }
      affected += 1;
      this.entries.set(key, {
        ...entry,
        candidates: survivors,
        state: survivors.length > 0 ? entry.state : "empty"
      });
    }

    if (affected > 0) {
      logger.info("prewarm.cache.seller_invalidated", "seller dropped from all prewarm entries", {
        sellerId,
        entriesAffected: affected
      });
    }
    return affected;
  }

  /**
   * Delete the entry for a single triple.
   *
   * @returns `true` when an entry existed and was removed, `false` otherwise.
   */
  invalidateKey(modelId: string, protocol: string, paymentMethod: string): boolean {
    const key = prewarmKey(modelId, protocol, paymentMethod);
    return this.entries.delete(key);
  }

  /**
   * Delete every entry whose age has reached its TTL.
   *
   * @param now - Time to evaluate against; defaults to the cache clock.
   * @returns The number of entries deleted.
   */
  evictExpired(now: number = this.now()): number {
    let evicted = 0;
    for (const [key, entry] of this.entries) {
      const ageMs = now - entry.warmedAt;
      if (ageMs < entry.ttlMs) {
        continue; // still inside its TTL window
      }
      this.entries.delete(key);
      evicted += 1;
    }

    if (evicted > 0) {
      logger.info("prewarm.cache.evicted", "expired prewarm entries evicted", { removed: evicted });
    }
    return evicted;
  }

  /**
   * Tell whether an entry is inside the last `withinMs` milliseconds of its
   * TTL. Entries that are unknown or already expired yield `false`.
   *
   * @param withinMs - Width of the pre-expiry window in milliseconds.
   * @param now - Time to evaluate against; defaults to the cache clock.
   */
  isExpiringSoon(modelId: string, protocol: string, paymentMethod: string, withinMs: number, now: number = this.now()): boolean {
    const found = this.get(modelId, protocol, paymentMethod);
    if (found === undefined) {
      return false;
    }
    const ageMs = now - found.warmedAt;
    const windowOpensAt = found.ttlMs - withinMs;
    return ageMs >= windowOpensAt && ageMs < found.ttlMs;
  }

  /**
   * Copy out every entry for diagnostics. Both the entry objects and the
   * candidate objects inside them are fresh copies, so callers may modify
   * the result freely.
   */
  snapshot(): PrewarmEntry[] {
    const copies: PrewarmEntry[] = [];
    for (const entry of this.entries.values()) {
      copies.push({
        ...entry,
        candidates: entry.candidates.map((candidate) => ({ ...candidate }))
      });
    }
    return copies;
  }

  /**
   * List the stored keys decoded into (model, protocol, payment) triples.
   * The values come from the normalized key, so they are trimmed and
   * lower-cased. Keys that cannot be decoded are skipped.
   */
  keys(): Array<{ modelId: string; protocol: string; paymentMethod: string }> {
    const triples: Array<{ modelId: string; protocol: string; paymentMethod: string }> = [];
    this.entries.forEach((_entry, key) => {
      const triple = parseKey(key);
      if (triple !== undefined) {
        triples.push(triple);
      }
    });
    return triples;
  }

  /** Number of entries currently stored. */
  size(): number {
    return this.entries.size;
  }

  /** Remove every entry. */
  clear(): void {
    this.entries.clear();
  }
}
|
|
329
|
+
|
|
330
|
+
/** Result of `PrewarmCache.freshness` for one triple. */
export interface PrewarmFreshness {
  /** `false` when no entry exists for the triple. */
  present: boolean;
  /** `true` when the entry's age has reached its TTL, or when no entry exists. */
  expired: boolean;
  /**
   * `true` when the entry is not expired and at most 10% of its TTL
   * remains; also `true` when no entry exists.
   */
  expiringSoon: boolean;
  /** Milliseconds until expiry, never negative; omitted when no entry exists. */
  remainingMs?: number;
  /** `"stale"` for expired entries, `"empty"` for unknown keys, otherwise the stored state. */
  state: PrewarmState;
  /** The stored entry itself (not a copy); omitted when no entry exists. */
  entry?: PrewarmEntry;
}
|
|
338
|
+
|
|
339
|
+
/** Result of `PrewarmCache.beginWarming`. */
export interface PrewarmBeginResult {
  /** Normalized cache key the entry was stored under. */
  key: string;
  /** The `"warming"` entry that was just stored. */
  entry: PrewarmEntry;
  /** Whether an entry already existed under the key before this call. */
  hadPrevious: boolean;
}
|
|
344
|
+
|
|
345
|
+
/** Result of `PrewarmCache.commitWarm`. */
export interface PrewarmCommitResult {
  /** Normalized cache key the entry was stored under. */
  key: string;
  /** The entry that was just stored. */
  entry: PrewarmEntry;
  /** Seller ids held by the entry this commit replaced; empty when there was none. */
  replacedSellers: string[];
}
|
|
350
|
+
|
|
351
|
+
/**
 * Normalize a caller-supplied candidate into the fully populated stored
 * form.
 *
 * Defaults: `healthScore` 50, `lastSuccessAt` 0, `lastFailAt` 0,
 * `avgLatencyMs` 0. The health score is clamped to 0-100. The latency is
 * floored at 0, and a non-finite latency (`NaN`, `±Infinity`) is treated as
 * unknown and stored as 0, mirroring the finite check applied to the health
 * score. Without that guard `Math.max(0, NaN)` would store `NaN`.
 *
 * @param input - Candidate as supplied to `commitWarm`.
 * @returns A new `PrewarmCandidate`; `input` is not modified.
 */
function toCandidate(input: PrewarmCandidateInput): PrewarmCandidate {
  const latencyMs = input.avgLatencyMs ?? 0;
  return {
    sellerId: input.sellerId,
    url: input.url,
    healthScore: clampScore(input.healthScore ?? 50),
    lastSuccessAt: input.lastSuccessAt ?? 0,
    lastFailAt: input.lastFailAt ?? 0,
    avgLatencyMs: Number.isFinite(latencyMs) ? Math.max(0, latencyMs) : 0
  };
}

/**
 * Restrict a health score to the inclusive range 0-100.
 *
 * @param score - Raw score.
 * @returns 50 for non-finite input (`NaN`, `±Infinity`), otherwise the
 *   score limited to [0, 100].
 */
function clampScore(score: number): number {
  if (!Number.isFinite(score)) {
    return 50;
  }
  if (score < 0) {
    return 0;
  }
  if (score > 100) {
    return 100;
  }
  return score;
}
|