pi-model-auto 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +164 -0
- package/package.json +42 -0
- package/src/canonical-models.ts +141 -0
- package/src/index.ts +611 -0
- package/src/quota.ts +306 -0
- package/src/router-core.ts +916 -0
package/src/quota.ts
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { dirname } from "node:path";
|
|
4
|
+
import type { Pool, ResolvedModel } from "./router-core.ts";
|
|
5
|
+
|
|
6
|
+
/**
 * Rate-limit figures extracted from one response's headers (see
 * `parseRateLimitHeaders`). Every field is optional because a response may
 * carry none, some, or all of the corresponding headers.
 *
 * - `remaining` / `limit`: numeric values of the "remaining tokens" and
 *   "token limit" headers.
 * - `resetAt`: absolute timestamp in the same unit as `Date.parse` results
 *   (epoch milliseconds); derived either from an absolute reset header or from
 *   `now` plus a relative duration / Retry-After value.
 */
export interface RateLimitSnapshot {
|
|
7
|
+
remaining?: number;
|
|
8
|
+
limit?: number;
|
|
9
|
+
resetAt?: number;
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
/** Availability of a plan: `"ok"` means usable, `"cooldown"` means temporarily excluded. */
export type PlanStatus = "ok" | "cooldown";
|
|
13
|
+
|
|
14
|
+
/**
 * Tracked state for a single plan key inside `QuotaState`.
 *
 * - `planKey`: identifier of the plan (see `buildPlanKey`).
 * - `status`: current availability.
 * - `cooldownUntil`: timestamp at or after which `QuotaState.isAvailable`
 *   flips the plan back to `"ok"`; cleared when the plan is `"ok"`.
 * - `reason`: why the plan is cooling down; this file writes `"429"` or
 *   `"low-remaining"`.
 * - `lastSnapshot`: rate-limit figures from the most recent recorded response,
 *   or undefined when that response carried none.
 * - `updatedAt`: the `now` value passed to the last call that changed this state.
 */
export interface PlanState {
|
|
15
|
+
planKey: string;
|
|
16
|
+
status: PlanStatus;
|
|
17
|
+
cooldownUntil?: number;
|
|
18
|
+
reason?: string;
|
|
19
|
+
lastSnapshot?: RateLimitSnapshot;
|
|
20
|
+
updatedAt: number;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
/**
 * Tunables for quota tracking.
 *
 * - `enabled`: when false, `filterPoolByQuota` returns the pool untouched.
 * - `reserveRatio`: a plan enters cooldown once `remaining / limit` drops to
 *   this ratio or below (see `QuotaState.recordResponse`).
 * - `defaultCooldownMs`: cooldown length, in milliseconds, used when a
 *   response gives no reset time or Retry-After value.
 * - `inTurnRetry`, `maxRetries`: not read anywhere in this file; presumably
 *   consumed by the router's retry logic — TODO confirm against the caller.
 */
export interface QuotaConfig {
|
|
24
|
+
enabled: boolean;
|
|
25
|
+
reserveRatio: number;
|
|
26
|
+
inTurnRetry: boolean;
|
|
27
|
+
maxRetries: number;
|
|
28
|
+
defaultCooldownMs: number;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
 * Inputs that `buildPlanKey` combines into a plan key.
 *
 * `provider` and `baseUrl` appear in the key in normalized form (provider is
 * trimmed and lowercased; baseUrl is trimmed with trailing slashes removed).
 * `apiKey`, `headers` and `env` are never embedded verbatim: they are only
 * fed into a truncated SHA-256 hash that forms the key's `auth:` component.
 */
export interface PlanIdentity {
|
|
32
|
+
provider: string;
|
|
33
|
+
baseUrl: string;
|
|
34
|
+
apiKey?: string;
|
|
35
|
+
headers?: Record<string, string>;
|
|
36
|
+
env?: Record<string, string>;
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
/**
 * Baseline configuration that `QuotaState` overlays caller overrides onto.
 * With these values a plan cools down once 5% or less of its limit remains,
 * and a cooldown with no server-provided reset lasts 300 000 ms (5 minutes).
 */
export const DEFAULT_QUOTA_CONFIG: QuotaConfig = {
|
|
40
|
+
enabled: true,
|
|
41
|
+
reserveRatio: 0.05,
|
|
42
|
+
inTurnRetry: false,
|
|
43
|
+
maxRetries: 2,
|
|
44
|
+
defaultCooldownMs: 300_000,
|
|
45
|
+
};
|
|
46
|
+
|
|
47
|
+
/** Schema version written by `QuotaState.persist`; `QuotaState.load` ignores files whose `version` differs. */
const QUOTA_STATE_VERSION = 2;
|
|
48
|
+
|
|
49
|
+
/** Shape of the JSON document that `QuotaState.persist` writes and `QuotaState.load` reads. */
interface PersistedQuotaState {
|
|
50
|
+
version: typeof QUOTA_STATE_VERSION;
|
|
51
|
+
plans: PlanState[];
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
 * Tracks, per plan key, whether a plan is usable or cooling down after a rate
 * limit, and can save/restore that state as JSON.
 *
 * Every `now` argument and stored timestamp is compared directly against
 * values produced by `Date.parse`, so they are expected to be epoch
 * milliseconds.
 */
export class QuotaState {
  /** Effective configuration: `DEFAULT_QUOTA_CONFIG` overlaid with the caller's defined overrides. */
  readonly config: QuotaConfig;
  private readonly plans = new Map<string, PlanState>();

  /**
   * @param config Full or partial configuration. Properties that are missing
   *   or explicitly `undefined` fall back to `DEFAULT_QUOTA_CONFIG`.
   */
  constructor(config: QuotaConfig | Partial<QuotaConfig> = DEFAULT_QUOTA_CONFIG) {
    // Drop explicit `undefined` overrides so they cannot erase a default
    // (e.g. `{ enabled: undefined }` must not silently disable quota handling).
    const overrides = Object.fromEntries(Object.entries(config).filter(([, value]) => value !== undefined));
    this.config = { ...DEFAULT_QUOTA_CONFIG, ...overrides };
  }

  /**
   * Reports whether a plan may be used at `now`.
   *
   * Unknown plans and plans with status `"ok"` are available. A cooling-down
   * plan becomes available (and is reset to `"ok"` in place) once `now`
   * reaches its deadline.
   *
   * @returns true when the plan is usable, false while it is cooling down.
   */
  isAvailable(planKey: string, now: number): boolean {
    const state = this.plans.get(planKey);
    if (!state || state.status === "ok") return true;

    // A cooldown restored from disk may lack a usable deadline. Fall back to
    // `updatedAt + defaultCooldownMs` so such a plan cannot stay locked forever.
    const until = state.cooldownUntil;
    const deadline =
      typeof until === "number" && Number.isFinite(until) ? until : state.updatedAt + this.config.defaultCooldownMs;

    if (now >= deadline) {
      state.status = "ok";
      state.cooldownUntil = undefined;
      state.reason = undefined;
      state.updatedAt = now;
      return true;
    }

    return false;
  }

  /**
   * Updates a plan's state from an HTTP response.
   *
   * - Status 429 puts the plan into cooldown via `recordRateLimited`.
   * - Otherwise, if both `remaining` and a positive `limit` are known and
   *   `remaining / limit <= reserveRatio`, the plan enters a
   *   `"low-remaining"` cooldown until the reported reset time (or for
   *   `defaultCooldownMs` when none was reported).
   * - In all other cases the plan is marked `"ok"`.
   *
   * @returns the live (mutable) state object for the plan.
   */
  recordResponse(
    planKey: string,
    status: number,
    headers: Record<string, string>,
    provider: string,
    now: number,
  ): PlanState {
    const snapshot = parseRateLimitHeaders(provider, headers, now);
    const state = this.get(planKey, now);
    state.lastSnapshot = isEmptySnapshot(snapshot) ? undefined : snapshot;
    state.updatedAt = now;

    if (status === 429) {
      return this.recordRateLimited(planKey, parseRetryAfter(headers["retry-after"], now), snapshot.resetAt, now);
    }

    if (snapshot.remaining != null && snapshot.limit != null && snapshot.limit > 0) {
      if (snapshot.remaining / snapshot.limit <= this.config.reserveRatio) {
        state.status = "cooldown";
        state.cooldownUntil = snapshot.resetAt ?? now + this.config.defaultCooldownMs;
        state.reason = "low-remaining";
        return state;
      }
    }

    state.status = "ok";
    state.cooldownUntil = undefined;
    state.reason = undefined;
    return state;
  }

  /**
   * Puts a plan into a `"429"` cooldown.
   *
   * The deadline is `resetAt` when given; otherwise `now + retryAfterMs`;
   * otherwise `now + defaultCooldownMs`.
   *
   * @returns the live (mutable) state object for the plan.
   */
  recordRateLimited(planKey: string, retryAfterMs: number | undefined, resetAt: number | undefined, now: number): PlanState {
    const state = this.get(planKey, now);
    state.status = "cooldown";
    state.cooldownUntil = resetAt ?? (retryAfterMs != null ? now + retryAfterMs : now + this.config.defaultCooldownMs);
    state.reason = "429";
    state.updatedAt = now;
    return state;
  }

  /** Returns a detached copy of one plan's state, or undefined if the plan is unknown. */
  snapshot(planKey: string): PlanState | undefined {
    const state = this.plans.get(planKey);
    return state ? { ...state, lastSnapshot: state.lastSnapshot ? { ...state.lastSnapshot } : undefined } : undefined;
  }

  /** Returns detached copies of every tracked plan's state. */
  snapshots(): PlanState[] {
    return [...this.plans.values()].map((state) => ({
      ...state,
      lastSnapshot: state.lastSnapshot ? { ...state.lastSnapshot } : undefined,
    }));
  }

  /**
   * Replaces the in-memory state with the contents of `file`.
   *
   * Best effort by design: a missing file, unreadable/invalid JSON, a version
   * mismatch, or a non-array `plans` leaves the current state untouched, and
   * individual entries that fail `isValidPlanState` are skipped.
   */
  load(file: string): void {
    if (!existsSync(file)) return;

    try {
      const parsed = JSON.parse(readFileSync(file, "utf8")) as Partial<PersistedQuotaState>;
      if (parsed.version !== QUOTA_STATE_VERSION || !Array.isArray(parsed.plans)) return;

      this.plans.clear();
      for (const plan of parsed.plans) {
        if (!isValidPlanState(plan)) continue;
        this.plans.set(plan.planKey, { ...plan, lastSnapshot: plan.lastSnapshot ? { ...plan.lastSnapshot } : undefined });
      }
    } catch {
      // Persisted state is only a cache; ignore read/parse failures.
      return;
    }
  }

  /** Writes all plan states to `file` as pretty-printed JSON, creating parent directories as needed. */
  persist(file: string): void {
    mkdirSync(dirname(file), { recursive: true });
    const data: PersistedQuotaState = { version: QUOTA_STATE_VERSION, plans: this.snapshots() };
    writeFileSync(file, `${JSON.stringify(data, null, 2)}\n`, "utf8");
  }

  /** Returns the live state for `planKey`, creating an `"ok"` entry stamped with `now` if none exists. */
  private get(planKey: string, now: number): PlanState {
    const existing = this.plans.get(planKey);
    if (existing) return existing;

    const state: PlanState = { planKey, status: "ok", updatedAt: now };
    this.plans.set(planKey, state);
    return state;
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Derives the key under which a plan's quota state is tracked.
 *
 * The key has the form `<provider>|<baseUrl>|auth:<hash>`, where provider is
 * trimmed and lowercased, baseUrl is normalized by `normalizeBaseUrl`, and
 * hash is `hashStable` over the API key, headers and env of the identity.
 */
export function buildPlanKey(identity: PlanIdentity): string {
  const providerPart = identity.provider.trim().toLowerCase();
  const urlPart = normalizeBaseUrl(identity.baseUrl);

  // Credentials only ever contribute through the hash, never verbatim.
  const credentials = {
    apiKey: identity.apiKey ?? null,
    headers: normalizeRecord(identity.headers),
    env: normalizeRecord(identity.env),
  };
  const authPart = `auth:${hashStable(credentials)}`;

  return [providerPart, urlPart, authPart].join("|");
}
|
|
172
|
+
|
|
173
|
+
/**
 * Removes from every tier of `pool` the models whose plan is excluded or
 * currently cooling down.
 *
 * @param pool Candidate models grouped by tier.
 * @param quota Quota tracker; when absent or disabled the pool is returned as is.
 * @param now Current time, forwarded to `QuotaState.isAvailable`.
 * @param excluded Plan keys to drop regardless of quota state.
 * @param planKeyFor Maps a model to its plan key; defaults to a key built from
 *   the model's provider and base URL only.
 * @returns The filtered pool, or the original `pool` when filtering would
 *   leave `all` empty (fail open rather than return nothing).
 */
export function filterPoolByQuota(
  pool: Pool,
  quota: QuotaState | undefined,
  now: number,
  excluded = new Set<string>(),
  planKeyFor: (item: ResolvedModel) => string = (item) =>
    buildPlanKey({ provider: item.model.provider, baseUrl: item.model.baseUrl }),
): Pool {
  if (quota == null || !quota.config.enabled) return pool;
  const tracker = quota;

  const usable = (items: ResolvedModel[]): ResolvedModel[] =>
    items.filter((candidate) => {
      const key = planKeyFor(candidate);
      // Excluded plans are rejected before the tracker is consulted.
      if (excluded.has(key)) return false;
      return tracker.isAvailable(key, now);
    });

  const cheapPool = usable(pool.cheapPool);
  const standardPool = usable(pool.standardPool);
  const strongPool = usable(pool.strongPool);
  const unknownPool = usable(pool.unknownPool);
  const all = usable(pool.all);

  if (all.length === 0) return pool;
  return { cheapPool, standardPool, strongPool, unknownPool, all };
}
|
|
198
|
+
|
|
199
|
+
/**
 * Extracts token rate-limit figures from response headers.
 *
 * Header names are matched case-insensitively. The
 * `anthropic-ratelimit-tokens-*` family takes precedence; otherwise the
 * `x-ratelimit-*-tokens` family is used, whose reset value is a duration
 * relative to `now`. If neither yields a reset time, `retry-after` is used.
 *
 * @param _provider Currently unused.
 * @param headers Response headers.
 * @param now Reference time for relative reset values.
 * @returns A snapshot containing only the fields that could be determined.
 */
export function parseRateLimitHeaders(
  _provider: string,
  headers: Record<string, string>,
  now: number,
): RateLimitSnapshot {
  const lower = normalizeHeaders(headers);
  let remaining: number | undefined;
  let limit: number | undefined;
  let resetAt: number | undefined;

  const absoluteStyleRemaining = lower["anthropic-ratelimit-tokens-remaining"];
  const relativeStyleRemaining = lower["x-ratelimit-remaining-tokens"];

  if (absoluteStyleRemaining != null) {
    remaining = num(absoluteStyleRemaining);
    limit = num(lower["anthropic-ratelimit-tokens-limit"]);
    resetAt = parseRfc3339(lower["anthropic-ratelimit-tokens-reset"]);
  } else if (relativeStyleRemaining != null) {
    remaining = num(relativeStyleRemaining);
    limit = num(lower["x-ratelimit-limit-tokens"]);
    const untilResetMs = parseDuration(lower["x-ratelimit-reset-tokens"]);
    if (untilResetMs != null) resetAt = now + untilResetMs;
  }

  // Retry-After is only a fallback for a missing reset time.
  const retryAfterMs = parseRetryAfter(lower["retry-after"], now);
  if (resetAt == null && retryAfterMs != null) resetAt = now + retryAfterMs;

  return pruneSnapshot({ remaining, limit, resetAt });
}
|
|
223
|
+
|
|
224
|
+
/**
 * Converts a timestamp string to epoch milliseconds using `Date.parse`.
 *
 * @returns The parsed time, or undefined when the value is absent or unparseable.
 */
export function parseRfc3339(value: string | undefined): number | undefined {
  if (value == null) {
    return undefined;
  }

  const epochMs = Date.parse(value);
  if (!Number.isFinite(epochMs)) {
    return undefined;
  }
  return epochMs;
}
|
|
229
|
+
|
|
230
|
+
/**
 * Parses a relative duration into milliseconds.
 *
 * Accepted forms (case-insensitive, surrounding whitespace ignored):
 * - a bare non-negative number, interpreted as seconds (`"2"` → 2000);
 * - one or more `<number><unit>` tokens with unit `ms`, `s`, `m` or `h`,
 *   optionally separated by whitespace (`"1m30s"` → 90000).
 *
 * The whole string must consist of such tokens. Input with any other
 * residue (e.g. `"abc5s"`, `"1.5.5s"`) is rejected instead of being
 * partially parsed.
 *
 * @returns The duration in milliseconds, or undefined when the value is
 *   absent, blank, or malformed.
 */
export function parseDuration(value: string | undefined): number | undefined {
  if (value == null) return undefined;
  const trimmed = value.trim().toLowerCase();
  if (!trimmed) return undefined;
  if (/^\d+(?:\.\d+)?$/.test(trimmed)) return Number(trimmed) * 1_000;

  // Validate the entire string first so stray characters cannot be skipped over.
  if (!/^(?:\d+(?:\.\d+)?(?:ms|h|m|s)\s*)+$/.test(trimmed)) return undefined;

  const unitToMs = new Map<string, number>([
    ["ms", 1],
    ["s", 1_000],
    ["m", 60_000],
    ["h", 3_600_000],
  ]);

  let total = 0;
  for (const match of trimmed.matchAll(/(\d+(?:\.\d+)?)(ms|h|m|s)/g)) {
    const amount = Number(match[1]);
    const scale = unitToMs.get(match[2] ?? "");
    if (!Number.isFinite(amount) || scale === undefined) return undefined;
    total += amount * scale;
  }

  return total;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Interprets a Retry-After header value as a delay in milliseconds.
 *
 * A bare non-negative number is taken as seconds. Anything else is handed to
 * `Date.parse` and converted to the time remaining from `now`, clamped at 0.
 *
 * @returns The delay in milliseconds, or undefined when the value is absent,
 *   blank, or unparseable.
 */
export function parseRetryAfter(value: string | undefined, now: number): number | undefined {
  const text = value?.trim();
  if (!text) {
    return undefined;
  }

  const isPlainNumber = /^\d+(?:\.\d+)?$/.test(text);
  if (isPlainNumber) {
    return Number(text) * 1_000;
  }

  const retryAtMs = Date.parse(text);
  return Number.isFinite(retryAtMs) ? Math.max(0, retryAtMs - now) : undefined;
}
|
|
262
|
+
|
|
263
|
+
/** Returns a copy of `headers` with every header name lowercased; values are untouched. */
function normalizeHeaders(headers: Record<string, string>): Record<string, string> {
  const lowered: [string, string][] = [];
  for (const [name, headerValue] of Object.entries(headers)) {
    lowered.push([name.toLowerCase(), headerValue]);
  }
  return Object.fromEntries(lowered);
}
|
|
266
|
+
|
|
267
|
+
/** Trims surrounding whitespace from a base URL and strips any trailing slashes. */
function normalizeBaseUrl(baseUrl: string): string {
  const trimmed = baseUrl.trim();
  const withoutTrailingSlashes = trimmed.replace(/\/+$/, "");
  return withoutTrailingSlashes;
}
|
|
270
|
+
|
|
271
|
+
/**
 * Canonicalizes a string record for hashing: keys are lowercased and entries
 * are ordered by key (using `localeCompare`).
 *
 * @returns The canonical record, or null when the input is absent or empty.
 */
function normalizeRecord(record: Record<string, string> | undefined): Record<string, string> | null {
  if (!record) return null;

  const pairs = Object.entries(record);
  if (pairs.length === 0) return null;

  const lowered = pairs.map(([name, text]) => [name.toLowerCase(), text] as const);
  lowered.sort((left, right) => left[0].localeCompare(right[0]));
  return Object.fromEntries(lowered);
}
|
|
279
|
+
|
|
280
|
+
/** Returns the first 16 hex characters of the SHA-256 digest of `JSON.stringify(value)`. */
function hashStable(value: unknown): string {
  const serialized = JSON.stringify(value);
  const digestHex = createHash("sha256").update(serialized).digest("hex");
  return digestHex.slice(0, 16);
}
|
|
283
|
+
|
|
284
|
+
/**
 * Parses a header value as a finite number.
 *
 * Blank or whitespace-only values are treated as missing. `Number("")` is 0,
 * which would otherwise make an empty "remaining" header look like an
 * exhausted quota.
 *
 * @returns The parsed number, or undefined when the value is absent, blank,
 *   or not a finite number.
 */
function num(value: string | undefined): number | undefined {
  if (value == null || value.trim() === "") return undefined;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : undefined;
}
|
|
289
|
+
|
|
290
|
+
/** Returns a copy of `snapshot` without the properties whose value is null or undefined. */
function pruneSnapshot(snapshot: RateLimitSnapshot): RateLimitSnapshot {
  const present = Object.entries(snapshot).filter((entry) => entry[1] != null);
  return Object.fromEntries(present) as RateLimitSnapshot;
}
|
|
293
|
+
|
|
294
|
+
/** True when none of `remaining`, `limit` and `resetAt` carries a value. */
function isEmptySnapshot(snapshot: RateLimitSnapshot): boolean {
  const fields = [snapshot.remaining, snapshot.limit, snapshot.resetAt];
  return fields.every((field) => field == null);
}
|
|
297
|
+
|
|
298
|
+
/**
 * Type guard for plan entries read from persisted JSON.
 *
 * Beyond the three required fields, it also checks the optional ones, so
 * malformed data cannot enter the tracker:
 * - `updatedAt` must be a finite number;
 * - a `"cooldown"` entry must carry a finite `cooldownUntil`, because a
 *   cooldown without a deadline could never expire;
 * - `cooldownUntil` (when present) must be a finite number and `reason`
 *   (when present) a string;
 * - `lastSnapshot` (when present) must be a plain object whose `remaining`,
 *   `limit` and `resetAt` are each absent or a finite number.
 */
function isValidPlanState(value: unknown): value is PlanState {
  if (!value || typeof value !== "object") return false;

  const isFiniteNumber = (candidate: unknown): boolean => typeof candidate === "number" && Number.isFinite(candidate);
  const isOptionalFiniteNumber = (candidate: unknown): boolean => candidate === undefined || isFiniteNumber(candidate);

  const { planKey, status, updatedAt, cooldownUntil, reason, lastSnapshot } = value as Record<string, unknown>;

  if (typeof planKey !== "string") return false;
  if (status !== "ok" && status !== "cooldown") return false;
  if (!isFiniteNumber(updatedAt)) return false;

  // A cooldown needs a real deadline; an "ok" entry may simply omit it.
  const deadlineOk = status === "cooldown" ? isFiniteNumber(cooldownUntil) : isOptionalFiniteNumber(cooldownUntil);
  if (!deadlineOk) return false;

  if (reason !== undefined && typeof reason !== "string") return false;

  if (lastSnapshot !== undefined) {
    if (lastSnapshot === null || typeof lastSnapshot !== "object" || Array.isArray(lastSnapshot)) return false;
    const { remaining, limit, resetAt } = lastSnapshot as Record<string, unknown>;
    if (![remaining, limit, resetAt].every((field) => isOptionalFiniteNumber(field))) return false;
  }

  return true;
}
|