aigetwey 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -1
- package/README.md +28 -7
- package/config.example.yaml +0 -1
- package/dashboard/src/app/(console)/quota/page.tsx +2 -2
- package/dashboard/src/components/BudgetForm.tsx +15 -17
- package/dashboard/src/components/{QuotaView.tsx → BudgetTracker.tsx} +71 -56
- package/dashboard/src/components/CooldownTimer.tsx +1 -1
- package/dashboard/src/components/EndpointView.tsx +255 -47
- package/dashboard/src/components/LogTable.tsx +32 -25
- package/dashboard/src/components/ProviderManager.tsx +3 -28
- package/dashboard/src/components/Rail.tsx +1 -1
- package/dashboard/src/components/RoutingView.tsx +6 -2
- package/dashboard/src/components/TopBar.tsx +1 -1
- package/dashboard/src/components/ui.tsx +6 -1
- package/dashboard/src/lib/client.ts +6 -5
- package/dashboard/src/lib/gateway.ts +23 -16
- package/dist/config.js +86 -23
- package/dist/config.js.map +1 -1
- package/dist/core/budget.js +1 -1
- package/dist/core/budget.js.map +1 -1
- package/dist/core/fallback.js +0 -6
- package/dist/core/fallback.js.map +1 -1
- package/dist/core/handler.js +6 -5
- package/dist/core/handler.js.map +1 -1
- package/dist/core/keysUsage.js +15 -0
- package/dist/core/keysUsage.js.map +1 -0
- package/dist/core/ratelimit.js +15 -0
- package/dist/core/ratelimit.js.map +1 -0
- package/dist/core/state.js +5 -13
- package/dist/core/state.js.map +1 -1
- package/dist/core/window.js +35 -0
- package/dist/core/window.js.map +1 -0
- package/dist/db.js +0 -20
- package/dist/db.js.map +1 -1
- package/dist/routes/admin.js +55 -10
- package/dist/routes/admin.js.map +1 -1
- package/dist/routes/v1.js +14 -1
- package/dist/routes/v1.js.map +1 -1
- package/dist/server.js +1 -7
- package/dist/server.js.map +1 -1
- package/dist/stream/openai-stream.js +3 -0
- package/dist/stream/openai-stream.js.map +1 -1
- package/package.json +1 -1
- package/src/config.ts +89 -23
- package/src/core/budget.ts +1 -1
- package/src/core/fallback.ts +0 -9
- package/src/core/handler.ts +9 -7
- package/src/core/keysUsage.ts +49 -0
- package/src/core/ratelimit.ts +25 -0
- package/src/core/state.ts +4 -14
- package/src/core/window.ts +45 -0
- package/src/db.ts +0 -23
- package/src/routes/admin.ts +61 -9
- package/src/routes/v1.ts +18 -1
- package/src/server.ts +1 -8
- package/src/stream/openai-stream.ts +3 -1
- package/src/core/quota.ts +0 -253
package/src/routes/v1.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
|
|
2
2
|
import { checkAuth, extractKey, clientKeyFingerprint } from "../middleware/auth.js";
|
|
3
|
+
import { isKeyExpired } from "../config.js";
|
|
3
4
|
import type { GatewayState } from "../core/state.js";
|
|
4
5
|
import { handle, GatewayError, type HandleDeps } from "../core/handler.js";
|
|
5
6
|
import type { WireFormat } from "../core/canonical.js";
|
|
6
7
|
import type { UsageDB } from "../db.js";
|
|
8
|
+
import { RateLimiter } from "../core/ratelimit.js";
|
|
7
9
|
|
|
8
10
|
/**
|
|
9
11
|
* /v1 proxy surface. Auth-gates on the gateway's own keys (read from state each
|
|
@@ -11,6 +13,8 @@ import type { UsageDB } from "../db.js";
|
|
|
11
13
|
* pipeline (non-stream JSON or SSE stream).
|
|
12
14
|
*/
|
|
13
15
|
export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?: UsageDB): void {
|
|
16
|
+
const limiter = new RateLimiter();
|
|
17
|
+
|
|
14
18
|
const requireAuth = {
|
|
15
19
|
preHandler: (req: FastifyRequest, reply: FastifyReply, done: (err?: Error) => void) => {
|
|
16
20
|
const res = checkAuth(req, state.config.server.api_keys);
|
|
@@ -18,6 +22,19 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
|
|
|
18
22
|
reply.code(res.status ?? 401).send({ error: res.error });
|
|
19
23
|
return; // skip done() to short-circuit the route
|
|
20
24
|
}
|
|
25
|
+
|
|
26
|
+
const presented = extractKey(req);
|
|
27
|
+
if (presented && isKeyExpired(state.config.server, presented, Date.now())) {
|
|
28
|
+
reply.code(403).send({ error: "key expired" });
|
|
29
|
+
return; // short-circuit
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const rpm = presented ? state.config.server.key_rpm?.[presented] : undefined;
|
|
33
|
+
if (presented && rpm && limiter.over(clientKeyFingerprint(presented), rpm)) {
|
|
34
|
+
reply.code(429).send({ error: "rate limit exceeded" });
|
|
35
|
+
return; // short-circuit
|
|
36
|
+
}
|
|
37
|
+
|
|
21
38
|
done();
|
|
22
39
|
},
|
|
23
40
|
};
|
|
@@ -28,9 +45,9 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
|
|
|
28
45
|
return {
|
|
29
46
|
config: state.config,
|
|
30
47
|
pool: state.pool,
|
|
31
|
-
quota: state.quota,
|
|
32
48
|
budget: state.budget,
|
|
33
49
|
db,
|
|
50
|
+
clientKeyModels: presented ? state.config.server.key_models?.[presented] : undefined,
|
|
34
51
|
clientKeyFp: presented ? clientKeyFingerprint(presented) : undefined,
|
|
35
52
|
log: (msg) => app.log.info(msg),
|
|
36
53
|
};
|
package/src/server.ts
CHANGED
|
@@ -4,7 +4,6 @@ import { loadConfig } from "./config.js";
|
|
|
4
4
|
import { registerRoutes } from "./routes/index.js";
|
|
5
5
|
import { GatewayState } from "./core/state.js";
|
|
6
6
|
import { UsageDB } from "./db.js";
|
|
7
|
-
import { QuotaTracker } from "./core/quota.js";
|
|
8
7
|
import { AuthStore } from "./core/authStore.js";
|
|
9
8
|
import { consoleBuffer } from "./core/console-buffer.js";
|
|
10
9
|
|
|
@@ -52,14 +51,8 @@ async function main(): Promise<void> {
|
|
|
52
51
|
const dataDir = resolve(process.env.AIGETWEY_DATA_DIR ?? "data");
|
|
53
52
|
const db = new UsageDB(join(dataDir, "usage.sqlite"));
|
|
54
53
|
|
|
55
|
-
// quota counts persist via the DB so a restart within a window keeps the budget.
|
|
56
|
-
const quota = new QuotaTracker(Date.now, {
|
|
57
|
-
load: () => db.loadQuota(),
|
|
58
|
-
save: (id, start, consumed) => db.saveQuota(id, start, consumed),
|
|
59
|
-
});
|
|
60
|
-
|
|
61
54
|
// holder enables runtime config edits (hot-reload) from the dashboard.
|
|
62
|
-
const state = new GatewayState(configPath, config,
|
|
55
|
+
const state = new GatewayState(configPath, config, db);
|
|
63
56
|
// admin password lives in a hash store (seeded from the env on first run,
|
|
64
57
|
// changeable at runtime from the dashboard).
|
|
65
58
|
const auth = AuthStore.open(dataDir, process.env.AIGETWEY_ADMIN_PASSWORD);
|
|
package/src/stream/openai-stream.ts
CHANGED
|
@@ -24,7 +24,9 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
24
24
|
/** Lift vendor reasoning fields into the canonical `delta.reasoning`. */
|
|
25
25
|
function normalize(chunk: CanonicalChunk): CanonicalChunk {
|
|
26
26
|
for (const choice of chunk.choices ?? []) {
|
|
27
|
-
|
|
27
|
+
// a finish_reason chunk carries no `delta`; skip it (and any delta-less choice).
|
|
28
|
+
const d = choice.delta as (Record<string, unknown> & { reasoning?: string }) | undefined;
|
|
29
|
+
if (!d) continue;
|
|
28
30
|
if (d.reasoning === undefined) {
|
|
29
31
|
const vendor = (d["reasoning_content"] as string | undefined) ?? (d["reasoning"] as string | undefined);
|
|
30
32
|
if (vendor) d.reasoning = vendor;
|
package/src/core/quota.ts
DELETED
|
@@ -1,253 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Per-provider token quota tracking with scheduled window resets.
|
|
3
|
-
*
|
|
4
|
-
* Distinct from the key-pool cooldown: a cooldown is a transient penalty after a
|
|
5
|
-
* 429; a quota is a budget that refills on a schedule (a 5-hour rolling window, a
|
|
6
|
-
* daily/weekly/monthly calendar boundary). When a provider's `limit_tokens` is
|
|
7
|
-
* reached before its window resets, routing skips it — like a key that's cooling
|
|
8
|
-
* down, but for the whole provider.
|
|
9
|
-
*
|
|
10
|
-
* State is in-memory, optionally persisted so counts survive a restart within
|
|
11
|
-
* the same window. Calendar boundaries are computed in the provider's timezone.
|
|
12
|
-
*/
|
|
13
|
-
import type { Provider, Quota } from "../config.js";
|
|
14
|
-
|
|
15
|
-
const HOUR_MS = 3600_000;
|
|
16
|
-
const DAY_MS = 24 * HOUR_MS;
|
|
17
|
-
|
|
18
|
-
const WEEKDAYS = ["sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"];
|
|
19
|
-
|
|
20
|
-
/** Optional persistence hook so counts survive a restart within a window. */
|
|
21
|
-
export interface QuotaStore {
|
|
22
|
-
load(): Array<{ provider_id: string; window_start: number; consumed: number }>;
|
|
23
|
-
save(providerId: string, windowStart: number, consumed: number): void;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
interface QuotaState {
|
|
27
|
-
windowStart: number;
|
|
28
|
-
consumed: number;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
export interface QuotaSnapshot {
|
|
32
|
-
provider: string;
|
|
33
|
-
window: Quota["window"];
|
|
34
|
-
consumed: number;
|
|
35
|
-
limit_tokens?: number;
|
|
36
|
-
/** ms until the next scheduled reset */
|
|
37
|
-
reset_in_ms: number;
|
|
38
|
-
/** 0..1 fraction of the limit used, if a limit is set */
|
|
39
|
-
pct?: number;
|
|
40
|
-
exhausted: boolean;
|
|
41
|
-
/** true when a limit is set and pct >= the quota's alert_at (default 0.8) */
|
|
42
|
-
alert: boolean;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
// ---- timezone-aware calendar math -----------------------------------------
|
|
46
|
-
|
|
47
|
-
/** Wall-clock offset (ms) of `tz` at instant `date`: tzWallAsUTC - actualUTC. */
|
|
48
|
-
function tzOffsetMs(date: Date, tz: string): number {
|
|
49
|
-
const dtf = new Intl.DateTimeFormat("en-US", {
|
|
50
|
-
timeZone: tz,
|
|
51
|
-
hourCycle: "h23",
|
|
52
|
-
year: "numeric",
|
|
53
|
-
month: "2-digit",
|
|
54
|
-
day: "2-digit",
|
|
55
|
-
hour: "2-digit",
|
|
56
|
-
minute: "2-digit",
|
|
57
|
-
second: "2-digit",
|
|
58
|
-
});
|
|
59
|
-
const parts = Object.fromEntries(dtf.formatToParts(date).map((p) => [p.type, p.value]));
|
|
60
|
-
const asUTC = Date.UTC(
|
|
61
|
-
Number(parts.year),
|
|
62
|
-
Number(parts.month) - 1,
|
|
63
|
-
Number(parts.day),
|
|
64
|
-
Number(parts.hour),
|
|
65
|
-
Number(parts.minute),
|
|
66
|
-
Number(parts.second),
|
|
67
|
-
);
|
|
68
|
-
return asUTC - date.getTime();
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
/** Convert a desired wall-clock time in `tz` to an epoch ms. DST-corrected once. */
|
|
72
|
-
function zonedWallToEpoch(y: number, mo: number, d: number, h: number, mi: number, tz: string): number {
|
|
73
|
-
const guessUTC = Date.UTC(y, mo, d, h, mi);
|
|
74
|
-
const offset = tzOffsetMs(new Date(guessUTC), tz);
|
|
75
|
-
let epoch = guessUTC - offset;
|
|
76
|
-
// re-check once: the offset can differ across a DST boundary
|
|
77
|
-
const offset2 = tzOffsetMs(new Date(epoch), tz);
|
|
78
|
-
if (offset2 !== offset) epoch = guessUTC - offset2;
|
|
79
|
-
return epoch;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/** Wall-clock parts of `nowMs` in `tz`. */
|
|
83
|
-
function zonedParts(nowMs: number, tz: string) {
|
|
84
|
-
const dtf = new Intl.DateTimeFormat("en-US", {
|
|
85
|
-
timeZone: tz,
|
|
86
|
-
hourCycle: "h23",
|
|
87
|
-
weekday: "long",
|
|
88
|
-
year: "numeric",
|
|
89
|
-
month: "2-digit",
|
|
90
|
-
day: "2-digit",
|
|
91
|
-
hour: "2-digit",
|
|
92
|
-
minute: "2-digit",
|
|
93
|
-
});
|
|
94
|
-
const p = Object.fromEntries(dtf.formatToParts(nowMs).map((x) => [x.type, x.value]));
|
|
95
|
-
return {
|
|
96
|
-
year: Number(p.year),
|
|
97
|
-
month: Number(p.month) - 1,
|
|
98
|
-
day: Number(p.day),
|
|
99
|
-
hour: Number(p.hour),
|
|
100
|
-
minute: Number(p.minute),
|
|
101
|
-
weekday: String(p.weekday).toLowerCase(),
|
|
102
|
-
};
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
function parseHHMM(reset_at: string | undefined): { h: number; m: number } {
|
|
106
|
-
const m = /^(\d{1,2}):(\d{2})$/.exec(reset_at ?? "");
|
|
107
|
-
if (!m) return { h: 0, m: 0 };
|
|
108
|
-
return { h: Math.min(23, Number(m[1])), m: Math.min(59, Number(m[2])) };
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
/**
|
|
112
|
-
* Next reset instant (epoch ms) strictly after `now` for a quota schedule.
|
|
113
|
-
* - 5h: rolling — windowStart + 5h.
|
|
114
|
-
* - daily: next `reset_at` (HH:MM, default 00:00) wall-clock in tz.
|
|
115
|
-
* - weekly: next `reset_at` weekday (default monday) at 00:00 in tz.
|
|
116
|
-
* - monthly: next 1st of month at 00:00 in tz.
|
|
117
|
-
*/
|
|
118
|
-
export type WindowSpec = Pick<Quota, "window" | "reset_at" | "timezone">;
|
|
119
|
-
|
|
120
|
-
export function nextResetAt(quota: WindowSpec, windowStart: number, now: number): number {
|
|
121
|
-
const tz = quota.timezone || "UTC";
|
|
122
|
-
if (quota.window === "5h") return windowStart + 5 * HOUR_MS;
|
|
123
|
-
|
|
124
|
-
const p = zonedParts(now, tz);
|
|
125
|
-
|
|
126
|
-
if (quota.window === "daily") {
|
|
127
|
-
const { h, m } = parseHHMM(quota.reset_at);
|
|
128
|
-
let candidate = zonedWallToEpoch(p.year, p.month, p.day, h, m, tz);
|
|
129
|
-
if (candidate <= now) candidate = zonedWallToEpoch(p.year, p.month, p.day + 1, h, m, tz);
|
|
130
|
-
return candidate;
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
if (quota.window === "weekly") {
|
|
134
|
-
const target = WEEKDAYS.indexOf((quota.reset_at ?? "monday").toLowerCase());
|
|
135
|
-
const targetIdx = target === -1 ? 1 : target;
|
|
136
|
-
const curIdx = WEEKDAYS.indexOf(p.weekday);
|
|
137
|
-
let daysAhead = (targetIdx - curIdx + 7) % 7;
|
|
138
|
-
let candidate = zonedWallToEpoch(p.year, p.month, p.day + daysAhead, 0, 0, tz);
|
|
139
|
-
if (candidate <= now) candidate = zonedWallToEpoch(p.year, p.month, p.day + daysAhead + 7, 0, 0, tz);
|
|
140
|
-
return candidate;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// monthly: first of next month at 00:00
|
|
144
|
-
return zonedWallToEpoch(p.year, p.month + 1, 1, 0, 0, tz);
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
/**
|
|
148
|
-
* Epoch ms of the START of the window containing `now`.
|
|
149
|
-
* - 5h: fixed 5-hour grid floor (stateless; no per-provider anchor).
|
|
150
|
-
* - daily: today's reset_at in tz, or yesterday's if that's still ahead.
|
|
151
|
-
* - weekly: the most recent occurrence of the target weekday at 00:00 in tz.
|
|
152
|
-
* - monthly: the 1st of the current month at 00:00 in tz.
|
|
153
|
-
*/
|
|
154
|
-
export function currentWindowStart(spec: WindowSpec, now: number): number {
|
|
155
|
-
const tz = spec.timezone || "UTC";
|
|
156
|
-
if (spec.window === "5h") return Math.floor(now / (5 * HOUR_MS)) * (5 * HOUR_MS);
|
|
157
|
-
|
|
158
|
-
const p = zonedParts(now, tz);
|
|
159
|
-
|
|
160
|
-
if (spec.window === "daily") {
|
|
161
|
-
const { h, m } = parseHHMM(spec.reset_at);
|
|
162
|
-
let start = zonedWallToEpoch(p.year, p.month, p.day, h, m, tz);
|
|
163
|
-
if (start > now) start = zonedWallToEpoch(p.year, p.month, p.day - 1, h, m, tz);
|
|
164
|
-
return start;
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
if (spec.window === "weekly") {
|
|
168
|
-
const target = WEEKDAYS.indexOf((spec.reset_at ?? "monday").toLowerCase());
|
|
169
|
-
const targetIdx = target === -1 ? 1 : target;
|
|
170
|
-
const curIdx = WEEKDAYS.indexOf(p.weekday);
|
|
171
|
-
const daysBehind = (curIdx - targetIdx + 7) % 7;
|
|
172
|
-
let start = zonedWallToEpoch(p.year, p.month, p.day - daysBehind, 0, 0, tz);
|
|
173
|
-
if (start > now) start = zonedWallToEpoch(p.year, p.month, p.day - daysBehind - 7, 0, 0, tz);
|
|
174
|
-
return start;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
// monthly
|
|
178
|
-
return zonedWallToEpoch(p.year, p.month, 1, 0, 0, tz);
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
export class QuotaTracker {
|
|
182
|
-
private readonly states = new Map<string, QuotaState>();
|
|
183
|
-
|
|
184
|
-
constructor(
|
|
185
|
-
private readonly now: () => number = Date.now,
|
|
186
|
-
private readonly store?: QuotaStore,
|
|
187
|
-
) {
|
|
188
|
-
if (store) {
|
|
189
|
-
for (const row of store.load()) {
|
|
190
|
-
this.states.set(row.provider_id, { windowStart: row.window_start, consumed: row.consumed });
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
/**
|
|
196
|
-
* Return the live state for a provider, rolling the window over (resetting
|
|
197
|
-
* consumed to 0) if `now` has crossed the scheduled reset boundary.
|
|
198
|
-
*/
|
|
199
|
-
private current(provider: Provider): QuotaState | null {
|
|
200
|
-
if (!provider.quota) return null;
|
|
201
|
-
const t = this.now();
|
|
202
|
-
const state = this.states.get(provider.id) ?? { windowStart: t, consumed: 0 };
|
|
203
|
-
if (!this.states.has(provider.id)) this.states.set(provider.id, state);
|
|
204
|
-
// boundary is the first reset AFTER this window opened — computed from
|
|
205
|
-
// windowStart, not `now`. Computing it from `now` would always return the
|
|
206
|
-
// NEXT future boundary and so never detect that we've crossed one.
|
|
207
|
-
const reset = nextResetAt(provider.quota, state.windowStart, state.windowStart);
|
|
208
|
-
if (t >= reset) {
|
|
209
|
-
state.windowStart = t;
|
|
210
|
-
state.consumed = 0;
|
|
211
|
-
this.store?.save(provider.id, state.windowStart, state.consumed);
|
|
212
|
-
}
|
|
213
|
-
return state;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
/** Add consumed tokens for a provider (no-op if it has no quota config). */
|
|
217
|
-
consume(provider: Provider, tokens: number): void {
|
|
218
|
-
const state = this.current(provider);
|
|
219
|
-
if (!state) return;
|
|
220
|
-
state.consumed += Math.max(0, tokens);
|
|
221
|
-
this.store?.save(provider.id, state.windowStart, state.consumed);
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
/** True when a token limit is set AND it's been reached in the current window. */
|
|
225
|
-
isExhausted(provider: Provider): boolean {
|
|
226
|
-
const state = this.current(provider);
|
|
227
|
-
if (!state || !provider.quota?.limit_tokens) return false;
|
|
228
|
-
return state.consumed >= provider.quota.limit_tokens;
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
/** Dashboard view: window, consumed, countdown, and progress for each provider. */
|
|
232
|
-
snapshot(providers: Provider[]): QuotaSnapshot[] {
|
|
233
|
-
const t = this.now();
|
|
234
|
-
return providers.flatMap((provider) => {
|
|
235
|
-
if (!provider.quota) return [];
|
|
236
|
-
const state = this.current(provider)!;
|
|
237
|
-
const reset = nextResetAt(provider.quota, state.windowStart, t);
|
|
238
|
-
const limit = provider.quota.limit_tokens;
|
|
239
|
-
return [
|
|
240
|
-
{
|
|
241
|
-
provider: provider.id,
|
|
242
|
-
window: provider.quota.window,
|
|
243
|
-
consumed: state.consumed,
|
|
244
|
-
limit_tokens: limit,
|
|
245
|
-
reset_in_ms: Math.max(0, reset - t),
|
|
246
|
-
pct: limit ? Math.min(1, state.consumed / limit) : undefined,
|
|
247
|
-
exhausted: limit ? state.consumed >= limit : false,
|
|
248
|
-
alert: limit ? state.consumed / limit >= (provider.quota.alert_at ?? 0.8) : false,
|
|
249
|
-
},
|
|
250
|
-
];
|
|
251
|
-
});
|
|
252
|
-
}
|
|
253
|
-
}
|