aigetwey 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +58 -3
  2. package/README.md +4 -4
  3. package/config.example.yaml +6 -5
  4. package/dashboard/next.config.ts +6 -0
  5. package/dashboard/src/app/globals.css +47 -0
  6. package/dashboard/src/components/BudgetForm.tsx +258 -0
  7. package/dashboard/src/components/EndpointView.tsx +30 -0
  8. package/dashboard/src/components/LogTable.tsx +90 -25
  9. package/dashboard/src/components/ModelPicker.tsx +15 -7
  10. package/dashboard/src/components/ProviderDetail.tsx +27 -29
  11. package/dashboard/src/components/ProviderManager.tsx +36 -3
  12. package/dashboard/src/components/QuotaView.tsx +106 -18
  13. package/dashboard/src/components/Rail.tsx +1 -1
  14. package/dashboard/src/components/RoutingView.tsx +2 -2
  15. package/dashboard/src/components/ToolDetail.tsx +5 -3
  16. package/dashboard/src/components/TopBar.tsx +1 -1
  17. package/dashboard/src/components/UsageView.tsx +25 -6
  18. package/dashboard/src/lib/cliTools.ts +0 -43
  19. package/dashboard/src/lib/client.ts +17 -1
  20. package/dashboard/src/lib/gateway.ts +25 -1
  21. package/dashboard/src/{middleware.ts → proxy.ts} +8 -6
  22. package/dist/cli.js +43 -8
  23. package/dist/cli.js.map +1 -1
  24. package/dist/config.js +75 -0
  25. package/dist/config.js.map +1 -1
  26. package/dist/core/budget.js +97 -0
  27. package/dist/core/budget.js.map +1 -0
  28. package/dist/core/handler.js +21 -1
  29. package/dist/core/handler.js.map +1 -1
  30. package/dist/core/quota.js +33 -7
  31. package/dist/core/quota.js.map +1 -1
  32. package/dist/core/state.js +17 -2
  33. package/dist/core/state.js.map +1 -1
  34. package/dist/db.js +39 -5
  35. package/dist/db.js.map +1 -1
  36. package/dist/middleware/auth.js +15 -8
  37. package/dist/middleware/auth.js.map +1 -1
  38. package/dist/routes/admin.js +34 -4
  39. package/dist/routes/admin.js.map +1 -1
  40. package/dist/routes/v1.js +15 -10
  41. package/dist/routes/v1.js.map +1 -1
  42. package/dist/server.js +5 -1
  43. package/dist/server.js.map +1 -1
  44. package/dist/upstream/client.js +9 -0
  45. package/dist/upstream/client.js.map +1 -1
  46. package/package.json +3 -4
  47. package/src/cli.ts +44 -8
  48. package/src/config.ts +81 -0
  49. package/src/core/budget.ts +128 -0
  50. package/src/core/handler.ts +26 -1
  51. package/src/core/quota.ts +40 -1
  52. package/src/core/state.ts +24 -0
  53. package/src/db.ts +50 -5
  54. package/src/middleware/auth.ts +18 -8
  55. package/src/routes/admin.ts +45 -7
  56. package/src/routes/v1.ts +15 -10
  57. package/src/server.ts +5 -1
  58. package/src/upstream/client.ts +9 -0
@@ -0,0 +1,128 @@
1
+ /**
2
+ * Scoped spend budgets, derived from the usage table (the single source of
3
+ * truth) rather than a parallel counter. Each budget targets the whole gateway,
4
+ * one provider, one upstream model, or one client key. statuses() computes every
5
+ * budget's spend over its window; the result list is cached a few seconds so the
6
+ * per-request hard-stop check stays cheap. blocks() answers "is a route to this
7
+ * provider/model barred by an exhausted budget?"; blocksKey() answers the same for a client key.
8
+ */
9
+ import type { Budget, BudgetScope } from "../config.js";
10
+ import { budgetKey } from "../config.js";
11
+ import { currentWindowStart, nextResetAt } from "./quota.js";
12
+
13
+ export interface BudgetStatus { // Computed state of one configured budget, as built by BudgetTracker.compute().
14
+ scope: BudgetScope; // the budget's scope, copied from its spec
15
+ key: string; // budgetKey(scope)
16
+ label: string; // display label from scopeLabel(): "Global", the key's display name, or the scope id
17
+ note?: string; // copied from the spec
18
+ unit: "usd" | "tokens"; // selects what `limit` and `spent` measure
19
+ limit: number; // copied from the spec
20
+ spent: number; // summed cost when unit is "usd", otherwise tokens_in + tokens_out, since the window start
21
+ pct: number; // min(1, spent / limit); 0 when limit is not positive
22
+ alert: boolean; // pct >= alert_at
23
+ alert_at: number; // the spec's alert_at, defaulting to 0.8
24
+ exhausted: boolean; // spent >= limit
25
+ est_converse: number | null; // limit expressed in the other unit via the window's cost-per-token rate; null when no tokens were recorded
26
+ reset_in_ms: number; // ms until nextResetAt(), floored at 0
27
+ window: Budget["window"]; // copied from the spec
28
+ }
29
+
30
+ interface TotalsReader { // The only capability BudgetTracker needs from the usage store.
31
+ totals(sinceMs: number, filter?: { provider?: string; model?: string; client_key?: string }): { // called with the window start and the scope's filter (undefined for global)
32
+ tokens_in: number;
33
+ tokens_out: number; // compute() adds tokens_in + tokens_out to get the token total
34
+ cost: number; // compared against the limit when the budget unit is "usd"
35
+ };
36
+ }
37
+
38
/**
 * Human-readable label for a budget scope.
 *
 * @param scope   The budget's scope.
 * @param keyName Resolves a client-key fingerprint to a display name; only
 *                consulted for key-scoped budgets.
 * @returns "Global" for the global scope, the resolved key name for a key
 *          scope, and the raw scope id for every other scope type.
 */
function scopeLabel(scope: BudgetScope, keyName: (fp: string) => string): string {
  switch (scope.type) {
    case "global":
      return "Global";
    case "key":
      return keyName(scope.id);
    default:
      return scope.id;
  }
}
43
+
44
/**
 * Translates a budget scope into the row filter handed to the usage totals
 * query.
 *
 * @returns A single-field filter for provider, model and key scopes;
 *          `undefined` for any other scope (the global scope), meaning no
 *          filtering.
 */
function scopeFilter(scope: BudgetScope): { provider?: string; model?: string; client_key?: string } | undefined {
  switch (scope.type) {
    case "provider":
      return { provider: scope.id };
    case "model":
      return { model: scope.id };
    case "key":
      return { client_key: scope.id };
    default:
      return undefined;
  }
}
50
+
51
/**
 * Computes the status of every configured budget from usage totals and caches
 * the resulting list for a short time so per-request checks stay cheap.
 */
export class BudgetTracker {
  /** Last computed status list and the clock reading it was computed at. */
  private cached?: { at: number; list: BudgetStatus[] };

  /**
   * @param getBudgets Returns the currently configured budgets; called on
   *                   every recompute so config changes are picked up.
   * @param db         Source of summed usage totals.
   * @param now        Clock in epoch ms (injectable for tests).
   * @param cacheMs    How long a computed status list is reused.
   * @param keyName    Resolves a client-key fingerprint to a display label.
   */
  constructor(
    private readonly getBudgets: () => Budget[],
    private readonly db: TotalsReader,
    private readonly now: () => number = Date.now,
    private readonly cacheMs = 5000,
    private readonly keyName: (fp: string) => string = (fp) => `key …${fp}`,
  ) {}

  /** Drops the cached list so the next statuses() call recomputes. */
  clearCache(): void {
    this.cached = undefined;
  }

  /**
   * Status of every configured budget, in configuration order. Served from
   * the cache while it is younger than `cacheMs`.
   */
  statuses(): BudgetStatus[] {
    const t = this.now();
    if (this.cached) {
      const age = t - this.cached.at;
      // A negative age means the clock stepped backwards since the list was
      // computed; treat that as stale instead of serving it until the clock
      // catches up again.
      if (age >= 0 && age < this.cacheMs) return this.cached.list;
    }
    const list = this.getBudgets().map((b) => this.compute(b, t));
    this.cached = { at: t, list };
    return list;
  }

  /** Status of the first global-scope budget, or null when none is configured. */
  globalStatus(): BudgetStatus | null {
    return this.statuses().find((s) => s.scope.type === "global") ?? null;
  }

  /** First exhausted provider/model budget matching a route, or null. */
  blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null {
    for (const s of this.statuses()) {
      if (!s.exhausted) continue;
      if (s.scope.type === "provider" && s.scope.id === providerId)
        return { exhausted: true, reset_in_ms: s.reset_in_ms };
      if (s.scope.type === "model" && s.scope.id === model)
        return { exhausted: true, reset_in_ms: s.reset_in_ms };
    }
    return null;
  }

  /** The exhausted key-scoped budget for this fingerprint, or null. */
  blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null {
    for (const s of this.statuses()) {
      if (s.exhausted && s.scope.type === "key" && s.scope.id === fp) {
        return { exhausted: true, reset_in_ms: s.reset_in_ms };
      }
    }
    return null;
  }

  /**
   * Builds the status of one budget at time `t` from the usage totals
   * recorded since the start of the budget's current window.
   */
  private compute(spec: Budget, t: number): BudgetStatus {
    const windowStart = currentWindowStart(spec, t);
    const total = this.db.totals(windowStart, scopeFilter(spec.scope));
    const tokens = total.tokens_in + total.tokens_out;
    const cost = total.cost;
    // Observed cost per token in this window; unknown until tokens are recorded.
    const rate = tokens > 0 ? cost / tokens : undefined;
    const spent = spec.unit === "usd" ? cost : tokens;
    const limit = spec.limit;
    const pct = limit > 0 ? Math.min(1, spent / limit) : 0;
    const alertAt = spec.alert_at ?? 0.8;
    // The limit expressed in the other unit: a USD limit as tokens, a token
    // limit as USD.
    const converted = rate === undefined ? null : spec.unit === "usd" ? limit / rate : limit * rate;
    // A zero rate (tokens recorded at zero cost) makes the USD→token
    // conversion Infinity or NaN. Report that as "no estimate", which is also
    // what JSON serialization would turn it into.
    const est_converse = converted !== null && Number.isFinite(converted) ? converted : null;
    return {
      scope: spec.scope,
      key: budgetKey(spec.scope),
      label: scopeLabel(spec.scope, this.keyName),
      note: spec.note,
      unit: spec.unit,
      limit,
      spent,
      pct,
      alert: pct >= alertAt,
      alert_at: alertAt,
      exhausted: spent >= limit,
      est_converse,
      reset_in_ms: Math.max(0, nextResetAt(spec, windowStart, t) - t),
      window: spec.window,
    };
  }
}
@@ -51,6 +51,12 @@ export interface HandleDeps {
51
51
  pool: KeyPool;
52
52
  db?: UsageDB;
53
53
  quota?: QuotaTracker;
54
+ budget?: {
55
+ globalStatus(): { exhausted: boolean; reset_in_ms: number } | null;
56
+ blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null;
57
+ blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null;
58
+ };
59
+ clientKeyFp?: string;
54
60
  log?: (msg: string) => void;
55
61
  now?: () => number;
56
62
  }
@@ -84,6 +90,7 @@ function recordUsage(
84
90
  status,
85
91
  latency_ms: latencyMs,
86
92
  stream: stream ? 1 : 0,
93
+ client_key: deps.clientKeyFp ?? "",
87
94
  });
88
95
  }
89
96
 
@@ -114,11 +121,29 @@ export async function handle(
114
121
  const thinkingIntent: ThinkingConfig | null =
115
122
  override ?? captureThinking(canonical as Record<string, unknown>);
116
123
 
117
- const routes = config.resolve(canonical.model);
124
+ let routes = config.resolve(canonical.model);
118
125
  if (routes.length === 0) {
119
126
  throw new GatewayError(404, { error: `unknown model "${canonical.model}"` });
120
127
  }
121
128
 
129
+ // Budget hard-stop. A global or client-key overrun fails fast with 402.
130
+ // Provider/model budgets bar the matching routes (like the token-quota skip);
131
+ // if every candidate is barred, there's nothing to serve → 402.
132
+ if (deps.budget) {
133
+ const g = deps.budget.globalStatus();
134
+ if (g?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: g.reset_in_ms });
135
+ if (deps.clientKeyFp) {
136
+ const kb = deps.budget.blocksKey(deps.clientKeyFp);
137
+ if (kb?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: kb.reset_in_ms });
138
+ }
139
+ const eligible = routes.filter((r) => !deps.budget!.blocks(r.provider.id, r.model));
140
+ if (eligible.length === 0) {
141
+ const b = deps.budget.blocks(routes[0]!.provider.id, routes[0]!.model);
142
+ throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: b?.reset_in_ms ?? 0 });
143
+ }
144
+ routes = eligible;
145
+ }
146
+
122
147
  // Pipeline order matters: RTK compresses tool_result in the INPUT first, then
123
148
  // inject prepends the output-style system prompt. They touch different parts
124
149
  // of the request and stack cleanly. Both run before routing so every fallback
package/src/core/quota.ts CHANGED
@@ -38,6 +38,8 @@ export interface QuotaSnapshot {
38
38
  /** 0..1 fraction of the limit used, if a limit is set */
39
39
  pct?: number;
40
40
  exhausted: boolean;
41
+ /** true when a limit is set and pct >= the quota's alert_at (default 0.8) */
42
+ alert: boolean;
41
43
  }
42
44
 
43
45
  // ---- timezone-aware calendar math -----------------------------------------
@@ -113,7 +115,9 @@ function parseHHMM(reset_at: string | undefined): { h: number; m: number } {
113
115
  * - weekly: next `reset_at` weekday (default monday) at 00:00 in tz.
114
116
  * - monthly: next 1st of month at 00:00 in tz.
115
117
  */
116
- export function nextResetAt(quota: Quota, windowStart: number, now: number): number {
118
+ export type WindowSpec = Pick<Quota, "window" | "reset_at" | "timezone">;
119
+
120
+ export function nextResetAt(quota: WindowSpec, windowStart: number, now: number): number {
117
121
  const tz = quota.timezone || "UTC";
118
122
  if (quota.window === "5h") return windowStart + 5 * HOUR_MS;
119
123
 
@@ -140,6 +144,40 @@ export function nextResetAt(quota: Quota, windowStart: number, now: number): num
140
144
  return zonedWallToEpoch(p.year, p.month + 1, 1, 0, 0, tz);
141
145
  }
142
146
 
147
/**
 * Epoch ms at which the window containing `now` began.
 *
 * - 5h: `now` floored onto a fixed 5-hour grid counted from the epoch.
 * - daily: today's `reset_at` time in the spec's timezone, or the previous
 *   day's when today's is still in the future.
 * - weekly: the latest occurrence of the `reset_at` weekday (monday when
 *   unset or unrecognised) at 00:00 in the timezone.
 * - monthly (any other window value): the 1st of the current month at 00:00.
 *
 * The timezone defaults to "UTC".
 *
 * NOTE(review): the daily and weekly branches pass day values that can be
 * zero or negative to zonedWallToEpoch — presumably it normalises them into
 * the previous month; confirm.
 */
export function currentWindowStart(spec: WindowSpec, now: number): number {
  if (spec.window === "5h") {
    const span = 5 * HOUR_MS;
    return Math.floor(now / span) * span;
  }

  const zone = spec.timezone || "UTC";
  const parts = zonedParts(now, zone);
  // Wall-clock instant within the current year/month of `now`, in `zone`.
  const wallAt = (day: number, hour: number, minute: number): number =>
    zonedWallToEpoch(parts.year, parts.month, day, hour, minute, zone);

  switch (spec.window) {
    case "daily": {
      const { h, m } = parseHHMM(spec.reset_at);
      const today = wallAt(parts.day, h, m);
      return today > now ? wallAt(parts.day - 1, h, m) : today;
    }
    case "weekly": {
      const wanted = WEEKDAYS.indexOf((spec.reset_at ?? "monday").toLowerCase());
      const targetIdx = wanted === -1 ? 1 : wanted;
      const daysBehind = (WEEKDAYS.indexOf(parts.weekday) - targetIdx + 7) % 7;
      const candidate = wallAt(parts.day - daysBehind, 0, 0);
      return candidate > now ? wallAt(parts.day - daysBehind - 7, 0, 0) : candidate;
    }
    default:
      // monthly
      return wallAt(1, 0, 0);
  }
}
180
+
143
181
  export class QuotaTracker {
144
182
  private readonly states = new Map<string, QuotaState>();
145
183
 
@@ -207,6 +245,7 @@ export class QuotaTracker {
207
245
  reset_in_ms: Math.max(0, reset - t),
208
246
  pct: limit ? Math.min(1, state.consumed / limit) : undefined,
209
247
  exhausted: limit ? state.consumed >= limit : false,
248
+ alert: limit ? state.consumed / limit >= (provider.quota.alert_at ?? 0.8) : false,
210
249
  },
211
250
  ];
212
251
  });
package/src/core/state.ts CHANGED
@@ -17,23 +17,42 @@ import {
17
17
  validateConfig,
18
18
  unmaskSecrets,
19
19
  writeConfigFile,
20
+ maskKey,
20
21
  } from "../config.js";
22
+ import { clientKeyFingerprint } from "../middleware/auth.js";
21
23
  import { KeyPool } from "./keypool.js";
22
24
  import { QuotaTracker } from "./quota.js";
25
+ import { BudgetTracker } from "./budget.js";
26
+
27
/**
 * Display label for the configured server key whose fingerprint is `fp`.
 *
 * @returns The key's entry in `key_names` when present, otherwise its masked
 *          form; a generic `key …<fp>` label when no configured key has that
 *          fingerprint.
 */
function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string, string> }, fp: string): string {
  const owner = server.api_keys.find((candidate) => clientKeyFingerprint(candidate) === fp);
  if (owner === undefined) return `key …${fp}`;
  return server.key_names?.[owner] ?? maskKey(owner);
}
23
33
 
24
34
  export class GatewayState {
25
35
  private _config: GatewayConfig;
26
36
  private _pool: KeyPool;
27
37
  private readonly _quota: QuotaTracker;
38
+ private readonly _budget: BudgetTracker;
28
39
 
29
40
  constructor(
30
41
  private readonly configPath: string,
31
42
  initial: GatewayConfig,
32
43
  quota?: QuotaTracker,
44
+ budgetDb?: { totals(since: number, filter?: { provider?: string; model?: string; client_key?: string }): { tokens_in: number; tokens_out: number; cost: number } },
33
45
  ) {
34
46
  this._config = initial;
35
47
  this._pool = new KeyPool();
36
48
  this._quota = quota ?? new QuotaTracker();
49
+ this._budget = new BudgetTracker(
50
+ () => this._config.raw.budgets,
51
+ budgetDb ?? { totals: () => ({ tokens_in: 0, tokens_out: 0, cost: 0 }) },
52
+ undefined,
53
+ undefined,
54
+ (fp) => serverKeyLabel(this._config.raw.server, fp),
55
+ );
37
56
  }
38
57
 
39
58
  get config(): GatewayConfig {
@@ -48,6 +67,10 @@ export class GatewayState {
48
67
  return this._quota;
49
68
  }
50
69
 
70
+ get budget(): BudgetTracker {
71
+ return this._budget;
72
+ }
73
+
51
74
  /**
52
75
  * Validate edited config text, restore masked secrets from the live config,
53
76
  * persist atomically, then swap in a fresh config + pool. Throws without
@@ -61,5 +84,6 @@ export class GatewayState {
61
84
  writeConfigFile(this.configPath, next.raw);
62
85
  this._config = next;
63
86
  this._pool = new KeyPool();
87
+ this._budget.clearCache();
64
88
  }
65
89
  }
package/src/db.ts CHANGED
@@ -27,6 +27,7 @@ export interface UsageRow {
27
27
  status: number;
28
28
  latency_ms: number;
29
29
  stream: number; // 0/1
30
+ client_key: string;
30
31
  }
31
32
 
32
33
  export interface LogRow {
@@ -38,6 +39,12 @@ export interface LogRow {
38
39
  response_summary: string;
39
40
  }
40
41
 
42
+ export interface UsageTotals {
43
+ tokens_in: number;
44
+ tokens_out: number;
45
+ cost: number;
46
+ }
47
+
41
48
  export interface UsageSummary {
42
49
  total: { requests: number; tokens_in: number; tokens_out: number; cost: number };
43
50
  by_provider: Array<{ provider: string; requests: number; tokens_in: number; tokens_out: number; cost: number }>;
@@ -79,7 +86,8 @@ export class UsageDB {
79
86
  cost REAL NOT NULL DEFAULT 0,
80
87
  status INTEGER NOT NULL,
81
88
  latency_ms INTEGER NOT NULL DEFAULT 0,
82
- stream INTEGER NOT NULL DEFAULT 0
89
+ stream INTEGER NOT NULL DEFAULT 0,
90
+ client_key TEXT NOT NULL DEFAULT ''
83
91
  );
84
92
  CREATE INDEX IF NOT EXISTS idx_usage_ts ON usage(ts);
85
93
  CREATE TABLE IF NOT EXISTS logs (
@@ -99,10 +107,15 @@ export class UsageDB {
99
107
  last_reset INTEGER NOT NULL DEFAULT 0
100
108
  );
101
109
  `);
110
+ // migrate older DBs created before client_key existed.
111
+ const cols = this.db.prepare(`PRAGMA table_info(usage)`).all() as SqlRow[];
112
+ if (!cols.some((c) => String(c.name) === "client_key")) {
113
+ this.db.exec(`ALTER TABLE usage ADD COLUMN client_key TEXT NOT NULL DEFAULT ''`);
114
+ }
102
115
  this.now = now;
103
116
  this.insertUsage = this.db.prepare(`
104
- INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream)
105
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
117
+ INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream, client_key)
118
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
106
119
  `);
107
120
  this.insertLog = this.db.prepare(`
108
121
  INSERT INTO logs (ts, direction, provider, status, request_summary, response_summary)
@@ -117,7 +130,7 @@ export class UsageDB {
117
130
  `);
118
131
  }
119
132
 
120
- record(row: Omit<UsageRow, "ts"> & { ts?: number }): void {
133
+ record(row: Omit<UsageRow, "ts" | "client_key"> & { ts?: number; client_key?: string }): void {
121
134
  this.insertUsage.run(
122
135
  row.ts ?? this.now(),
123
136
  row.alias,
@@ -130,6 +143,7 @@ export class UsageDB {
130
143
  row.status,
131
144
  row.latency_ms,
132
145
  row.stream,
146
+ row.client_key ?? "",
133
147
  );
134
148
  }
135
149
 
@@ -195,6 +209,36 @@ export class UsageDB {
195
209
  };
196
210
  }
197
211
 
212
/**
 * Sums tokens_in, tokens_out and cost over usage rows with ts >= sinceMs.
 * Each of the optional `provider`, `model` and `client_key` filter fields,
 * when set to a non-empty value, adds an equality condition on the column of
 * the same name. Backs the scoped budget tracker, so the usage table stays
 * the single source of truth (no parallel counter).
 *
 * @returns The three sums, each 0 when no row matches.
 */
totals(sinceMs: number, filter?: { provider?: string; model?: string; client_key?: string }): UsageTotals {
  const where = ["ts >= ?"];
  const args: Array<number | string> = [sinceMs];
  // Filter field names double as column names; order fixes the bind order.
  for (const column of ["provider", "model", "client_key"] as const) {
    const wanted = filter?.[column];
    if (wanted) {
      where.push(`${column} = ?`);
      args.push(wanted);
    }
  }
  const statement = this.db.prepare(
    `SELECT COALESCE(SUM(tokens_in),0) tokens_in, COALESCE(SUM(tokens_out),0) tokens_out,
COALESCE(SUM(cost),0) cost
FROM usage WHERE ${where.join(" AND ")}`,
  );
  const sums = statement.get(...args) as SqlRow;
  return { tokens_in: num(sums.tokens_in), tokens_out: num(sums.tokens_out), cost: num(sums.cost) };
}
241
+
198
242
  /**
199
243
  * Bucketed time-series for charts: one point per `bucketMs` interval from
200
244
  * `sinceMs` to now, aligned to the bucket boundary, with zero-filled gaps.
@@ -233,7 +277,7 @@ export class UsageDB {
233
277
  const rows = this.db
234
278
  .prepare(
235
279
  `SELECT ts, alias, provider, model, tokens_in, tokens_out, cached_tokens,
236
- cost, status, latency_ms, stream
280
+ cost, status, latency_ms, stream, client_key
237
281
  FROM usage ORDER BY id DESC LIMIT ?`,
238
282
  )
239
283
  .all(Math.max(1, Math.min(limit, 1000))) as SqlRow[];
@@ -249,6 +293,7 @@ export class UsageDB {
249
293
  status: num(r.status),
250
294
  latency_ms: num(r.latency_ms),
251
295
  stream: num(r.stream),
296
+ client_key: String(r.client_key ?? ""),
252
297
  }));
253
298
  }
254
299
 
@@ -15,15 +15,23 @@ function digest(s: string): Buffer {
15
15
  return createHash("sha256").update(s).digest();
16
16
  }
17
17
 
18
- /** Constant-time membership test over fixed-length digests. */
19
- export function isValidKey(presented: string, validKeys: string[]): boolean {
18
/**
 * Non-secret, stable identifier for a client key: the first 8 hex characters
 * of the key's SHA-256 digest.
 */
export function clientKeyFingerprint(key: string): string {
  const hex = createHash("sha256").update(key).digest("hex");
  return hex.slice(0, 8);
}
22
+
23
+ /** Constant-time: returns the matching key (digest every candidate) or null. */
24
+ export function matchKey(presented: string, validKeys: string[]): string | null {
20
25
  const p = digest(presented);
21
- // compare against every key so timing can't reveal which one matched.
22
- let ok = false;
26
+ let found: string | null = null;
23
27
  for (const k of validKeys) {
24
- if (timingSafeEqual(p, digest(k))) ok = true;
28
+ if (timingSafeEqual(p, digest(k))) found = k;
25
29
  }
26
- return ok;
30
+ return found;
31
+ }
32
+
33
+ export function isValidKey(presented: string, validKeys: string[]): boolean {
34
+ return matchKey(presented, validKeys) !== null;
27
35
  }
28
36
 
29
37
  export function extractKey(req: FastifyRequest): string | null {
@@ -40,14 +48,16 @@ export interface AuthResult {
40
48
  ok: boolean;
41
49
  status?: number;
42
50
  error?: string;
51
+ keyFp?: string;
43
52
  }
44
53
 
45
54
  export function checkAuth(req: FastifyRequest, validKeys: string[]): AuthResult {
46
55
  if (validKeys.length === 0) return { ok: true }; // auth disabled
47
56
  const key = extractKey(req);
48
57
  if (!key) return { ok: false, status: 401, error: "missing API key" };
49
- if (!isValidKey(key, validKeys)) return { ok: false, status: 401, error: "invalid API key" };
50
- return { ok: true };
58
+ const matched = matchKey(key, validKeys);
59
+ if (!matched) return { ok: false, status: 401, error: "invalid API key" };
60
+ return { ok: true, keyFp: clientKeyFingerprint(matched) };
51
61
  }
52
62
 
53
63
  /** Verifies a presented admin password (against the persisted hash store). */
@@ -15,7 +15,7 @@ import { resolve } from "node:path";
15
15
  import type { FastifyInstance, FastifyRequest, FastifyReply } from "fastify";
16
16
  import type { GatewayState } from "../core/state.js";
17
17
  import type { UsageDB } from "../db.js";
18
- import { checkAdminAuth, type AdminVerifier } from "../middleware/auth.js";
18
+ import { checkAdminAuth, clientKeyFingerprint, type AdminVerifier } from "../middleware/auth.js";
19
19
  import {
20
20
  maskKey,
21
21
  serializeConfig,
@@ -44,9 +44,12 @@ import {
44
44
  addServerKey,
45
45
  editServerKey,
46
46
  removeServerKey,
47
+ setBudget,
48
+ clearBudget,
47
49
  type Config,
48
50
  type Provider,
49
51
  type EndpointSettings,
52
+ type Budget,
50
53
  } from "../config.js";
51
54
  import { pingProvider } from "../upstream/client.js";
52
55
  import { handle, GatewayError } from "../core/handler.js";
@@ -142,7 +145,23 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
142
145
 
143
146
  // per-provider quota: consumed, limit, and ms until the next scheduled reset.
144
147
  app.get("/admin/quota", requireAdmin, (_req, reply) => {
145
- reply.send({ quota: deps.state.quota.snapshot(deps.state.config.listProviders()) });
148
+ reply.send({
149
+ quota: deps.state.quota.snapshot(deps.state.config.listProviders()),
150
+ budgets: deps.state.budget.statuses(),
151
+ });
152
+ });
153
+
154
+ // add or replace a budget (keyed by scope). Body = Budget; invalid shape or an
155
+ // unknown provider scope -> 400 via zod / setBudget through state.reload().
156
+ app.put("/admin/budgets", requireAdmin, (req, reply) => {
157
+ const b = (req.body ?? {}) as Budget;
158
+ applyMutation(reply, (c) => setBudget(c, b));
159
+ });
160
+
161
+ // remove a budget by scope key: global | provider:<id> | model:<id>.
162
+ app.delete("/admin/budgets/:key", requireAdmin, (req, reply) => {
163
+ const key = decodeURIComponent((req.params as { key: string }).key);
164
+ applyMutation(reply, (c) => clearBudget(c, key));
146
165
  });
147
166
 
148
167
  // current config, secrets masked
@@ -426,11 +445,17 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
426
445
 
427
446
  // every callable model: provider/model catalog entries + routing aliases.
428
447
  app.get("/admin/models", requireAdmin, (_req, reply) => {
429
- const providers = deps.state.config.listProviders().map((p) => ({
430
- id: p.id,
431
- format: p.format,
432
- models: p.models.map((m) => ({ id: m.id, ref: `${p.id}/${m.id}`, price_in: m.price_in, price_out: m.price_out })),
433
- }));
448
+ // disabled providers are skipped in routing, so their models must not be
449
+ // selectable anywhere this catalog feeds (combos, CLI-tool setup, budget
450
+ // scopes) — drop them here at the single source.
451
+ const providers = deps.state.config
452
+ .listProviders()
453
+ .filter((p) => !p.disabled)
454
+ .map((p) => ({
455
+ id: p.id,
456
+ format: p.format,
457
+ models: p.models.map((m) => ({ id: m.id, ref: `${p.id}/${m.id}`, price_in: m.price_in, price_out: m.price_out })),
458
+ }));
434
459
  const routes = deps.state.config.listRoutes();
435
460
  reply.send({ providers, routes });
436
461
  });
@@ -546,6 +571,19 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
546
571
  applyMutation(reply, (c) => removeServerKey(c, i));
547
572
  });
548
573
 
574
+ // server keys with a non-secret fingerprint + display name, for the budget
575
+ // key-scope picker. Never returns the raw key.
576
+ app.get("/admin/keys", requireAdmin, (_req, reply) => {
577
+ const s = deps.state.config.raw.server;
578
+ reply.send(
579
+ s.api_keys.map((k) => ({
580
+ fingerprint: clientKeyFingerprint(k),
581
+ name: s.key_names?.[k] ?? maskKey(k),
582
+ masked: maskKey(k),
583
+ })),
584
+ );
585
+ });
586
+
549
587
  // reveal ONE raw gateway key (the "show key" button on the Endpoint page).
550
588
  app.get("/admin/endpoint/keys/:index/reveal", requireAdmin, (req, reply) => {
551
589
  const { index } = req.params as { index: string };
package/src/routes/v1.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
2
- import { checkAuth } from "../middleware/auth.js";
2
+ import { checkAuth, extractKey, clientKeyFingerprint } from "../middleware/auth.js";
3
3
  import type { GatewayState } from "../core/state.js";
4
4
  import { handle, GatewayError, type HandleDeps } from "../core/handler.js";
5
5
  import type { WireFormat } from "../core/canonical.js";
@@ -23,16 +23,21 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
23
23
  };
24
24
 
25
25
  // build deps from the live holder per request (never close over config/pool).
26
- const depsNow = (): HandleDeps => ({
27
- config: state.config,
28
- pool: state.pool,
29
- quota: state.quota,
30
- db,
31
- log: (msg) => app.log.info(msg),
32
- });
26
+ const depsNow = (req: FastifyRequest): HandleDeps => {
27
+ const presented = extractKey(req);
28
+ return {
29
+ config: state.config,
30
+ pool: state.pool,
31
+ quota: state.quota,
32
+ budget: state.budget,
33
+ db,
34
+ clientKeyFp: presented ? clientKeyFingerprint(presented) : undefined,
35
+ log: (msg) => app.log.info(msg),
36
+ };
37
+ };
33
38
 
34
- app.post("/v1/chat/completions", requireAuth, (req, reply) => dispatch(depsNow(), "openai", req, reply));
35
- app.post("/v1/messages", requireAuth, (req, reply) => dispatch(depsNow(), "anthropic", req, reply));
39
+ app.post("/v1/chat/completions", requireAuth, (req, reply) => dispatch(depsNow(req), "openai", req, reply));
40
+ app.post("/v1/messages", requireAuth, (req, reply) => dispatch(depsNow(req), "anthropic", req, reply));
36
41
  }
37
42
 
38
43
  const SSE_HEADERS = {
package/src/server.ts CHANGED
@@ -59,7 +59,7 @@ async function main(): Promise<void> {
59
59
  });
60
60
 
61
61
  // holder enables runtime config edits (hot-reload) from the dashboard.
62
- const state = new GatewayState(configPath, config, quota);
62
+ const state = new GatewayState(configPath, config, quota, db);
63
63
  // admin password lives in a hash store (seeded from the env on first run,
64
64
  // changeable at runtime from the dashboard).
65
65
  const auth = AuthStore.open(dataDir, process.env.AIGETWEY_ADMIN_PASSWORD);
@@ -78,6 +78,10 @@ async function main(): Promise<void> {
78
78
  prefix: "/",
79
79
  // forward the whole HTTP surface the dashboard needs (pages + its API).
80
80
  httpMethods: ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"],
81
+ // forward WebSocket upgrades too, so `next dev`'s HMR socket works when the
82
+ // dashboard is proxied — this is what lets dev run single-URL on the gateway
83
+ // port like production. Harmless for the prebuilt prod dashboard (no socket).
84
+ websocket: true,
81
85
  // keep the ORIGINAL Host so Next builds redirects (e.g. → /login) against
82
86
  // the gateway's address, not the internal dashboard port.
83
87
  replyOptions: {
@@ -68,6 +68,15 @@ function buildBody(
68
68
  const adapter = adapterFor(provider.format);
69
69
  const upstreamReq: CanonicalRequest = { ...req, model, stream };
70
70
  const out = adapter.requestFromCanonical(upstreamReq) as Record<string, unknown>;
71
+ // OpenAI-compatible streams omit usage entirely unless you opt in — without this
72
+ // every streamed call through an openai-format provider logs 0 tokens in/out
73
+ // (anthropic/gemini report usage inline, so they're unaffected). Ask for the
74
+ // final usage chunk; the handler taps it for accounting. Preserve a usage opt-in
75
+ // the client already set.
76
+ if (stream && provider.format === "openai") {
77
+ const existing = (out.stream_options ?? {}) as Record<string, unknown>;
78
+ out.stream_options = { ...existing, include_usage: true };
79
+ }
71
80
  // Normalize thinking into THIS provider's native format, keyed by the upstream
72
81
  // model's capabilities. No-op for non-reasoning models. Runs per-attempt so each
73
82
  // provider in a fallback chain gets the right shape.