aigetwey 1.0.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +58 -3
- package/README.md +4 -4
- package/config.example.yaml +6 -5
- package/dashboard/next.config.ts +6 -0
- package/dashboard/src/app/globals.css +47 -0
- package/dashboard/src/components/BudgetForm.tsx +258 -0
- package/dashboard/src/components/EndpointView.tsx +30 -0
- package/dashboard/src/components/LogTable.tsx +90 -25
- package/dashboard/src/components/ModelPicker.tsx +15 -7
- package/dashboard/src/components/ProviderDetail.tsx +27 -29
- package/dashboard/src/components/ProviderManager.tsx +36 -3
- package/dashboard/src/components/QuotaView.tsx +106 -18
- package/dashboard/src/components/Rail.tsx +1 -1
- package/dashboard/src/components/RoutingView.tsx +2 -2
- package/dashboard/src/components/ToolDetail.tsx +5 -3
- package/dashboard/src/components/TopBar.tsx +1 -1
- package/dashboard/src/components/UsageView.tsx +25 -6
- package/dashboard/src/lib/cliTools.ts +0 -43
- package/dashboard/src/lib/client.ts +17 -1
- package/dashboard/src/lib/gateway.ts +25 -1
- package/dashboard/src/{middleware.ts → proxy.ts} +8 -6
- package/dist/cli.js +43 -8
- package/dist/cli.js.map +1 -1
- package/dist/config.js +75 -0
- package/dist/config.js.map +1 -1
- package/dist/core/budget.js +97 -0
- package/dist/core/budget.js.map +1 -0
- package/dist/core/handler.js +21 -1
- package/dist/core/handler.js.map +1 -1
- package/dist/core/quota.js +33 -7
- package/dist/core/quota.js.map +1 -1
- package/dist/core/state.js +17 -2
- package/dist/core/state.js.map +1 -1
- package/dist/db.js +39 -5
- package/dist/db.js.map +1 -1
- package/dist/middleware/auth.js +15 -8
- package/dist/middleware/auth.js.map +1 -1
- package/dist/routes/admin.js +34 -4
- package/dist/routes/admin.js.map +1 -1
- package/dist/routes/v1.js +15 -10
- package/dist/routes/v1.js.map +1 -1
- package/dist/server.js +5 -1
- package/dist/server.js.map +1 -1
- package/dist/upstream/client.js +9 -0
- package/dist/upstream/client.js.map +1 -1
- package/package.json +3 -4
- package/src/cli.ts +44 -8
- package/src/config.ts +81 -0
- package/src/core/budget.ts +128 -0
- package/src/core/handler.ts +26 -1
- package/src/core/quota.ts +40 -1
- package/src/core/state.ts +24 -0
- package/src/db.ts +50 -5
- package/src/middleware/auth.ts +18 -8
- package/src/routes/admin.ts +45 -7
- package/src/routes/v1.ts +15 -10
- package/src/server.ts +5 -1
- package/src/upstream/client.ts +9 -0
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scoped spend budgets, derived from the usage table (the single source of
|
|
3
|
+
* truth) rather than a parallel counter. Each budget targets the whole gateway,
|
|
4
|
+
 * one provider, one upstream model, or one client key (by fingerprint). statuses() computes every budget's spend
|
|
5
|
+
* over its window; the result list is cached a few seconds so the per-request
|
|
6
|
+
* hard-stop check stays cheap. blocks() answers "is a route to this
|
|
7
|
+
* provider/model barred by an exhausted budget?".
|
|
8
|
+
*/
|
|
9
|
+
import type { Budget, BudgetScope } from "../config.js";
|
|
10
|
+
import { budgetKey } from "../config.js";
|
|
11
|
+
import { currentWindowStart, nextResetAt } from "./quota.js";
|
|
12
|
+
|
|
13
|
+
/** Snapshot of one budget: how much of its limit the current window has consumed. */
export interface BudgetStatus {
  /** What the budget applies to, copied from the budget spec. */
  scope: BudgetScope;
  /** Scope key, as produced by `budgetKey(scope)`. */
  key: string;
  /** Display label: "Global", the client key's display name, or the scope id. */
  label: string;
  /** Note copied from the budget spec, when one is set. */
  note?: string;
  /** Unit that `limit` and `spent` are expressed in. */
  unit: "usd" | "tokens";
  /** Configured limit, in `unit`. */
  limit: number;
  /** Cost (usd) or tokens in + tokens out (tokens) recorded since the window started. */
  spent: number;
  /** `spent / limit`, capped at 1; 0 when the limit is not positive. */
  pct: number;
  /** True once `pct` has reached `alert_at`. */
  alert: boolean;
  /** Alert threshold as a fraction of the limit; 0.8 when the spec sets none. */
  alert_at: number;
  /** True once `spent` has reached `limit`. */
  exhausted: boolean;
  /**
   * `limit` converted into the other unit (usd ↔ tokens) at the window's
   * observed cost per token; null when that rate is unavailable.
   */
  est_converse: number | null;
  /** Milliseconds until the window's next reset, never negative. */
  reset_in_ms: number;
  /** Window kind, copied from the budget spec. */
  window: Budget["window"];
}
|
|
29
|
+
|
|
30
|
+
/** The one usage-store capability the budget tracker relies on. */
interface TotalsReader {
  /**
   * Totals recorded since `sinceMs`, optionally narrowed to a provider, an
   * upstream model and/or a client key.
   */
  totals(
    sinceMs: number,
    filter?: { provider?: string; model?: string; client_key?: string },
  ): { tokens_in: number; tokens_out: number; cost: number };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Display label for a budget scope.
 *
 * @param scope   The scope to label.
 * @param keyName Resolves a client-key fingerprint to its display name.
 * @returns "Global" for the global scope, `keyName(id)` for a key scope, and
 *          the raw scope id for every other scope type.
 */
function scopeLabel(scope: BudgetScope, keyName: (fp: string) => string): string {
  switch (scope.type) {
    case "global":
      return "Global";
    case "key":
      return keyName(scope.id);
    default:
      return scope.id;
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Translates a budget scope into the usage filter that selects its rows.
 *
 * @returns A single-field filter for provider, model and key scopes;
 *          `undefined` (no filtering) for any other scope.
 */
function scopeFilter(scope: BudgetScope): { provider?: string; model?: string; client_key?: string } | undefined {
  switch (scope.type) {
    case "provider":
      return { provider: scope.id };
    case "model":
      return { model: scope.id };
    case "key":
      return { client_key: scope.id };
    default:
      return undefined;
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Computes the status of every configured budget from usage totals and caches
 * the resulting list briefly so the per-request hard-stop checks stay cheap.
 */
export class BudgetTracker {
  private cached?: { at: number; list: BudgetStatus[] };

  /**
   * @param getBudgets Returns the currently configured budgets (read on every recompute).
   * @param db         Source of usage totals.
   * @param now        Clock in epoch ms; defaults to `Date.now`.
   * @param cacheMs    Maximum age of a cached status list, in ms.
   * @param keyName    Resolves a client-key fingerprint to a display name.
   */
  constructor(
    private readonly getBudgets: () => Budget[],
    private readonly db: TotalsReader,
    private readonly now: () => number = Date.now,
    private readonly cacheMs = 5000,
    private readonly keyName: (fp: string) => string = (fp) => `key …${fp}`,
  ) {}

  /** Drops the cached list so the next `statuses()` call recomputes. */
  clearCache(): void {
    this.cached = undefined;
  }

  /** Status of every configured budget, served from cache while it is fresh. */
  statuses(): BudgetStatus[] {
    const t = this.now();
    const snapshot = this.cached;
    if (snapshot && this.isFresh(snapshot, t)) return snapshot.list;
    const list = this.getBudgets().map((b) => this.compute(b, t));
    this.cached = { at: t, list };
    return list;
  }

  /** Status of the first global-scoped budget, or null when none is configured. */
  globalStatus(): BudgetStatus | null {
    return this.statuses().find((s) => s.scope.type === "global") ?? null;
  }

  /** First exhausted provider/model budget matching a route, or null. */
  blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null {
    const hit = this.statuses().find(
      (s) =>
        s.exhausted &&
        ((s.scope.type === "provider" && s.scope.id === providerId) ||
          (s.scope.type === "model" && s.scope.id === model)),
    );
    return hit ? { exhausted: true, reset_in_ms: hit.reset_in_ms } : null;
  }

  /** The exhausted key-scoped budget for this fingerprint, or null. */
  blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null {
    const hit = this.statuses().find((s) => s.exhausted && s.scope.type === "key" && s.scope.id === fp);
    return hit ? { exhausted: true, reset_in_ms: hit.reset_in_ms } : null;
  }

  /**
   * Whether a cached snapshot may still be served at time `t`.
   *
   * A snapshot is stale when it is older than `cacheMs`, when the clock has
   * stepped backwards past it, or when any budget's window has rolled over
   * since it was taken — otherwise an exhausted budget would keep blocking
   * into its fresh window until the cache expired.
   */
  private isFresh(snapshot: { at: number; list: BudgetStatus[] }, t: number): boolean {
    const age = t - snapshot.at;
    if (age < 0 || age >= this.cacheMs) return false;
    // reset_in_ms === 0 carries no usable reset time, so it cannot signal a rollover.
    return snapshot.list.every((s) => s.reset_in_ms === 0 || age < s.reset_in_ms);
  }

  /**
   * `limit` expressed in the other unit, at the window's observed cost per token.
   *
   * Returns null when no rate can be derived: no tokens recorded, or — for a
   * usd budget — a zero cost per token, where the division would give Infinity.
   */
  private static converse(
    unit: BudgetStatus["unit"],
    limit: number,
    cost: number,
    tokens: number,
  ): number | null {
    if (!(tokens > 0)) return null;
    const usdPerToken = cost / tokens;
    if (unit === "usd") return usdPerToken > 0 ? limit / usdPerToken : null;
    return limit * usdPerToken;
  }

  /** Builds the status of one budget from the usage recorded in its current window. */
  private compute(spec: Budget, t: number): BudgetStatus {
    const windowStart = currentWindowStart(spec, t);
    const total = this.db.totals(windowStart, scopeFilter(spec.scope));
    const tokens = total.tokens_in + total.tokens_out;
    const cost = total.cost;
    const limit = spec.limit;
    const spent = spec.unit === "usd" ? cost : tokens;
    const pct = limit > 0 ? Math.min(1, spent / limit) : 0;
    const alertAt = spec.alert_at ?? 0.8;
    return {
      scope: spec.scope,
      key: budgetKey(spec.scope),
      label: scopeLabel(spec.scope, this.keyName),
      note: spec.note,
      unit: spec.unit,
      limit,
      spent,
      pct,
      alert: pct >= alertAt,
      alert_at: alertAt,
      exhausted: spent >= limit,
      est_converse: BudgetTracker.converse(spec.unit, limit, cost, tokens),
      reset_in_ms: Math.max(0, nextResetAt(spec, windowStart, t) - t),
      window: spec.window,
    };
  }
}
|
package/src/core/handler.ts
CHANGED
|
@@ -51,6 +51,12 @@ export interface HandleDeps {
|
|
|
51
51
|
pool: KeyPool;
|
|
52
52
|
db?: UsageDB;
|
|
53
53
|
quota?: QuotaTracker;
|
|
54
|
+
budget?: {
|
|
55
|
+
globalStatus(): { exhausted: boolean; reset_in_ms: number } | null;
|
|
56
|
+
blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null;
|
|
57
|
+
blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null;
|
|
58
|
+
};
|
|
59
|
+
clientKeyFp?: string;
|
|
54
60
|
log?: (msg: string) => void;
|
|
55
61
|
now?: () => number;
|
|
56
62
|
}
|
|
@@ -84,6 +90,7 @@ function recordUsage(
|
|
|
84
90
|
status,
|
|
85
91
|
latency_ms: latencyMs,
|
|
86
92
|
stream: stream ? 1 : 0,
|
|
93
|
+
client_key: deps.clientKeyFp ?? "",
|
|
87
94
|
});
|
|
88
95
|
}
|
|
89
96
|
|
|
@@ -114,11 +121,29 @@ export async function handle(
|
|
|
114
121
|
const thinkingIntent: ThinkingConfig | null =
|
|
115
122
|
override ?? captureThinking(canonical as Record<string, unknown>);
|
|
116
123
|
|
|
117
|
-
|
|
124
|
+
let routes = config.resolve(canonical.model);
|
|
118
125
|
if (routes.length === 0) {
|
|
119
126
|
throw new GatewayError(404, { error: `unknown model "${canonical.model}"` });
|
|
120
127
|
}
|
|
121
128
|
|
|
129
|
+
// Budget hard-stop. Global overrun fails fast. Provider/model budgets bar the
|
|
130
|
+
// matching routes (like the token-quota skip); if every candidate is barred,
|
|
131
|
+
// there's nothing to serve → 402.
|
|
132
|
+
if (deps.budget) {
|
|
133
|
+
const g = deps.budget.globalStatus();
|
|
134
|
+
if (g?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: g.reset_in_ms });
|
|
135
|
+
if (deps.clientKeyFp) {
|
|
136
|
+
const kb = deps.budget.blocksKey(deps.clientKeyFp);
|
|
137
|
+
if (kb?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: kb.reset_in_ms });
|
|
138
|
+
}
|
|
139
|
+
const eligible = routes.filter((r) => !deps.budget!.blocks(r.provider.id, r.model));
|
|
140
|
+
if (eligible.length === 0) {
|
|
141
|
+
const b = deps.budget.blocks(routes[0]!.provider.id, routes[0]!.model);
|
|
142
|
+
throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: b?.reset_in_ms ?? 0 });
|
|
143
|
+
}
|
|
144
|
+
routes = eligible;
|
|
145
|
+
}
|
|
146
|
+
|
|
122
147
|
// Pipeline order matters: RTK compresses tool_result in the INPUT first, then
|
|
123
148
|
// inject prepends the output-style system prompt. They touch different parts
|
|
124
149
|
// of the request and stack cleanly. Both run before routing so every fallback
|
package/src/core/quota.ts
CHANGED
|
@@ -38,6 +38,8 @@ export interface QuotaSnapshot {
|
|
|
38
38
|
/** 0..1 fraction of the limit used, if a limit is set */
|
|
39
39
|
pct?: number;
|
|
40
40
|
exhausted: boolean;
|
|
41
|
+
/** true when a limit is set and pct >= the quota's alert_at (default 0.8) */
|
|
42
|
+
alert: boolean;
|
|
41
43
|
}
|
|
42
44
|
|
|
43
45
|
// ---- timezone-aware calendar math -----------------------------------------
|
|
@@ -113,7 +115,9 @@ function parseHHMM(reset_at: string | undefined): { h: number; m: number } {
|
|
|
113
115
|
* - weekly: next `reset_at` weekday (default monday) at 00:00 in tz.
|
|
114
116
|
* - monthly: next 1st of month at 00:00 in tz.
|
|
115
117
|
*/
|
|
116
|
-
export
|
|
118
|
+
export type WindowSpec = Pick<Quota, "window" | "reset_at" | "timezone">;
|
|
119
|
+
|
|
120
|
+
export function nextResetAt(quota: WindowSpec, windowStart: number, now: number): number {
|
|
117
121
|
const tz = quota.timezone || "UTC";
|
|
118
122
|
if (quota.window === "5h") return windowStart + 5 * HOUR_MS;
|
|
119
123
|
|
|
@@ -140,6 +144,40 @@ export function nextResetAt(quota: Quota, windowStart: number, now: number): num
|
|
|
140
144
|
return zonedWallToEpoch(p.year, p.month + 1, 1, 0, 0, tz);
|
|
141
145
|
}
|
|
142
146
|
|
|
147
|
+
/**
 * Epoch ms at which the window containing `now` began.
 *
 * - 5h: `now` floored to a fixed 5-hour grid; the timezone is not consulted.
 * - daily: today's `reset_at` wall time in the spec's timezone, or the
 *   previous day's when today's is still in the future.
 * - weekly: 00:00 on the most recent `reset_at` weekday (default "monday";
 *   an unrecognised name falls back to WEEKDAYS index 1).
 * - anything else (monthly): 00:00 on the 1st of the current month.
 *
 * The timezone defaults to "UTC" when the spec leaves it empty.
 */
export function currentWindowStart(spec: WindowSpec, now: number): number {
  if (spec.window === "5h") {
    const span = 5 * HOUR_MS;
    return Math.floor(now / span) * span;
  }

  const tz = spec.timezone || "UTC";
  const { year, month, day, weekday } = zonedParts(now, tz);

  switch (spec.window) {
    case "daily": {
      const { h, m } = parseHHMM(spec.reset_at);
      const today = zonedWallToEpoch(year, month, day, h, m, tz);
      // today's reset has not happened yet → the window opened at yesterday's
      return today > now ? zonedWallToEpoch(year, month, day - 1, h, m, tz) : today;
    }
    case "weekly": {
      const wanted = WEEKDAYS.indexOf((spec.reset_at ?? "monday").toLowerCase());
      const targetIdx = wanted === -1 ? 1 : wanted;
      const daysBehind = (WEEKDAYS.indexOf(weekday) - targetIdx + 7) % 7;
      const candidate = zonedWallToEpoch(year, month, day - daysBehind, 0, 0, tz);
      return candidate > now
        ? zonedWallToEpoch(year, month, day - daysBehind - 7, 0, 0, tz)
        : candidate;
    }
    default:
      // monthly
      return zonedWallToEpoch(year, month, 1, 0, 0, tz);
  }
}
|
|
180
|
+
|
|
143
181
|
export class QuotaTracker {
|
|
144
182
|
private readonly states = new Map<string, QuotaState>();
|
|
145
183
|
|
|
@@ -207,6 +245,7 @@ export class QuotaTracker {
|
|
|
207
245
|
reset_in_ms: Math.max(0, reset - t),
|
|
208
246
|
pct: limit ? Math.min(1, state.consumed / limit) : undefined,
|
|
209
247
|
exhausted: limit ? state.consumed >= limit : false,
|
|
248
|
+
alert: limit ? state.consumed / limit >= (provider.quota.alert_at ?? 0.8) : false,
|
|
210
249
|
},
|
|
211
250
|
];
|
|
212
251
|
});
|
package/src/core/state.ts
CHANGED
|
@@ -17,23 +17,42 @@ import {
|
|
|
17
17
|
validateConfig,
|
|
18
18
|
unmaskSecrets,
|
|
19
19
|
writeConfigFile,
|
|
20
|
+
maskKey,
|
|
20
21
|
} from "../config.js";
|
|
22
|
+
import { clientKeyFingerprint } from "../middleware/auth.js";
|
|
21
23
|
import { KeyPool } from "./keypool.js";
|
|
22
24
|
import { QuotaTracker } from "./quota.js";
|
|
25
|
+
import { BudgetTracker } from "./budget.js";
|
|
26
|
+
|
|
27
|
+
/**
 * Display label for the server API key whose fingerprint is `fp`.
 *
 * @returns The configured name of the first matching key, its masked form
 *          when it has no name, or `key …<fp>` when no configured key matches.
 */
function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string, string> }, fp: string): string {
  const owner = server.api_keys.find((candidate) => clientKeyFingerprint(candidate) === fp);
  if (owner === undefined) return `key …${fp}`;
  return server.key_names?.[owner] ?? maskKey(owner);
}
|
|
23
33
|
|
|
24
34
|
export class GatewayState {
|
|
25
35
|
private _config: GatewayConfig;
|
|
26
36
|
private _pool: KeyPool;
|
|
27
37
|
private readonly _quota: QuotaTracker;
|
|
38
|
+
private readonly _budget: BudgetTracker;
|
|
28
39
|
|
|
29
40
|
constructor(
|
|
30
41
|
private readonly configPath: string,
|
|
31
42
|
initial: GatewayConfig,
|
|
32
43
|
quota?: QuotaTracker,
|
|
44
|
+
budgetDb?: { totals(since: number, filter?: { provider?: string; model?: string; client_key?: string }): { tokens_in: number; tokens_out: number; cost: number } },
|
|
33
45
|
) {
|
|
34
46
|
this._config = initial;
|
|
35
47
|
this._pool = new KeyPool();
|
|
36
48
|
this._quota = quota ?? new QuotaTracker();
|
|
49
|
+
this._budget = new BudgetTracker(
|
|
50
|
+
() => this._config.raw.budgets,
|
|
51
|
+
budgetDb ?? { totals: () => ({ tokens_in: 0, tokens_out: 0, cost: 0 }) },
|
|
52
|
+
undefined,
|
|
53
|
+
undefined,
|
|
54
|
+
(fp) => serverKeyLabel(this._config.raw.server, fp),
|
|
55
|
+
);
|
|
37
56
|
}
|
|
38
57
|
|
|
39
58
|
get config(): GatewayConfig {
|
|
@@ -48,6 +67,10 @@ export class GatewayState {
|
|
|
48
67
|
return this._quota;
|
|
49
68
|
}
|
|
50
69
|
|
|
70
|
+
get budget(): BudgetTracker {
|
|
71
|
+
return this._budget;
|
|
72
|
+
}
|
|
73
|
+
|
|
51
74
|
/**
|
|
52
75
|
* Validate edited config text, restore masked secrets from the live config,
|
|
53
76
|
* persist atomically, then swap in a fresh config + pool. Throws without
|
|
@@ -61,5 +84,6 @@ export class GatewayState {
|
|
|
61
84
|
writeConfigFile(this.configPath, next.raw);
|
|
62
85
|
this._config = next;
|
|
63
86
|
this._pool = new KeyPool();
|
|
87
|
+
this._budget.clearCache();
|
|
64
88
|
}
|
|
65
89
|
}
|
package/src/db.ts
CHANGED
|
@@ -27,6 +27,7 @@ export interface UsageRow {
|
|
|
27
27
|
status: number;
|
|
28
28
|
latency_ms: number;
|
|
29
29
|
stream: number; // 0/1
|
|
30
|
+
client_key: string;
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
export interface LogRow {
|
|
@@ -38,6 +39,12 @@ export interface LogRow {
|
|
|
38
39
|
response_summary: string;
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
/** Summed usage over a set of rows, as returned by `UsageDB.totals`. */
export interface UsageTotals {
  /** Sum of the `tokens_in` column. */
  tokens_in: number;
  /** Sum of the `tokens_out` column. */
  tokens_out: number;
  /** Sum of the `cost` column. */
  cost: number;
}
|
|
47
|
+
|
|
41
48
|
export interface UsageSummary {
|
|
42
49
|
total: { requests: number; tokens_in: number; tokens_out: number; cost: number };
|
|
43
50
|
by_provider: Array<{ provider: string; requests: number; tokens_in: number; tokens_out: number; cost: number }>;
|
|
@@ -79,7 +86,8 @@ export class UsageDB {
|
|
|
79
86
|
cost REAL NOT NULL DEFAULT 0,
|
|
80
87
|
status INTEGER NOT NULL,
|
|
81
88
|
latency_ms INTEGER NOT NULL DEFAULT 0,
|
|
82
|
-
stream INTEGER NOT NULL DEFAULT 0
|
|
89
|
+
stream INTEGER NOT NULL DEFAULT 0,
|
|
90
|
+
client_key TEXT NOT NULL DEFAULT ''
|
|
83
91
|
);
|
|
84
92
|
CREATE INDEX IF NOT EXISTS idx_usage_ts ON usage(ts);
|
|
85
93
|
CREATE TABLE IF NOT EXISTS logs (
|
|
@@ -99,10 +107,15 @@ export class UsageDB {
|
|
|
99
107
|
last_reset INTEGER NOT NULL DEFAULT 0
|
|
100
108
|
);
|
|
101
109
|
`);
|
|
110
|
+
// migrate older DBs created before client_key existed.
|
|
111
|
+
const cols = this.db.prepare(`PRAGMA table_info(usage)`).all() as SqlRow[];
|
|
112
|
+
if (!cols.some((c) => String(c.name) === "client_key")) {
|
|
113
|
+
this.db.exec(`ALTER TABLE usage ADD COLUMN client_key TEXT NOT NULL DEFAULT ''`);
|
|
114
|
+
}
|
|
102
115
|
this.now = now;
|
|
103
116
|
this.insertUsage = this.db.prepare(`
|
|
104
|
-
INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream)
|
|
105
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
117
|
+
INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream, client_key)
|
|
118
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
106
119
|
`);
|
|
107
120
|
this.insertLog = this.db.prepare(`
|
|
108
121
|
INSERT INTO logs (ts, direction, provider, status, request_summary, response_summary)
|
|
@@ -117,7 +130,7 @@ export class UsageDB {
|
|
|
117
130
|
`);
|
|
118
131
|
}
|
|
119
132
|
|
|
120
|
-
record(row: Omit<UsageRow, "ts"> & { ts?: number }): void {
|
|
133
|
+
record(row: Omit<UsageRow, "ts" | "client_key"> & { ts?: number; client_key?: string }): void {
|
|
121
134
|
this.insertUsage.run(
|
|
122
135
|
row.ts ?? this.now(),
|
|
123
136
|
row.alias,
|
|
@@ -130,6 +143,7 @@ export class UsageDB {
|
|
|
130
143
|
row.status,
|
|
131
144
|
row.latency_ms,
|
|
132
145
|
row.stream,
|
|
146
|
+
row.client_key ?? "",
|
|
133
147
|
);
|
|
134
148
|
}
|
|
135
149
|
|
|
@@ -195,6 +209,36 @@ export class UsageDB {
|
|
|
195
209
|
};
|
|
196
210
|
}
|
|
197
211
|
|
|
212
|
+
/**
 * Sums tokens in, tokens out and cost over usage rows with ts >= sinceMs.
 *
 * Each of `provider`, `model` and `client_key` narrows the sum when it is a
 * non-empty string; an absent or empty field adds no condition. Sums over no
 * rows come back as 0. Backs the scoped budget tracker, so the usage table
 * stays the single source of truth (no parallel counter).
 */
totals(sinceMs: number, filter?: { provider?: string; model?: string; client_key?: string }): UsageTotals {
  const where = ["ts >= ?"];
  const args: Array<number | string> = [sinceMs];
  // Column names come from this fixed list; only the values are bound.
  for (const column of ["provider", "model", "client_key"] as const) {
    const wanted = filter?.[column];
    if (wanted) {
      where.push(`${column} = ?`);
      args.push(wanted);
    }
  }
  const sql = `SELECT COALESCE(SUM(tokens_in),0) tokens_in, COALESCE(SUM(tokens_out),0) tokens_out,
              COALESCE(SUM(cost),0) cost
       FROM usage WHERE ${where.join(" AND ")}`;
  const summed = this.db.prepare(sql).get(...args) as SqlRow;
  return {
    tokens_in: num(summed.tokens_in),
    tokens_out: num(summed.tokens_out),
    cost: num(summed.cost),
  };
}
|
|
241
|
+
|
|
198
242
|
/**
|
|
199
243
|
* Bucketed time-series for charts: one point per `bucketMs` interval from
|
|
200
244
|
* `sinceMs` to now, aligned to the bucket boundary, with zero-filled gaps.
|
|
@@ -233,7 +277,7 @@ export class UsageDB {
|
|
|
233
277
|
const rows = this.db
|
|
234
278
|
.prepare(
|
|
235
279
|
`SELECT ts, alias, provider, model, tokens_in, tokens_out, cached_tokens,
|
|
236
|
-
cost, status, latency_ms, stream
|
|
280
|
+
cost, status, latency_ms, stream, client_key
|
|
237
281
|
FROM usage ORDER BY id DESC LIMIT ?`,
|
|
238
282
|
)
|
|
239
283
|
.all(Math.max(1, Math.min(limit, 1000))) as SqlRow[];
|
|
@@ -249,6 +293,7 @@ export class UsageDB {
|
|
|
249
293
|
status: num(r.status),
|
|
250
294
|
latency_ms: num(r.latency_ms),
|
|
251
295
|
stream: num(r.stream),
|
|
296
|
+
client_key: String(r.client_key ?? ""),
|
|
252
297
|
}));
|
|
253
298
|
}
|
|
254
299
|
|
package/src/middleware/auth.ts
CHANGED
|
@@ -15,15 +15,23 @@ function digest(s: string): Buffer {
|
|
|
15
15
|
return createHash("sha256").update(s).digest();
|
|
16
16
|
}
|
|
17
17
|
|
|
18
|
-
/**
|
|
19
|
-
export function
|
|
18
|
+
/**
 * Non-secret, stable identifier for a client key.
 *
 * @returns The first 8 hex characters of the key's SHA-256 digest.
 */
export function clientKeyFingerprint(key: string): string {
  const hex = createHash("sha256").update(key).digest("hex");
  return hex.slice(0, 8);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Finds the configured key equal to `presented`.
 *
 * Keys are compared by digest with `timingSafeEqual`, and every candidate is
 * digested and compared — there is no early exit on a hit. When several
 * candidates match, the last one is returned.
 *
 * @returns The matching entry of `validKeys`, or null when none matches.
 */
export function matchKey(presented: string, validKeys: string[]): string | null {
  const wanted = digest(presented);
  return validKeys.reduce<string | null>(
    (hit, candidate) => (timingSafeEqual(wanted, digest(candidate)) ? candidate : hit),
    null,
  );
}
|
|
32
|
+
|
|
33
|
+
/** True when `presented` equals one of `validKeys` (see `matchKey`). */
export function isValidKey(presented: string, validKeys: string[]): boolean {
  const hit = matchKey(presented, validKeys);
  return hit !== null;
}
|
|
28
36
|
|
|
29
37
|
export function extractKey(req: FastifyRequest): string | null {
|
|
@@ -40,14 +48,16 @@ export interface AuthResult {
|
|
|
40
48
|
ok: boolean;
|
|
41
49
|
status?: number;
|
|
42
50
|
error?: string;
|
|
51
|
+
keyFp?: string;
|
|
43
52
|
}
|
|
44
53
|
|
|
45
54
|
/**
 * Authenticates a request against the configured client keys.
 *
 * @returns `{ ok: true }` when no keys are configured (auth disabled);
 *          a 401 result when the key is missing or matches none of them;
 *          otherwise `{ ok: true, keyFp }` with the matched key's fingerprint.
 */
export function checkAuth(req: FastifyRequest, validKeys: string[]): AuthResult {
  // auth disabled
  if (validKeys.length === 0) return { ok: true };

  const presented = extractKey(req);
  if (!presented) return { ok: false, status: 401, error: "missing API key" };

  const matched = matchKey(presented, validKeys);
  return matched
    ? { ok: true, keyFp: clientKeyFingerprint(matched) }
    : { ok: false, status: 401, error: "invalid API key" };
}
|
|
52
62
|
|
|
53
63
|
/** Verifies a presented admin password (against the persisted hash store). */
|
package/src/routes/admin.ts
CHANGED
|
@@ -15,7 +15,7 @@ import { resolve } from "node:path";
|
|
|
15
15
|
import type { FastifyInstance, FastifyRequest, FastifyReply } from "fastify";
|
|
16
16
|
import type { GatewayState } from "../core/state.js";
|
|
17
17
|
import type { UsageDB } from "../db.js";
|
|
18
|
-
import { checkAdminAuth, type AdminVerifier } from "../middleware/auth.js";
|
|
18
|
+
import { checkAdminAuth, clientKeyFingerprint, type AdminVerifier } from "../middleware/auth.js";
|
|
19
19
|
import {
|
|
20
20
|
maskKey,
|
|
21
21
|
serializeConfig,
|
|
@@ -44,9 +44,12 @@ import {
|
|
|
44
44
|
addServerKey,
|
|
45
45
|
editServerKey,
|
|
46
46
|
removeServerKey,
|
|
47
|
+
setBudget,
|
|
48
|
+
clearBudget,
|
|
47
49
|
type Config,
|
|
48
50
|
type Provider,
|
|
49
51
|
type EndpointSettings,
|
|
52
|
+
type Budget,
|
|
50
53
|
} from "../config.js";
|
|
51
54
|
import { pingProvider } from "../upstream/client.js";
|
|
52
55
|
import { handle, GatewayError } from "../core/handler.js";
|
|
@@ -142,7 +145,23 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
142
145
|
|
|
143
146
|
// per-provider quota: consumed, limit, and ms until the next scheduled reset.
|
|
144
147
|
app.get("/admin/quota", requireAdmin, (_req, reply) => {
|
|
145
|
-
reply.send({
|
|
148
|
+
reply.send({
|
|
149
|
+
quota: deps.state.quota.snapshot(deps.state.config.listProviders()),
|
|
150
|
+
budgets: deps.state.budget.statuses(),
|
|
151
|
+
});
|
|
152
|
+
});
|
|
153
|
+
|
|
154
|
+
// add or replace a budget (keyed by scope). Body = Budget; invalid shape or an
|
|
155
|
+
// unknown provider scope -> 400 via zod / setBudget through state.reload().
|
|
156
|
+
app.put("/admin/budgets", requireAdmin, (req, reply) => {
|
|
157
|
+
const b = (req.body ?? {}) as Budget;
|
|
158
|
+
applyMutation(reply, (c) => setBudget(c, b));
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
// remove a budget by scope key: global | provider:<id> | model:<id>.
|
|
162
|
+
app.delete("/admin/budgets/:key", requireAdmin, (req, reply) => {
  const raw = (req.params as { key: string }).key;
  // NOTE(review): Fastify's router is documented to percent-decode route
  // params before the handler runs. Decoding again throws URIError when the
  // scope key contains a literal "%" (e.g. a model id like "foo%bar"), which
  // would surface as a 500. Keep the extra decode for existing clients, but
  // fall back to the value the router delivered when it is not decodable.
  let key = raw;
  try {
    key = decodeURIComponent(raw);
  } catch {
    // not a valid escape sequence: the router's value is already the key
  }
  applyMutation(reply, (c) => clearBudget(c, key));
});
|
|
147
166
|
|
|
148
167
|
// current config, secrets masked
|
|
@@ -426,11 +445,17 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
426
445
|
|
|
427
446
|
// every callable model: provider/model catalog entries + routing aliases.
|
|
428
447
|
app.get("/admin/models", requireAdmin, (_req, reply) => {
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
448
|
+
// disabled providers are skipped in routing, so their models must not be
|
|
449
|
+
// selectable anywhere this catalog feeds (combos, CLI-tool setup, budget
|
|
450
|
+
// scopes) — drop them here at the single source.
|
|
451
|
+
const providers = deps.state.config
|
|
452
|
+
.listProviders()
|
|
453
|
+
.filter((p) => !p.disabled)
|
|
454
|
+
.map((p) => ({
|
|
455
|
+
id: p.id,
|
|
456
|
+
format: p.format,
|
|
457
|
+
models: p.models.map((m) => ({ id: m.id, ref: `${p.id}/${m.id}`, price_in: m.price_in, price_out: m.price_out })),
|
|
458
|
+
}));
|
|
434
459
|
const routes = deps.state.config.listRoutes();
|
|
435
460
|
reply.send({ providers, routes });
|
|
436
461
|
});
|
|
@@ -546,6 +571,19 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
546
571
|
applyMutation(reply, (c) => removeServerKey(c, i));
|
|
547
572
|
});
|
|
548
573
|
|
|
574
|
+
// server keys with a non-secret fingerprint + display name, for the budget
|
|
575
|
+
// key-scope picker. Never returns the raw key.
|
|
576
|
+
app.get("/admin/keys", requireAdmin, (_req, reply) => {
|
|
577
|
+
const s = deps.state.config.raw.server;
|
|
578
|
+
reply.send(
|
|
579
|
+
s.api_keys.map((k) => ({
|
|
580
|
+
fingerprint: clientKeyFingerprint(k),
|
|
581
|
+
name: s.key_names?.[k] ?? maskKey(k),
|
|
582
|
+
masked: maskKey(k),
|
|
583
|
+
})),
|
|
584
|
+
);
|
|
585
|
+
});
|
|
586
|
+
|
|
549
587
|
// reveal ONE raw gateway key (the "show key" button on the Endpoint page).
|
|
550
588
|
app.get("/admin/endpoint/keys/:index/reveal", requireAdmin, (req, reply) => {
|
|
551
589
|
const { index } = req.params as { index: string };
|
package/src/routes/v1.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
|
|
2
|
-
import { checkAuth } from "../middleware/auth.js";
|
|
2
|
+
import { checkAuth, extractKey, clientKeyFingerprint } from "../middleware/auth.js";
|
|
3
3
|
import type { GatewayState } from "../core/state.js";
|
|
4
4
|
import { handle, GatewayError, type HandleDeps } from "../core/handler.js";
|
|
5
5
|
import type { WireFormat } from "../core/canonical.js";
|
|
@@ -23,16 +23,21 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
|
|
|
23
23
|
};
|
|
24
24
|
|
|
25
25
|
// build deps from the live holder per request (never close over config/pool).
|
|
26
|
-
const depsNow = (): HandleDeps =>
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
const depsNow = (req: FastifyRequest): HandleDeps => {
|
|
27
|
+
const presented = extractKey(req);
|
|
28
|
+
return {
|
|
29
|
+
config: state.config,
|
|
30
|
+
pool: state.pool,
|
|
31
|
+
quota: state.quota,
|
|
32
|
+
budget: state.budget,
|
|
33
|
+
db,
|
|
34
|
+
clientKeyFp: presented ? clientKeyFingerprint(presented) : undefined,
|
|
35
|
+
log: (msg) => app.log.info(msg),
|
|
36
|
+
};
|
|
37
|
+
};
|
|
33
38
|
|
|
34
|
-
app.post("/v1/chat/completions", requireAuth, (req, reply) => dispatch(depsNow(), "openai", req, reply));
|
|
35
|
-
app.post("/v1/messages", requireAuth, (req, reply) => dispatch(depsNow(), "anthropic", req, reply));
|
|
39
|
+
app.post("/v1/chat/completions", requireAuth, (req, reply) => dispatch(depsNow(req), "openai", req, reply));
|
|
40
|
+
app.post("/v1/messages", requireAuth, (req, reply) => dispatch(depsNow(req), "anthropic", req, reply));
|
|
36
41
|
}
|
|
37
42
|
|
|
38
43
|
const SSE_HEADERS = {
|
package/src/server.ts
CHANGED
|
@@ -59,7 +59,7 @@ async function main(): Promise<void> {
|
|
|
59
59
|
});
|
|
60
60
|
|
|
61
61
|
// holder enables runtime config edits (hot-reload) from the dashboard.
|
|
62
|
-
const state = new GatewayState(configPath, config, quota);
|
|
62
|
+
const state = new GatewayState(configPath, config, quota, db);
|
|
63
63
|
// admin password lives in a hash store (seeded from the env on first run,
|
|
64
64
|
// changeable at runtime from the dashboard).
|
|
65
65
|
const auth = AuthStore.open(dataDir, process.env.AIGETWEY_ADMIN_PASSWORD);
|
|
@@ -78,6 +78,10 @@ async function main(): Promise<void> {
|
|
|
78
78
|
prefix: "/",
|
|
79
79
|
// forward the whole HTTP surface the dashboard needs (pages + its API).
|
|
80
80
|
httpMethods: ["GET", "POST", "PUT", "DELETE", "PATCH", "HEAD", "OPTIONS"],
|
|
81
|
+
// forward WebSocket upgrades too, so `next dev`'s HMR socket works when the
|
|
82
|
+
// dashboard is proxied — this is what lets dev run single-URL on the gateway
|
|
83
|
+
// port like production. Harmless for the prebuilt prod dashboard (no socket).
|
|
84
|
+
websocket: true,
|
|
81
85
|
// keep the ORIGINAL Host so Next builds redirects (e.g. → /login) against
|
|
82
86
|
// the gateway's address, not the internal dashboard port.
|
|
83
87
|
replyOptions: {
|
package/src/upstream/client.ts
CHANGED
|
@@ -68,6 +68,15 @@ function buildBody(
|
|
|
68
68
|
const adapter = adapterFor(provider.format);
|
|
69
69
|
const upstreamReq: CanonicalRequest = { ...req, model, stream };
|
|
70
70
|
const out = adapter.requestFromCanonical(upstreamReq) as Record<string, unknown>;
|
|
71
|
+
// OpenAI-compatible streams omit usage entirely unless you opt in — without this
|
|
72
|
+
// every streamed call through an openai-format provider logs 0 tokens in/out
|
|
73
|
+
// (anthropic/gemini report usage inline, so they're unaffected). Ask for the
|
|
74
|
+
// final usage chunk; the handler taps it for accounting. Preserve a usage opt-in
|
|
75
|
+
// the client already set.
|
|
76
|
+
if (stream && provider.format === "openai") {
|
|
77
|
+
const existing = (out.stream_options ?? {}) as Record<string, unknown>;
|
|
78
|
+
out.stream_options = { ...existing, include_usage: true };
|
|
79
|
+
}
|
|
71
80
|
// Normalize thinking into THIS provider's native format, keyed by the upstream
|
|
72
81
|
// model's capabilities. No-op for non-reasoning models. Runs per-attempt so each
|
|
73
82
|
// provider in a fallback chain gets the right shape.
|