aigetwey 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +19 -1
- package/README.md +28 -7
- package/config.example.yaml +0 -1
- package/dashboard/src/app/(console)/quota/page.tsx +2 -2
- package/dashboard/src/components/BudgetForm.tsx +15 -17
- package/dashboard/src/components/{QuotaView.tsx → BudgetTracker.tsx} +71 -56
- package/dashboard/src/components/CooldownTimer.tsx +1 -1
- package/dashboard/src/components/EndpointView.tsx +255 -47
- package/dashboard/src/components/LogTable.tsx +32 -25
- package/dashboard/src/components/ProviderManager.tsx +3 -28
- package/dashboard/src/components/Rail.tsx +1 -1
- package/dashboard/src/components/RoutingView.tsx +6 -2
- package/dashboard/src/components/TopBar.tsx +1 -1
- package/dashboard/src/components/ui.tsx +6 -1
- package/dashboard/src/lib/client.ts +6 -5
- package/dashboard/src/lib/gateway.ts +23 -16
- package/dist/config.js +86 -23
- package/dist/config.js.map +1 -1
- package/dist/core/budget.js +1 -1
- package/dist/core/budget.js.map +1 -1
- package/dist/core/fallback.js +0 -6
- package/dist/core/fallback.js.map +1 -1
- package/dist/core/handler.js +6 -5
- package/dist/core/handler.js.map +1 -1
- package/dist/core/keysUsage.js +15 -0
- package/dist/core/keysUsage.js.map +1 -0
- package/dist/core/ratelimit.js +15 -0
- package/dist/core/ratelimit.js.map +1 -0
- package/dist/core/state.js +5 -13
- package/dist/core/state.js.map +1 -1
- package/dist/core/window.js +35 -0
- package/dist/core/window.js.map +1 -0
- package/dist/db.js +0 -20
- package/dist/db.js.map +1 -1
- package/dist/routes/admin.js +55 -10
- package/dist/routes/admin.js.map +1 -1
- package/dist/routes/v1.js +14 -1
- package/dist/routes/v1.js.map +1 -1
- package/dist/server.js +1 -7
- package/dist/server.js.map +1 -1
- package/dist/stream/openai-stream.js +3 -0
- package/dist/stream/openai-stream.js.map +1 -1
- package/package.json +1 -1
- package/src/config.ts +89 -23
- package/src/core/budget.ts +1 -1
- package/src/core/fallback.ts +0 -9
- package/src/core/handler.ts +9 -7
- package/src/core/keysUsage.ts +49 -0
- package/src/core/ratelimit.ts +25 -0
- package/src/core/state.ts +4 -14
- package/src/core/window.ts +45 -0
- package/src/db.ts +0 -23
- package/src/routes/admin.ts +61 -9
- package/src/routes/v1.ts +18 -1
- package/src/server.ts +1 -8
- package/src/stream/openai-stream.ts +3 -1
- package/src/core/quota.ts +0 -253
package/src/config.ts
CHANGED
|
@@ -18,21 +18,9 @@ export { clientKeyFingerprint } from "./middleware/auth.js";
|
|
|
18
18
|
// Shape differs from a flat OpenAI gateway: routing lives in a top-level
|
|
19
19
|
// `models[]` layer (alias -> provider chain), the endpoint block carries the
|
|
20
20
|
// token-saver toggles, and providers may be free passthroughs or service-account
|
|
21
|
-
// backed. The handler/keypool
|
|
21
|
+
// backed. The handler/keypool phases read these fields; defining the full
|
|
22
22
|
// shape up front avoids reshaping config across later phases.
|
|
23
23
|
|
|
24
|
-
/** Token quota window for a provider — drives the dashboard reset countdown. */
|
|
25
|
-
const QuotaSchema = z.object({
|
|
26
|
-
window: z.enum(["5h", "daily", "weekly", "monthly"]),
|
|
27
|
-
// daily: "HH:MM" local reset; weekly: weekday name ("monday"); others: ignored.
|
|
28
|
-
reset_at: z.string().optional(),
|
|
29
|
-
timezone: z.string().default("UTC"),
|
|
30
|
-
// optional ceiling for a progress bar; quota tracking works without it.
|
|
31
|
-
limit_tokens: z.number().int().positive().optional(),
|
|
32
|
-
// soft-alert threshold (0..1); UI flags the quota when pct >= this. Default 0.8.
|
|
33
|
-
alert_at: z.number().gt(0).lte(1).optional(),
|
|
34
|
-
});
|
|
35
|
-
|
|
36
24
|
const ProviderModelSchema = z.object({
|
|
37
25
|
id: z.string().min(1),
|
|
38
26
|
price_in: z.number().nonnegative().optional(),
|
|
@@ -59,7 +47,6 @@ const ProviderSchema = z
|
|
|
59
47
|
service_account: z.string().optional(),
|
|
60
48
|
models: z.array(ProviderModelSchema).default([]),
|
|
61
49
|
headers: z.record(z.string()).optional(),
|
|
62
|
-
quota: QuotaSchema.optional(),
|
|
63
50
|
// when true the provider is skipped in routing (kept in config, like a key's
|
|
64
51
|
// disabled state but for the whole provider).
|
|
65
52
|
disabled: z.boolean().optional(),
|
|
@@ -120,14 +107,20 @@ const ServerSchema = z
|
|
|
120
107
|
// optional friendly label per key, keyed by the key itself. Kept separate so
|
|
121
108
|
// api_keys stays a plain string[] (auth/masking paths untouched).
|
|
122
109
|
key_names: z.record(z.string()).optional(),
|
|
110
|
+
// per-key model allowlist (call-strings) + rate limit (req/min), keyed by the
|
|
111
|
+
// raw key like key_names. Absent → unrestricted / unlimited.
|
|
112
|
+
key_models: z.record(z.array(z.string().min(1))).optional(),
|
|
113
|
+
key_rpm: z.record(z.number().int().positive()).optional(),
|
|
114
|
+
// per-key access expiry, epoch ms, keyed by the RAW key. Absent → never expires.
|
|
115
|
+
key_expires: z.record(z.number().int().positive()).optional(),
|
|
123
116
|
})
|
|
124
117
|
.default({ host: "127.0.0.1", port: 18080, api_keys: [] });
|
|
125
118
|
|
|
126
119
|
/**
|
|
127
120
|
* A spend budget scoped to the whole gateway, one provider, or one upstream
|
|
128
121
|
* model. unit picks what `limit` means — USD cost or total tokens. Soft-alert at
|
|
129
|
-
* alert_at (default 0.8), hard-stop at 100%.
|
|
130
|
-
*
|
|
122
|
+
* alert_at (default 0.8), hard-stop at 100%. Each window is a rolling tumbling
|
|
123
|
+
* bucket that starts at the budget's `anchor` (epoch grid when no anchor is set; see window.ts). Opt-in: omit / empty list to disable.
|
|
131
124
|
*/
|
|
132
125
|
const BudgetScopeSchema = z.discriminatedUnion("type", [
|
|
133
126
|
z.object({ type: z.literal("global") }),
|
|
@@ -136,13 +129,21 @@ const BudgetScopeSchema = z.discriminatedUnion("type", [
|
|
|
136
129
|
z.object({ type: z.literal("key"), id: z.string().min(1) }),
|
|
137
130
|
]);
|
|
138
131
|
|
|
132
|
+
// Rolling windows replaced the old calendar windows. Legacy values are coerced
// so existing config.yaml budgets keep loading (daily→24h, weekly→7day,
// monthly→30day).
const LEGACY_WINDOW: Record<string, string> = { daily: "24h", weekly: "7day", monthly: "30day" };
const WindowSchema = z.preprocess(
  // Own-property lookup only: a bare `in` test also matches names inherited
  // from Object.prototype (e.g. "toString"), which would hand the enum a
  // function instead of the string the operator actually wrote.
  (v) =>
    typeof v === "string" && Object.prototype.hasOwnProperty.call(LEGACY_WINDOW, v)
      ? LEGACY_WINDOW[v]
      : v,
  z.enum(["5h", "24h", "7day", "30day"]),
);
|
|
139
|
+
|
|
139
140
|
const BudgetSchema = z.object({
|
|
140
141
|
scope: BudgetScopeSchema,
|
|
141
142
|
unit: z.enum(["usd", "tokens"]),
|
|
142
143
|
limit: z.number().positive(),
|
|
143
|
-
window:
|
|
144
|
-
|
|
145
|
-
|
|
144
|
+
window: WindowSchema,
|
|
145
|
+
// epoch ms the recurring cycle is anchored to; stamped by setBudget on create.
|
|
146
|
+
anchor: z.number().int().nonnegative().optional(),
|
|
146
147
|
alert_at: z.number().gt(0).lte(1).optional(),
|
|
147
148
|
// optional free-text label so an operator remembers what a budget is for.
|
|
148
149
|
note: z.string().max(200).optional(),
|
|
@@ -157,7 +158,6 @@ const ConfigSchema = z.object({
|
|
|
157
158
|
budgets: z.array(BudgetSchema).default([]),
|
|
158
159
|
});
|
|
159
160
|
|
|
160
|
-
export type Quota = z.infer<typeof QuotaSchema>;
|
|
161
161
|
export type ProviderModel = z.infer<typeof ProviderModelSchema>;
|
|
162
162
|
export type Provider = z.infer<typeof ProviderSchema>;
|
|
163
163
|
export type ModelRoute = z.infer<typeof ModelRouteSchema>;
|
|
@@ -790,7 +790,7 @@ export function budgetKey(scope: BudgetScope): string {
|
|
|
790
790
|
}
|
|
791
791
|
|
|
792
792
|
/** Add a budget, or replace the existing one with the same scope key. */
|
|
793
|
-
export function setBudget(config: Config, budget: Budget): Config {
|
|
793
|
+
export function setBudget(config: Config, budget: Budget, now: number = Date.now()): Config {
|
|
794
794
|
if (budget.scope.type === "provider") {
|
|
795
795
|
const { id } = budget.scope;
|
|
796
796
|
if (!config.providers.some((p) => p.id === id)) {
|
|
@@ -806,8 +806,16 @@ export function setBudget(config: Config, budget: Budget): Config {
|
|
|
806
806
|
const next = cloneConfig(config);
|
|
807
807
|
const key = budgetKey(budget.scope);
|
|
808
808
|
const idx = next.budgets.findIndex((b) => budgetKey(b.scope) === key);
|
|
809
|
-
if (idx === -1)
|
|
810
|
-
|
|
809
|
+
if (idx === -1) {
|
|
810
|
+
next.budgets.push({ ...budget, anchor: budget.anchor ?? now });
|
|
811
|
+
} else {
|
|
812
|
+
const prev = next.budgets[idx]!;
|
|
813
|
+
// keep the running cycle on edit (preserve prev anchor as-is, including a
|
|
814
|
+
// legacy undefined = epoch grid, so editing a limit never resets spend);
|
|
815
|
+
// start a fresh cycle only when the window length actually changed.
|
|
816
|
+
const anchor = budget.anchor ?? (prev.window === budget.window ? prev.anchor : now);
|
|
817
|
+
next.budgets[idx] = { ...budget, anchor };
|
|
818
|
+
}
|
|
811
819
|
return next;
|
|
812
820
|
}
|
|
813
821
|
|
|
@@ -854,5 +862,63 @@ export function removeServerKey(config: Config, index: number): Config {
|
|
|
854
862
|
if (removed && next.server.key_names && removed in next.server.key_names) {
|
|
855
863
|
delete next.server.key_names[removed];
|
|
856
864
|
}
|
|
865
|
+
if (removed && next.server.key_models && removed in next.server.key_models) {
|
|
866
|
+
delete next.server.key_models[removed];
|
|
867
|
+
if (Object.keys(next.server.key_models).length === 0) next.server.key_models = undefined;
|
|
868
|
+
}
|
|
869
|
+
if (removed && next.server.key_rpm && removed in next.server.key_rpm) {
|
|
870
|
+
delete next.server.key_rpm[removed];
|
|
871
|
+
if (Object.keys(next.server.key_rpm).length === 0) next.server.key_rpm = undefined;
|
|
872
|
+
}
|
|
873
|
+
if (removed && next.server.key_expires && removed in next.server.key_expires) {
|
|
874
|
+
delete next.server.key_expires[removed];
|
|
875
|
+
if (Object.keys(next.server.key_expires).length === 0) next.server.key_expires = undefined;
|
|
876
|
+
}
|
|
877
|
+
return next;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
/**
 * Floor `value` and return it only when the result is a finite integer >= 1.
 * Anything else (null, 0, negatives, NaN, ±Infinity, fractions below 1) yields
 * undefined, which callers treat as "clear this scope".
 */
function scopePositiveInt(value: number | null | undefined): number | undefined {
  if (value === null || value === undefined) return undefined;
  const floored = Math.floor(value);
  return Number.isFinite(floored) && floored > 0 ? floored : undefined;
}

/**
 * Copy of `map` with `key` set to `value`, or with `key` removed when `value`
 * is undefined. Returns undefined instead of an empty map.
 */
function scopeMapWith<T>(
  map: Record<string, T> | undefined,
  key: string,
  value: T | undefined,
): Record<string, T> | undefined {
  const copy: Record<string, T> = { ...(map ?? {}) };
  if (value === undefined) delete copy[key];
  else copy[key] = value;
  return Object.keys(copy).length > 0 ? copy : undefined;
}

/**
 * Set or clear a gateway key's scopes (by index, since keys are masked in the
 * API). `models`, `rpm` and `expires` are each applied only when present in the
 * patch:
 *  - `models`: entries are trimmed and blanks dropped; null or an empty list
 *    clears the allowlist.
 *  - `rpm` / `expires`: floored to an integer; null, 0, or any value that does
 *    not floor to a finite integer >= 1 clears the entry, so nothing that the
 *    schema's positive-integer check would reject is ever written.
 * Maps left empty are pruned to undefined. The input config is not mutated.
 *
 * @throws Error when `index` is not an integer position inside `server.api_keys`.
 */
export function setServerKeyScope(
  config: Config,
  index: number,
  patch: { models?: string[] | null; rpm?: number | null; expires?: number | null },
): Config {
  const next = cloneConfig(config);
  const keys = next.server.api_keys;
  // Number.isInteger also rejects NaN, which would slip past both range checks.
  if (!Number.isInteger(index) || index < 0 || index >= keys.length) {
    throw new Error(`no gateway key at index ${index}`);
  }
  const key = keys[index]!;

  if (patch.models !== undefined) {
    const list = (patch.models ?? []).map((m) => m.trim()).filter(Boolean);
    next.server.key_models = scopeMapWith(next.server.key_models, key, list.length > 0 ? list : undefined);
  }

  if (patch.rpm !== undefined) {
    next.server.key_rpm = scopeMapWith(next.server.key_rpm, key, scopePositiveInt(patch.rpm));
  }

  if (patch.expires !== undefined) {
    next.server.key_expires = scopeMapWith(next.server.key_expires, key, scopePositiveInt(patch.expires));
  }

  return next;
}
|
|
919
|
+
|
|
920
|
+
/**
 * Reports whether `rawKey` is past its expiry. A key with no entry in
 * `server.key_expires` never expires, and an expiry exactly equal to `now`
 * still counts as live (the comparison is strict).
 */
export function isKeyExpired(server: Config["server"], rawKey: string, now: number): boolean {
  const expiresAt = server.key_expires?.[rawKey];
  if (expiresAt === undefined) return false;
  return now > expiresAt;
}
|
package/src/core/budget.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { Budget, BudgetScope } from "../config.js";
|
|
10
10
|
import { budgetKey } from "../config.js";
|
|
11
|
-
import { currentWindowStart, nextResetAt } from "./
|
|
11
|
+
import { currentWindowStart, nextResetAt } from "./window.js";
|
|
12
12
|
|
|
13
13
|
export interface BudgetStatus {
|
|
14
14
|
scope: BudgetScope;
|
package/src/core/fallback.ts
CHANGED
|
@@ -32,8 +32,6 @@ export interface FallbackOpts {
|
|
|
32
32
|
onAttempt?: (log: AttemptLog) => void;
|
|
33
33
|
/** which key the pool handed out for the winning attempt (handler uses it for usage). */
|
|
34
34
|
onServed?: (route: ResolvedRoute, key: string) => void;
|
|
35
|
-
/** when set, a provider this returns true for is skipped (quota exhausted). */
|
|
36
|
-
isExhausted?: (provider: ResolvedRoute["provider"]) => boolean;
|
|
37
35
|
/** captured client thinking intent, applied per-attempt in the provider's format. */
|
|
38
36
|
thinkingIntent?: ThinkingConfig | null;
|
|
39
37
|
}
|
|
@@ -56,13 +54,6 @@ export async function executeWithFallback(
|
|
|
56
54
|
for (const route of routes) {
|
|
57
55
|
const { provider } = route;
|
|
58
56
|
|
|
59
|
-
// skip a provider whose token budget is spent for this window — like a key
|
|
60
|
-
// cooling down, but for the whole provider. Falls through to the next route.
|
|
61
|
-
if (opts.isExhausted?.(provider)) {
|
|
62
|
-
log({ provider: provider.id, model: route.model, outcome: "skip", detail: "quota exhausted" });
|
|
63
|
-
continue;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
57
|
const attempts = provider.max_retries + 1;
|
|
67
58
|
|
|
68
59
|
for (let i = 0; i < attempts; i++) {
|
package/src/core/handler.ts
CHANGED
|
@@ -20,7 +20,6 @@ import { parseSSE, encodeSSE } from "../stream/sse.js";
|
|
|
20
20
|
import { streamAdapterFor } from "../stream/index.js";
|
|
21
21
|
import type { CanonicalChunk } from "../stream/chunk.js";
|
|
22
22
|
import type { KeyPool } from "./keypool.js";
|
|
23
|
-
import type { QuotaTracker } from "./quota.js";
|
|
24
23
|
import { executeWithFallback } from "./fallback.js";
|
|
25
24
|
import { type UsageDB, computeCost } from "../db.js";
|
|
26
25
|
import { compressMessages } from "../rtk/index.js";
|
|
@@ -50,12 +49,12 @@ export interface HandleDeps {
|
|
|
50
49
|
config: GatewayConfig;
|
|
51
50
|
pool: KeyPool;
|
|
52
51
|
db?: UsageDB;
|
|
53
|
-
quota?: QuotaTracker;
|
|
54
52
|
budget?: {
|
|
55
53
|
globalStatus(): { exhausted: boolean; reset_in_ms: number } | null;
|
|
56
54
|
blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null;
|
|
57
55
|
blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null;
|
|
58
56
|
};
|
|
57
|
+
clientKeyModels?: string[];
|
|
59
58
|
clientKeyFp?: string;
|
|
60
59
|
log?: (msg: string) => void;
|
|
61
60
|
now?: () => number;
|
|
@@ -71,8 +70,6 @@ function recordUsage(
|
|
|
71
70
|
): void {
|
|
72
71
|
const tokensIn = usage?.prompt_tokens ?? 0;
|
|
73
72
|
const tokensOut = usage?.completion_tokens ?? 0;
|
|
74
|
-
// count the full request against the served provider's window budget.
|
|
75
|
-
deps.quota?.consume(route.provider, tokensIn + tokensOut);
|
|
76
73
|
if (!deps.db) return;
|
|
77
74
|
// Cost: a combo/route may set explicit prices; otherwise fall back to the ported
|
|
78
75
|
// aigetwey pricing table so cost auto-resolves per model instead of showing $0.
|
|
@@ -118,6 +115,13 @@ export async function handle(
|
|
|
118
115
|
// that can't reason. Matches aigetwey's capture-before-translate flow.
|
|
119
116
|
const { cleanModel, override } = parseSuffix(canonical.model);
|
|
120
117
|
canonical.model = cleanModel;
|
|
118
|
+
|
|
119
|
+
// per-key allowlist: a key may be restricted to specific call-strings. Empty/
|
|
120
|
+
// absent → unrestricted. Match the literal clean model the client asked for.
|
|
121
|
+
if (deps.clientKeyModels && deps.clientKeyModels.length > 0 && !deps.clientKeyModels.includes(cleanModel)) {
|
|
122
|
+
throw new GatewayError(403, { error: "model not allowed for this key" });
|
|
123
|
+
}
|
|
124
|
+
|
|
121
125
|
const thinkingIntent: ThinkingConfig | null =
|
|
122
126
|
override ?? captureThinking(canonical as Record<string, unknown>);
|
|
123
127
|
|
|
@@ -127,8 +131,7 @@ export async function handle(
|
|
|
127
131
|
}
|
|
128
132
|
|
|
129
133
|
// Budget hard-stop. Global overrun fails fast. Provider/model budgets bar the
|
|
130
|
-
// matching routes
|
|
131
|
-
// there's nothing to serve → 402.
|
|
134
|
+
// matching routes; if every candidate is barred, there's nothing to serve → 402.
|
|
132
135
|
if (deps.budget) {
|
|
133
136
|
const g = deps.budget.globalStatus();
|
|
134
137
|
if (g?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: g.reset_in_ms });
|
|
@@ -193,7 +196,6 @@ export async function handle(
|
|
|
193
196
|
stream: wantStream,
|
|
194
197
|
signal,
|
|
195
198
|
thinkingIntent,
|
|
196
|
-
isExhausted: deps.quota ? (p) => deps.quota!.isExhausted(p) : undefined,
|
|
197
199
|
onAttempt: (a) =>
|
|
198
200
|
deps.log?.(`[fallback] ${a.provider}/${a.model} ${a.status ?? "-"} -> ${a.outcome}${a.detail ? ` (${a.detail})` : ""}`),
|
|
199
201
|
});
|
|
package/src/core/keysUsage.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shapes one row for the Budgets page "Keys" section: a gateway key joined with
|
|
3
|
+
* its all-time spend/tokens, optional expiry, and its key-scoped budget status
|
|
4
|
+
* (null when the key is uncapped). Pure — the admin route feeds it real data.
|
|
5
|
+
*/
|
|
6
|
+
import type { BudgetStatus } from "./budget.js";
|
|
7
|
+
|
|
8
|
+
/** The subset of a key-scoped budget status that a Keys row carries. */
export interface KeyBudgetView {
  unit: "usd" | "tokens";
  limit: number;
  spent: number;
  pct: number;
  window: BudgetStatus["window"];
  reset_in_ms: number;
  exhausted: boolean;
  alert: boolean;
}

/** One gateway key joined with its usage totals, expiry and budget view. */
export interface KeyUsageRow {
  fingerprint: string;
  name: string;
  masked: string;
  expires?: number;
  spent: number;
  tokens: number;
  budget: KeyBudgetView | null;
}

/**
 * Assemble a Keys row from already-fetched data. `spent` is the totals' cost,
 * `tokens` is input plus output tokens, and `budget` is the listed status
 * fields copied over, or null when no budget status was supplied.
 */
export function buildKeyUsageRow(input: {
  fingerprint: string;
  name: string;
  masked: string;
  expires?: number;
  totals: { tokens_in: number; tokens_out: number; cost: number };
  budget: BudgetStatus | null;
}): KeyUsageRow {
  const { fingerprint, name, masked, expires, totals, budget: status } = input;

  let view: KeyBudgetView | null = null;
  if (status) {
    // Copy only the view fields; anything else on the status is left behind.
    const { unit, limit, spent, pct, window: win, reset_in_ms, exhausted, alert } = status;
    view = { unit, limit, spent, pct, window: win, reset_in_ms, exhausted, alert };
  }

  return {
    fingerprint,
    name,
    masked,
    expires,
    spent: totals.cost,
    tokens: totals.tokens_in + totals.tokens_out,
    budget: view,
  };
}
|
|
package/src/core/ratelimit.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Per-key request counter on a fixed calendar-minute window. In-memory only —
 * counts reset on restart, which is fine for a 1-minute window. Used to rate-limit
 * gateway keys that opt in via server.key_rpm.
 */
interface Bucket {
  /** Minute index (epoch ms / 60_000, floored) this count belongs to. */
  minute: number;
  /** Hits recorded for the key during `minute`. */
  count: number;
}

export class RateLimiter {
  private readonly buckets = new Map<string, Bucket>();
  /** Minute index of the most recent stale-bucket sweep. */
  private sweptMinute = -1;

  /** Number of keys currently holding a live bucket. */
  get size(): number {
    return this.buckets.size;
  }

  /**
   * Record a hit for `key`; return true if it now EXCEEDS `limit` this minute.
   *
   * @param key   identifier the hits are counted under
   * @param limit maximum hits allowed per calendar minute
   * @param now   epoch ms used to pick the minute (defaults to the wall clock)
   */
  over(key: string, limit: number, now: number = Date.now()): boolean {
    const minute = Math.floor(now / 60_000);

    // Buckets from any other minute would be reset on their next hit anyway, so
    // dropping them when the minute changes does not alter any result. It only
    // stops keys that went quiet (or were removed) from lingering forever.
    if (minute !== this.sweptMinute) {
      for (const [k, stale] of this.buckets) {
        if (stale.minute !== minute) this.buckets.delete(k);
      }
      this.sweptMinute = minute;
    }

    const bucket = this.buckets.get(key);
    if (!bucket || bucket.minute !== minute) {
      this.buckets.set(key, { minute, count: 1 });
      return 1 > limit;
    }
    bucket.count += 1;
    return bucket.count > limit;
  }
}
|
package/src/core/state.ts
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Mutable holder for the live gateway config, key pool, and
|
|
2
|
+
* Mutable holder for the live gateway config, key pool, and budget tracker.
|
|
3
3
|
*
|
|
4
4
|
* Config loads once at boot, but the dashboard edits it at runtime. Routes read
|
|
5
|
-
* `state.config` / `state.pool` / `state.
|
|
5
|
+
* `state.config` / `state.pool` / `state.budget` fresh per request (never close
|
|
6
6
|
* over them), so a successful reload swaps in the new config + pool atomically —
|
|
7
7
|
* no restart.
|
|
8
8
|
*
|
|
9
9
|
* reload() validates and persists BEFORE swapping: an invalid edit throws and
|
|
10
|
-
* the old config keeps serving. The pool is rebuilt (cooldown is transient)
|
|
11
|
-
* the quota tracker is KEPT across reloads — a budget consumed this window must
|
|
12
|
-
* survive a config edit, else editing config would silently reset every quota.
|
|
10
|
+
* the old config keeps serving. The pool is rebuilt (cooldown is transient).
|
|
13
11
|
*/
|
|
14
12
|
import {
|
|
15
13
|
GatewayConfig,
|
|
@@ -21,7 +19,6 @@ import {
|
|
|
21
19
|
} from "../config.js";
|
|
22
20
|
import { clientKeyFingerprint } from "../middleware/auth.js";
|
|
23
21
|
import { KeyPool } from "./keypool.js";
|
|
24
|
-
import { QuotaTracker } from "./quota.js";
|
|
25
22
|
import { BudgetTracker } from "./budget.js";
|
|
26
23
|
|
|
27
24
|
function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string, string> }, fp: string): string {
|
|
@@ -34,18 +31,15 @@ function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string,
|
|
|
34
31
|
export class GatewayState {
|
|
35
32
|
private _config: GatewayConfig;
|
|
36
33
|
private _pool: KeyPool;
|
|
37
|
-
private readonly _quota: QuotaTracker;
|
|
38
34
|
private readonly _budget: BudgetTracker;
|
|
39
35
|
|
|
40
36
|
constructor(
|
|
41
37
|
private readonly configPath: string,
|
|
42
38
|
initial: GatewayConfig,
|
|
43
|
-
quota?: QuotaTracker,
|
|
44
39
|
budgetDb?: { totals(since: number, filter?: { provider?: string; model?: string; client_key?: string }): { tokens_in: number; tokens_out: number; cost: number } },
|
|
45
40
|
) {
|
|
46
41
|
this._config = initial;
|
|
47
42
|
this._pool = new KeyPool();
|
|
48
|
-
this._quota = quota ?? new QuotaTracker();
|
|
49
43
|
this._budget = new BudgetTracker(
|
|
50
44
|
() => this._config.raw.budgets,
|
|
51
45
|
budgetDb ?? { totals: () => ({ tokens_in: 0, tokens_out: 0, cost: 0 }) },
|
|
@@ -63,10 +57,6 @@ export class GatewayState {
|
|
|
63
57
|
return this._pool;
|
|
64
58
|
}
|
|
65
59
|
|
|
66
|
-
get quota(): QuotaTracker {
|
|
67
|
-
return this._quota;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
60
|
get budget(): BudgetTracker {
|
|
71
61
|
return this._budget;
|
|
72
62
|
}
|
|
@@ -75,7 +65,7 @@ export class GatewayState {
|
|
|
75
65
|
* Validate edited config text, restore masked secrets from the live config,
|
|
76
66
|
* persist atomically, then swap in a fresh config + pool. Throws without
|
|
77
67
|
* changing anything if validation fails or a masked key can't be resolved —
|
|
78
|
-
* the old config keeps serving.
|
|
68
|
+
* the old config keeps serving.
|
|
79
69
|
*/
|
|
80
70
|
reload(text: string): void {
|
|
81
71
|
const parsed = parseConfigText(text);
|
|
package/src/core/window.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
 * Window engine: every budget window is a fixed-duration tumbling bucket (no
 * calendar/timezone math). `5h` resets every five hours, `24h` every 24 hours,
 * `7day` every 7 days, `30day` every 30 days. Buckets tumble from the spec's
 * `anchor` when one is set, and fall back to the epoch grid when it is absent.
 * Shared by the budget tracker.
 */
const HOUR_MS = 3600_000;
const DAY_MS = 24 * HOUR_MS;

export type WindowName = "5h" | "24h" | "7day" | "30day";

export type WindowSpec = {
  window: WindowName;
  /** Epoch ms the recurring cycle is anchored to. Absent ⇒ epoch-grid (legacy). */
  anchor?: number;
};

const DURATION_MS: Record<WindowName, number> = {
  "5h": 5 * HOUR_MS,
  "24h": 24 * HOUR_MS,
  "7day": 7 * DAY_MS,
  "30day": 30 * DAY_MS,
};

/** Length (ms) of one window bucket. */
export function windowDuration(spec: WindowSpec): number {
  return DURATION_MS[spec.window];
}

/**
 * Epoch ms of the START of the bucket containing `now`.
 *
 * - No anchor: `now` floored to a multiple of the window duration.
 * - Anchor set and `now` at or before it: the anchor itself, so the first
 *   bucket never starts earlier than the anchor.
 * - Anchor set and `now` after it: the anchor plus a whole number of durations.
 */
export function currentWindowStart(spec: WindowSpec, now: number): number {
  const dur = windowDuration(spec);
  const { anchor } = spec;
  if (anchor === undefined) return Math.floor(now / dur) * dur;
  if (now <= anchor) return anchor;
  return anchor + Math.floor((now - anchor) / dur) * dur;
}

/**
 * Next reset instant: the end of the current bucket (windowStart + duration).
 * `_now` is accepted for signature compatibility and is not used.
 */
export function nextResetAt(spec: WindowSpec, windowStart: number, _now: number): number {
  return windowStart + windowDuration(spec);
}

// `DAY_MS` is exported for any future window math that needs a day constant.
export { DAY_MS };
|
package/src/db.ts
CHANGED
|
@@ -67,7 +67,6 @@ export class UsageDB {
|
|
|
67
67
|
private readonly db: DatabaseSync;
|
|
68
68
|
private readonly insertUsage;
|
|
69
69
|
private readonly insertLog;
|
|
70
|
-
private readonly upsertQuota;
|
|
71
70
|
private readonly now: () => number;
|
|
72
71
|
|
|
73
72
|
constructor(path: string, now: () => number = Date.now) {
|
|
@@ -121,13 +120,6 @@ export class UsageDB {
|
|
|
121
120
|
INSERT INTO logs (ts, direction, provider, status, request_summary, response_summary)
|
|
122
121
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
123
122
|
`);
|
|
124
|
-
// upsert keyed on provider_id so each provider keeps one live window row.
|
|
125
|
-
this.upsertQuota = this.db.prepare(`
|
|
126
|
-
INSERT INTO quota_state (provider_id, window_start, consumed, last_reset)
|
|
127
|
-
VALUES (?, ?, ?, ?)
|
|
128
|
-
ON CONFLICT(provider_id) DO UPDATE SET window_start = excluded.window_start,
|
|
129
|
-
consumed = excluded.consumed, last_reset = excluded.last_reset
|
|
130
|
-
`);
|
|
131
123
|
}
|
|
132
124
|
|
|
133
125
|
record(row: Omit<UsageRow, "ts" | "client_key"> & { ts?: number; client_key?: string }): void {
|
|
@@ -297,21 +289,6 @@ export class UsageDB {
|
|
|
297
289
|
}));
|
|
298
290
|
}
|
|
299
291
|
|
|
300
|
-
// ---- QuotaStore: one live window row per provider (survives restart) ----
|
|
301
|
-
|
|
302
|
-
loadQuota(): Array<{ provider_id: string; window_start: number; consumed: number }> {
|
|
303
|
-
const rows = this.db.prepare(`SELECT provider_id, window_start, consumed FROM quota_state`).all() as SqlRow[];
|
|
304
|
-
return rows.map((r) => ({
|
|
305
|
-
provider_id: String(r.provider_id),
|
|
306
|
-
window_start: num(r.window_start),
|
|
307
|
-
consumed: num(r.consumed),
|
|
308
|
-
}));
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
saveQuota(providerId: string, windowStart: number, consumed: number): void {
|
|
312
|
-
this.upsertQuota.run(providerId, windowStart, consumed, this.now());
|
|
313
|
-
}
|
|
314
|
-
|
|
315
292
|
close(): void {
|
|
316
293
|
this.db.close();
|
|
317
294
|
}
|
package/src/routes/admin.ts
CHANGED
|
@@ -16,6 +16,7 @@ import type { FastifyInstance, FastifyRequest, FastifyReply } from "fastify";
|
|
|
16
16
|
import type { GatewayState } from "../core/state.js";
|
|
17
17
|
import type { UsageDB } from "../db.js";
|
|
18
18
|
import { checkAdminAuth, clientKeyFingerprint, type AdminVerifier } from "../middleware/auth.js";
|
|
19
|
+
import { buildKeyUsageRow } from "../core/keysUsage.js";
|
|
19
20
|
import {
|
|
20
21
|
maskKey,
|
|
21
22
|
serializeConfig,
|
|
@@ -44,6 +45,7 @@ import {
|
|
|
44
45
|
addServerKey,
|
|
45
46
|
editServerKey,
|
|
46
47
|
removeServerKey,
|
|
48
|
+
setServerKeyScope,
|
|
47
49
|
setBudget,
|
|
48
50
|
clearBudget,
|
|
49
51
|
type Config,
|
|
@@ -87,6 +89,23 @@ function maskedConfig(config: Config): Config {
|
|
|
87
89
|
Object.entries(clone.server.key_names).map(([k, name]) => [maskKey(k), name]),
|
|
88
90
|
);
|
|
89
91
|
}
|
|
92
|
+
// key_models / key_rpm / key_expires are keyed by the RAW key — re-key to the masked form so
|
|
93
|
+
// real keys never leak through /admin/config.
|
|
94
|
+
if (clone.server.key_models) {
|
|
95
|
+
clone.server.key_models = Object.fromEntries(
|
|
96
|
+
Object.entries(clone.server.key_models).map(([k, v]) => [maskKey(k), v]),
|
|
97
|
+
);
|
|
98
|
+
}
|
|
99
|
+
if (clone.server.key_rpm) {
|
|
100
|
+
clone.server.key_rpm = Object.fromEntries(
|
|
101
|
+
Object.entries(clone.server.key_rpm).map(([k, v]) => [maskKey(k), v]),
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
if (clone.server.key_expires) {
|
|
105
|
+
clone.server.key_expires = Object.fromEntries(
|
|
106
|
+
Object.entries(clone.server.key_expires).map(([k, v]) => [maskKey(k), v]),
|
|
107
|
+
);
|
|
108
|
+
}
|
|
90
109
|
return clone;
|
|
91
110
|
}
|
|
92
111
|
|
|
@@ -143,12 +162,9 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
143
162
|
reply.send({ providers: deps.state.pool.snapshot(deps.state.config.listProviders()) });
|
|
144
163
|
});
|
|
145
164
|
|
|
146
|
-
//
|
|
147
|
-
app.get("/admin/
|
|
148
|
-
reply.send({
|
|
149
|
-
quota: deps.state.quota.snapshot(deps.state.config.listProviders()),
|
|
150
|
-
budgets: deps.state.budget.statuses(),
|
|
151
|
-
});
|
|
165
|
+
// budget statuses: consumed, limit, and ms until the next scheduled reset.
|
|
166
|
+
app.get("/admin/budgets", requireAdmin, (_req, reply) => {
|
|
167
|
+
reply.send({ budgets: deps.state.budget.statuses() });
|
|
152
168
|
});
|
|
153
169
|
|
|
154
170
|
// add or replace a budget (keyed by scope). Body = Budget; invalid shape or an
|
|
@@ -403,7 +419,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
403
419
|
});
|
|
404
420
|
|
|
405
421
|
// Test ONE model end-to-end (aigetwey's per-model science button). Routes through
|
|
406
|
-
// the real pipeline via handle(), so the ping lands in usage
|
|
422
|
+
// the real pipeline via handle(), so the ping lands in usage exactly like
|
|
407
423
|
// a normal call — and it catches "model not found / not entitled" a /models
|
|
408
424
|
// ping can't. Model id travels as ?model= to survive slashes through the proxy.
|
|
409
425
|
app.post("/admin/providers/:id/models/test", requireAdmin, async (req, reply) => {
|
|
@@ -414,7 +430,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
414
430
|
if (!provider) return reply.code(404).send({ error: `provider "${id}" not found` });
|
|
415
431
|
try {
|
|
416
432
|
await handle(
|
|
417
|
-
{ config: deps.state.config, pool: deps.state.pool, db: deps.db
|
|
433
|
+
{ config: deps.state.config, pool: deps.state.pool, db: deps.db },
|
|
418
434
|
"openai",
|
|
419
435
|
{ model: `${id}/${modelId}`, messages: [{ role: "user", content: "ping" }], max_tokens: 1, stream: false },
|
|
420
436
|
);
|
|
@@ -564,6 +580,15 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
564
580
|
applyMutation(reply, (c) => editServerKey(c, i, { name: b?.name }));
|
|
565
581
|
});
|
|
566
582
|
|
|
583
|
+
// set/clear ONE gateway key's scopes (model allowlist, rpm, expiry), by index.
|
|
584
|
+
app.put("/admin/endpoint/keys/:index/scope", requireAdmin, (req, reply) => {
|
|
585
|
+
const { index } = req.params as { index: string };
|
|
586
|
+
const i = Number(index);
|
|
587
|
+
if (!Number.isInteger(i)) return reply.code(400).send({ error: "index must be an integer" });
|
|
588
|
+
const b = (req.body ?? {}) as { models?: string[]; rpm?: number | null; expires?: number | null };
|
|
589
|
+
applyMutation(reply, (c) => setServerKeyScope(c, i, { models: b.models, rpm: b.rpm, expires: b.expires }));
|
|
590
|
+
});
|
|
591
|
+
|
|
567
592
|
app.delete("/admin/endpoint/keys/:index", requireAdmin, (req, reply) => {
|
|
568
593
|
const { index } = req.params as { index: string };
|
|
569
594
|
const i = Number(index);
|
|
@@ -584,6 +609,26 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
584
609
|
);
|
|
585
610
|
});
|
|
586
611
|
|
|
612
|
+
// per-key spend for the Budgets page "Keys" section: every gateway key, its
|
|
613
|
+
// all-time usage, expiry, and key-scoped budget status (null when uncapped).
|
|
614
|
+
app.get("/admin/keys/usage", requireAdmin, (_req, reply) => {
|
|
615
|
+
if (!deps.db) return reply.code(503).send({ error: "usage tracking disabled" });
|
|
616
|
+
const cfg = deps.state.config.raw;
|
|
617
|
+
const statuses = deps.state.budget.statuses();
|
|
618
|
+
const keys = cfg.server.api_keys.map((k) => {
|
|
619
|
+
const fp = clientKeyFingerprint(k);
|
|
620
|
+
return buildKeyUsageRow({
|
|
621
|
+
fingerprint: fp,
|
|
622
|
+
name: cfg.server.key_names?.[k] ?? maskKey(k),
|
|
623
|
+
masked: maskKey(k),
|
|
624
|
+
expires: cfg.server.key_expires?.[k],
|
|
625
|
+
totals: deps.db!.totals(0, { client_key: fp }),
|
|
626
|
+
budget: statuses.find((s) => s.scope.type === "key" && s.scope.id === fp) ?? null,
|
|
627
|
+
});
|
|
628
|
+
});
|
|
629
|
+
reply.send({ keys });
|
|
630
|
+
});
|
|
631
|
+
|
|
587
632
|
// reveal ONE raw gateway key (the "show key" button on the Endpoint page).
|
|
588
633
|
app.get("/admin/endpoint/keys/:index/reveal", requireAdmin, (req, reply) => {
|
|
589
634
|
const { index } = req.params as { index: string };
|
|
@@ -749,6 +794,13 @@ function endpointPayload(config: Config) {
|
|
|
749
794
|
caveman: config.endpoint.caveman,
|
|
750
795
|
ponytail: config.endpoint.ponytail,
|
|
751
796
|
headroom: config.endpoint.headroom,
|
|
752
|
-
keys: config.server.api_keys.map((k) => ({
|
|
797
|
+
keys: config.server.api_keys.map((k) => ({
|
|
798
|
+
key: maskKey(k),
|
|
799
|
+
fingerprint: clientKeyFingerprint(k),
|
|
800
|
+
name: config.server.key_names?.[k],
|
|
801
|
+
models: config.server.key_models?.[k],
|
|
802
|
+
rpm: config.server.key_rpm?.[k],
|
|
803
|
+
expires: config.server.key_expires?.[k],
|
|
804
|
+
})),
|
|
753
805
|
};
|
|
754
806
|
}
|