aigetwey 1.2.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -1
- package/README.md +30 -7
- package/assets/screenshot.png +0 -0
- package/config.example.yaml +0 -1
- package/dashboard/src/app/(console)/quota/page.tsx +2 -2
- package/dashboard/src/app/layout.tsx +3 -2
- package/dashboard/src/components/BudgetForm.tsx +15 -17
- package/dashboard/src/components/{QuotaView.tsx → BudgetTracker.tsx} +71 -56
- package/dashboard/src/components/CooldownTimer.tsx +1 -1
- package/dashboard/src/components/EndpointView.tsx +255 -47
- package/dashboard/src/components/LogTable.tsx +36 -26
- package/dashboard/src/components/ProviderManager.tsx +3 -28
- package/dashboard/src/components/Rail.tsx +1 -1
- package/dashboard/src/components/RoutingView.tsx +6 -2
- package/dashboard/src/components/TopBar.tsx +1 -1
- package/dashboard/src/components/ui.tsx +6 -1
- package/dashboard/src/lib/client.ts +6 -5
- package/dashboard/src/lib/gateway.ts +24 -16
- package/dist/adapters/gemini.js +1 -0
- package/dist/adapters/gemini.js.map +1 -1
- package/dist/adapters/openai.js +13 -1
- package/dist/adapters/openai.js.map +1 -1
- package/dist/config.js +86 -23
- package/dist/config.js.map +1 -1
- package/dist/core/budget.js +1 -1
- package/dist/core/budget.js.map +1 -1
- package/dist/core/fallback.js +0 -6
- package/dist/core/fallback.js.map +1 -1
- package/dist/core/handler.js +13 -7
- package/dist/core/handler.js.map +1 -1
- package/dist/core/keysUsage.js +15 -0
- package/dist/core/keysUsage.js.map +1 -0
- package/dist/core/ratelimit.js +15 -0
- package/dist/core/ratelimit.js.map +1 -0
- package/dist/core/state.js +5 -13
- package/dist/core/state.js.map +1 -1
- package/dist/core/window.js +35 -0
- package/dist/core/window.js.map +1 -0
- package/dist/db.js +34 -29
- package/dist/db.js.map +1 -1
- package/dist/routes/admin.js +55 -10
- package/dist/routes/admin.js.map +1 -1
- package/dist/routes/v1.js +14 -1
- package/dist/routes/v1.js.map +1 -1
- package/dist/server.js +1 -7
- package/dist/server.js.map +1 -1
- package/dist/stream/anthropic-stream.js +7 -0
- package/dist/stream/anthropic-stream.js.map +1 -1
- package/dist/stream/gemini-stream.js +2 -1
- package/dist/stream/gemini-stream.js.map +1 -1
- package/dist/stream/openai-stream.js +10 -0
- package/dist/stream/openai-stream.js.map +1 -1
- package/package.json +1 -1
- package/src/adapters/gemini.ts +2 -0
- package/src/adapters/openai.ts +18 -1
- package/src/config.ts +89 -23
- package/src/core/budget.ts +1 -1
- package/src/core/fallback.ts +0 -9
- package/src/core/handler.ts +16 -9
- package/src/core/keysUsage.ts +49 -0
- package/src/core/ratelimit.ts +25 -0
- package/src/core/state.ts +4 -14
- package/src/core/window.ts +45 -0
- package/src/db.ts +35 -31
- package/src/routes/admin.ts +61 -9
- package/src/routes/v1.ts +18 -1
- package/src/server.ts +1 -8
- package/src/stream/anthropic-stream.ts +10 -1
- package/src/stream/chunk.ts +2 -0
- package/src/stream/gemini-stream.ts +3 -2
- package/src/stream/openai-stream.ts +12 -1
- package/src/core/quota.ts +0 -253
package/src/config.ts
CHANGED
|
@@ -18,21 +18,9 @@ export { clientKeyFingerprint } from "./middleware/auth.js";
|
|
|
18
18
|
// Shape differs from a flat OpenAI gateway: routing lives in a top-level
|
|
19
19
|
// `models[]` layer (alias -> provider chain), the endpoint block carries the
|
|
20
20
|
// token-saver toggles, and providers may be free passthroughs or service-account
|
|
21
|
-
// backed. The handler/keypool
|
|
21
|
+
// backed. The handler/keypool phases read these fields; defining the full
|
|
22
22
|
// shape up front avoids reshaping config across later phases.
|
|
23
23
|
|
|
24
|
-
/** Token quota window for a provider — drives the dashboard reset countdown. */
|
|
25
|
-
const QuotaSchema = z.object({
|
|
26
|
-
window: z.enum(["5h", "daily", "weekly", "monthly"]),
|
|
27
|
-
// daily: "HH:MM" local reset; weekly: weekday name ("monday"); others: ignored.
|
|
28
|
-
reset_at: z.string().optional(),
|
|
29
|
-
timezone: z.string().default("UTC"),
|
|
30
|
-
// optional ceiling for a progress bar; quota tracking works without it.
|
|
31
|
-
limit_tokens: z.number().int().positive().optional(),
|
|
32
|
-
// soft-alert threshold (0..1); UI flags the quota when pct >= this. Default 0.8.
|
|
33
|
-
alert_at: z.number().gt(0).lte(1).optional(),
|
|
34
|
-
});
|
|
35
|
-
|
|
36
24
|
const ProviderModelSchema = z.object({
|
|
37
25
|
id: z.string().min(1),
|
|
38
26
|
price_in: z.number().nonnegative().optional(),
|
|
@@ -59,7 +47,6 @@ const ProviderSchema = z
|
|
|
59
47
|
service_account: z.string().optional(),
|
|
60
48
|
models: z.array(ProviderModelSchema).default([]),
|
|
61
49
|
headers: z.record(z.string()).optional(),
|
|
62
|
-
quota: QuotaSchema.optional(),
|
|
63
50
|
// when true the provider is skipped in routing (kept in config, like a key's
|
|
64
51
|
// disabled state but for the whole provider).
|
|
65
52
|
disabled: z.boolean().optional(),
|
|
@@ -120,14 +107,20 @@ const ServerSchema = z
|
|
|
120
107
|
// optional friendly label per key, keyed by the key itself. Kept separate so
|
|
121
108
|
// api_keys stays a plain string[] (auth/masking paths untouched).
|
|
122
109
|
key_names: z.record(z.string()).optional(),
|
|
110
|
+
// per-key model allowlist (call-strings) + rate limit (req/min), keyed by the
|
|
111
|
+
// raw key like key_names. Absent → unrestricted / unlimited.
|
|
112
|
+
key_models: z.record(z.array(z.string().min(1))).optional(),
|
|
113
|
+
key_rpm: z.record(z.number().int().positive()).optional(),
|
|
114
|
+
// per-key access expiry, epoch ms, keyed by the RAW key. Absent → never expires.
|
|
115
|
+
key_expires: z.record(z.number().int().positive()).optional(),
|
|
123
116
|
})
|
|
124
117
|
.default({ host: "127.0.0.1", port: 18080, api_keys: [] });
|
|
125
118
|
|
|
126
119
|
/**
|
|
127
120
|
* A spend budget scoped to the whole gateway, one provider, or one upstream
|
|
128
121
|
* model. unit picks what `limit` means — USD cost or total tokens. Soft-alert at
|
|
129
|
-
* alert_at (default 0.8), hard-stop at 100%.
|
|
130
|
-
*
|
|
122
|
+
* alert_at (default 0.8), hard-stop at 100%. Each window is a rolling tumbling
|
|
123
|
+
* bucket counted from the budget's `anchor`, or on the epoch grid when none is set (window.ts). Opt-in: omit / empty list to disable.
|
|
131
124
|
*/
|
|
132
125
|
const BudgetScopeSchema = z.discriminatedUnion("type", [
|
|
133
126
|
z.object({ type: z.literal("global") }),
|
|
@@ -136,13 +129,21 @@ const BudgetScopeSchema = z.discriminatedUnion("type", [
|
|
|
136
129
|
z.object({ type: z.literal("key"), id: z.string().min(1) }),
|
|
137
130
|
]);
|
|
138
131
|
|
|
132
|
+
// Rolling windows replaced the old calendar windows. Legacy names are coerced so
// existing config.yaml budgets keep loading (daily→24h, weekly→7day, monthly→30day).
const LEGACY_WINDOW: Record<string, string> = { daily: "24h", weekly: "7day", monthly: "30day" };

/**
 * Budget window name. A legacy calendar name is rewritten to its rolling
 * equivalent before validation; every other value is handed to the enum
 * untouched, so anything outside the four rolling names is rejected there.
 */
const WindowSchema = z.preprocess(
  // Own-property lookup only: a bare `in` check also matches inherited keys such
  // as "toString" or "constructor" and would pass a function on to the enum,
  // producing a misleading "expected string" validation error.
  (v) =>
    typeof v === "string" && Object.prototype.hasOwnProperty.call(LEGACY_WINDOW, v)
      ? LEGACY_WINDOW[v]
      : v,
  z.enum(["5h", "24h", "7day", "30day"]),
);
|
|
139
|
+
|
|
139
140
|
const BudgetSchema = z.object({
|
|
140
141
|
scope: BudgetScopeSchema,
|
|
141
142
|
unit: z.enum(["usd", "tokens"]),
|
|
142
143
|
limit: z.number().positive(),
|
|
143
|
-
window:
|
|
144
|
-
|
|
145
|
-
|
|
144
|
+
window: WindowSchema,
|
|
145
|
+
// epoch ms the recurring cycle is anchored to; stamped by setBudget on create.
|
|
146
|
+
anchor: z.number().int().nonnegative().optional(),
|
|
146
147
|
alert_at: z.number().gt(0).lte(1).optional(),
|
|
147
148
|
// optional free-text label so an operator remembers what a budget is for.
|
|
148
149
|
note: z.string().max(200).optional(),
|
|
@@ -157,7 +158,6 @@ const ConfigSchema = z.object({
|
|
|
157
158
|
budgets: z.array(BudgetSchema).default([]),
|
|
158
159
|
});
|
|
159
160
|
|
|
160
|
-
export type Quota = z.infer<typeof QuotaSchema>;
|
|
161
161
|
export type ProviderModel = z.infer<typeof ProviderModelSchema>;
|
|
162
162
|
export type Provider = z.infer<typeof ProviderSchema>;
|
|
163
163
|
export type ModelRoute = z.infer<typeof ModelRouteSchema>;
|
|
@@ -790,7 +790,7 @@ export function budgetKey(scope: BudgetScope): string {
|
|
|
790
790
|
}
|
|
791
791
|
|
|
792
792
|
/** Add a budget, or replace the existing one with the same scope key. */
|
|
793
|
-
export function setBudget(config: Config, budget: Budget): Config {
|
|
793
|
+
export function setBudget(config: Config, budget: Budget, now: number = Date.now()): Config {
|
|
794
794
|
if (budget.scope.type === "provider") {
|
|
795
795
|
const { id } = budget.scope;
|
|
796
796
|
if (!config.providers.some((p) => p.id === id)) {
|
|
@@ -806,8 +806,16 @@ export function setBudget(config: Config, budget: Budget): Config {
|
|
|
806
806
|
const next = cloneConfig(config);
|
|
807
807
|
const key = budgetKey(budget.scope);
|
|
808
808
|
const idx = next.budgets.findIndex((b) => budgetKey(b.scope) === key);
|
|
809
|
-
if (idx === -1)
|
|
810
|
-
|
|
809
|
+
if (idx === -1) {
|
|
810
|
+
next.budgets.push({ ...budget, anchor: budget.anchor ?? now });
|
|
811
|
+
} else {
|
|
812
|
+
const prev = next.budgets[idx]!;
|
|
813
|
+
// keep the running cycle on edit (preserve prev anchor as-is, including a
|
|
814
|
+
// legacy undefined = epoch grid, so editing a limit never resets spend);
|
|
815
|
+
// start a fresh cycle only when the window length actually changed.
|
|
816
|
+
const anchor = budget.anchor ?? (prev.window === budget.window ? prev.anchor : now);
|
|
817
|
+
next.budgets[idx] = { ...budget, anchor };
|
|
818
|
+
}
|
|
811
819
|
return next;
|
|
812
820
|
}
|
|
813
821
|
|
|
@@ -854,5 +862,63 @@ export function removeServerKey(config: Config, index: number): Config {
|
|
|
854
862
|
if (removed && next.server.key_names && removed in next.server.key_names) {
|
|
855
863
|
delete next.server.key_names[removed];
|
|
856
864
|
}
|
|
865
|
+
if (removed && next.server.key_models && removed in next.server.key_models) {
|
|
866
|
+
delete next.server.key_models[removed];
|
|
867
|
+
if (Object.keys(next.server.key_models).length === 0) next.server.key_models = undefined;
|
|
868
|
+
}
|
|
869
|
+
if (removed && next.server.key_rpm && removed in next.server.key_rpm) {
|
|
870
|
+
delete next.server.key_rpm[removed];
|
|
871
|
+
if (Object.keys(next.server.key_rpm).length === 0) next.server.key_rpm = undefined;
|
|
872
|
+
}
|
|
873
|
+
if (removed && next.server.key_expires && removed in next.server.key_expires) {
|
|
874
|
+
delete next.server.key_expires[removed];
|
|
875
|
+
if (Object.keys(next.server.key_expires).length === 0) next.server.key_expires = undefined;
|
|
876
|
+
}
|
|
877
|
+
return next;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
/**
 * Set or clear a gateway key's scopes. The key is addressed by index because
 * keys are masked in the API.
 *
 * Each of `models`, `rpm` and `expires` is applied only when present in the
 * patch; a field left `undefined` keeps its current value.
 *  - `models`:  entries are trimmed and blank ones dropped. `null` or a list
 *               that ends up empty clears the allowlist for the key.
 *  - `rpm`:     floored to a whole number of requests per minute.
 *  - `expires`: floored to a whole epoch-ms timestamp.
 * For `rpm` and `expires`, `null`, `0`, a negative value, a non-finite value, or
 * a fraction that floors to 0 clears the entry instead of storing it, so the
 * maps never hold a value the schema (`int().positive()`) would reject.
 *
 * Maps left without entries are pruned to `undefined`.
 *
 * @param config current config; the edit is made on the copy returned by `cloneConfig`.
 * @param index  position of the key in `server.api_keys`.
 * @param patch  scopes to change.
 * @returns the edited copy.
 * @throws Error when `index` is not an integer inside `server.api_keys`.
 */
export function setServerKeyScope(
  config: Config,
  index: number,
  patch: { models?: string[] | null; rpm?: number | null; expires?: number | null },
): Config {
  const next = cloneConfig(config);
  const keys = next.server.api_keys;
  // A fractional or NaN index passes a plain range comparison and would index
  // `undefined`, so integrality is checked as part of the bounds test.
  if (!Number.isInteger(index) || index < 0 || index >= keys.length) {
    throw new Error(`no gateway key at index ${index}`);
  }
  const key = keys[index]!;

  // Whole number >= 1, or undefined when the value means "clear this scope".
  const toPositiveInt = (value: number | null): number | undefined => {
    if (value === null || !Number.isFinite(value)) return undefined;
    const whole = Math.floor(value);
    return whole >= 1 ? whole : undefined;
  };

  if (patch.models !== undefined) {
    const models = { ...(next.server.key_models ?? {}) };
    const list = (patch.models ?? []).map((m) => m.trim()).filter(Boolean);
    if (list.length > 0) models[key] = list;
    else delete models[key];
    next.server.key_models = Object.keys(models).length > 0 ? models : undefined;
  }

  if (patch.rpm !== undefined) {
    const rpm = { ...(next.server.key_rpm ?? {}) };
    const perMinute = toPositiveInt(patch.rpm);
    if (perMinute !== undefined) rpm[key] = perMinute;
    else delete rpm[key];
    next.server.key_rpm = Object.keys(rpm).length > 0 ? rpm : undefined;
  }

  if (patch.expires !== undefined) {
    const exp = { ...(next.server.key_expires ?? {}) };
    const expiresAt = toPositiveInt(patch.expires);
    if (expiresAt !== undefined) exp[key] = expiresAt;
    else delete exp[key];
    next.server.key_expires = Object.keys(exp).length > 0 ? exp : undefined;
  }

  return next;
}
|
|
919
|
+
|
|
920
|
+
/**
 * Reports whether `rawKey` is past its configured expiry.
 *
 * A key with no entry in `server.key_expires` never expires. The comparison is
 * strict, so a key is still valid at the exact instant of its expiry.
 *
 * @param server the `server` section of the config.
 * @param rawKey the unmasked gateway key, as used to index `key_expires`.
 * @param now    current time, epoch ms.
 */
export function isKeyExpired(server: Config["server"], rawKey: string, now: number): boolean {
  const expiresAt = server.key_expires?.[rawKey];
  if (expiresAt === undefined) return false;
  return now > expiresAt;
}
|
package/src/core/budget.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
import type { Budget, BudgetScope } from "../config.js";
|
|
10
10
|
import { budgetKey } from "../config.js";
|
|
11
|
-
import { currentWindowStart, nextResetAt } from "./
|
|
11
|
+
import { currentWindowStart, nextResetAt } from "./window.js";
|
|
12
12
|
|
|
13
13
|
export interface BudgetStatus {
|
|
14
14
|
scope: BudgetScope;
|
package/src/core/fallback.ts
CHANGED
|
@@ -32,8 +32,6 @@ export interface FallbackOpts {
|
|
|
32
32
|
onAttempt?: (log: AttemptLog) => void;
|
|
33
33
|
/** which key the pool handed out for the winning attempt (handler uses it for usage). */
|
|
34
34
|
onServed?: (route: ResolvedRoute, key: string) => void;
|
|
35
|
-
/** when set, a provider this returns true for is skipped (quota exhausted). */
|
|
36
|
-
isExhausted?: (provider: ResolvedRoute["provider"]) => boolean;
|
|
37
35
|
/** captured client thinking intent, applied per-attempt in the provider's format. */
|
|
38
36
|
thinkingIntent?: ThinkingConfig | null;
|
|
39
37
|
}
|
|
@@ -56,13 +54,6 @@ export async function executeWithFallback(
|
|
|
56
54
|
for (const route of routes) {
|
|
57
55
|
const { provider } = route;
|
|
58
56
|
|
|
59
|
-
// skip a provider whose token budget is spent for this window — like a key
|
|
60
|
-
// cooling down, but for the whole provider. Falls through to the next route.
|
|
61
|
-
if (opts.isExhausted?.(provider)) {
|
|
62
|
-
log({ provider: provider.id, model: route.model, outcome: "skip", detail: "quota exhausted" });
|
|
63
|
-
continue;
|
|
64
|
-
}
|
|
65
|
-
|
|
66
57
|
const attempts = provider.max_retries + 1;
|
|
67
58
|
|
|
68
59
|
for (let i = 0; i < attempts; i++) {
|
package/src/core/handler.ts
CHANGED
|
@@ -20,7 +20,6 @@ import { parseSSE, encodeSSE } from "../stream/sse.js";
|
|
|
20
20
|
import { streamAdapterFor } from "../stream/index.js";
|
|
21
21
|
import type { CanonicalChunk } from "../stream/chunk.js";
|
|
22
22
|
import type { KeyPool } from "./keypool.js";
|
|
23
|
-
import type { QuotaTracker } from "./quota.js";
|
|
24
23
|
import { executeWithFallback } from "./fallback.js";
|
|
25
24
|
import { type UsageDB, computeCost } from "../db.js";
|
|
26
25
|
import { compressMessages } from "../rtk/index.js";
|
|
@@ -50,12 +49,12 @@ export interface HandleDeps {
|
|
|
50
49
|
config: GatewayConfig;
|
|
51
50
|
pool: KeyPool;
|
|
52
51
|
db?: UsageDB;
|
|
53
|
-
quota?: QuotaTracker;
|
|
54
52
|
budget?: {
|
|
55
53
|
globalStatus(): { exhausted: boolean; reset_in_ms: number } | null;
|
|
56
54
|
blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null;
|
|
57
55
|
blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null;
|
|
58
56
|
};
|
|
57
|
+
clientKeyModels?: string[];
|
|
59
58
|
clientKeyFp?: string;
|
|
60
59
|
log?: (msg: string) => void;
|
|
61
60
|
now?: () => number;
|
|
@@ -71,22 +70,25 @@ function recordUsage(
|
|
|
71
70
|
): void {
|
|
72
71
|
const tokensIn = usage?.prompt_tokens ?? 0;
|
|
73
72
|
const tokensOut = usage?.completion_tokens ?? 0;
|
|
74
|
-
|
|
75
|
-
|
|
73
|
+
const reasoningTokens = usage?.reasoning_tokens ?? 0;
|
|
74
|
+
const cachedTokens = usage?.cached_tokens ?? 0;
|
|
76
75
|
if (!deps.db) return;
|
|
77
76
|
// Cost: a combo/route may set explicit prices; otherwise fall back to the ported
|
|
78
77
|
// aigetwey pricing table so cost auto-resolves per model instead of showing $0.
|
|
79
78
|
const pricing = getPricingForModel(route.provider.id, route.model);
|
|
80
79
|
const priceIn = route.price_in ?? pricing?.input;
|
|
81
80
|
const priceOut = route.price_out ?? pricing?.output;
|
|
81
|
+
const priceCachedRead = pricing?.cached;
|
|
82
|
+
const priceReasoning = pricing?.reasoning;
|
|
82
83
|
deps.db.record({
|
|
83
84
|
alias: route.alias,
|
|
84
85
|
provider: route.provider.id,
|
|
85
86
|
model: route.model,
|
|
86
87
|
tokens_in: tokensIn,
|
|
87
88
|
tokens_out: tokensOut,
|
|
88
|
-
|
|
89
|
-
|
|
89
|
+
reasoning_tokens: reasoningTokens,
|
|
90
|
+
cached_tokens: cachedTokens,
|
|
91
|
+
cost: computeCost(tokensIn, tokensOut, priceIn, priceOut, priceReasoning, priceCachedRead, cachedTokens, reasoningTokens),
|
|
90
92
|
status,
|
|
91
93
|
latency_ms: latencyMs,
|
|
92
94
|
stream: stream ? 1 : 0,
|
|
@@ -118,6 +120,13 @@ export async function handle(
|
|
|
118
120
|
// that can't reason. Matches aigetwey's capture-before-translate flow.
|
|
119
121
|
const { cleanModel, override } = parseSuffix(canonical.model);
|
|
120
122
|
canonical.model = cleanModel;
|
|
123
|
+
|
|
124
|
+
// per-key allowlist: a key may be restricted to specific call-strings. Empty/
|
|
125
|
+
// absent → unrestricted. Match the literal clean model the client asked for.
|
|
126
|
+
if (deps.clientKeyModels && deps.clientKeyModels.length > 0 && !deps.clientKeyModels.includes(cleanModel)) {
|
|
127
|
+
throw new GatewayError(403, { error: "model not allowed for this key" });
|
|
128
|
+
}
|
|
129
|
+
|
|
121
130
|
const thinkingIntent: ThinkingConfig | null =
|
|
122
131
|
override ?? captureThinking(canonical as Record<string, unknown>);
|
|
123
132
|
|
|
@@ -127,8 +136,7 @@ export async function handle(
|
|
|
127
136
|
}
|
|
128
137
|
|
|
129
138
|
// Budget hard-stop. Global overrun fails fast. Provider/model budgets bar the
|
|
130
|
-
// matching routes
|
|
131
|
-
// there's nothing to serve → 402.
|
|
139
|
+
// matching routes; if every candidate is barred, there's nothing to serve → 402.
|
|
132
140
|
if (deps.budget) {
|
|
133
141
|
const g = deps.budget.globalStatus();
|
|
134
142
|
if (g?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: g.reset_in_ms });
|
|
@@ -193,7 +201,6 @@ export async function handle(
|
|
|
193
201
|
stream: wantStream,
|
|
194
202
|
signal,
|
|
195
203
|
thinkingIntent,
|
|
196
|
-
isExhausted: deps.quota ? (p) => deps.quota!.isExhausted(p) : undefined,
|
|
197
204
|
onAttempt: (a) =>
|
|
198
205
|
deps.log?.(`[fallback] ${a.provider}/${a.model} ${a.status ?? "-"} -> ${a.outcome}${a.detail ? ` (${a.detail})` : ""}`),
|
|
199
206
|
});
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shapes one row for the Budgets page "Keys" section: a gateway key joined with
|
|
3
|
+
* its all-time spend/tokens, optional expiry, and its key-scoped budget status
|
|
4
|
+
* (null when the key is uncapped). Pure — the admin route feeds it real data.
|
|
5
|
+
*/
|
|
6
|
+
import type { BudgetStatus } from "./budget.js";
|
|
7
|
+
|
|
8
|
+
/**
 * The slice of a key-scoped budget status that a key row carries. Every field is
 * copied from the matching `BudgetStatus` field by `buildKeyUsageRow`.
 */
export interface KeyBudgetView {
  /** Unit that `limit` and `spent` are expressed in. */
  unit: "usd" | "tokens";
  /** Budget ceiling, in `unit`. */
  limit: number;
  /** Amount reported as spent by the budget status, in `unit`. */
  spent: number;
  /** Usage figure taken from the budget status as-is. */
  pct: number;
  /** Window name of the budget. */
  window: BudgetStatus["window"];
  /** Milliseconds until the budget window resets. */
  reset_in_ms: number;
  /** Exhausted flag from the budget status. */
  exhausted: boolean;
  /** Alert flag from the budget status. */
  alert: boolean;
}
|
|
18
|
+
|
|
19
|
+
/** One row of the Budgets page "Keys" section, as produced by `buildKeyUsageRow`. */
export interface KeyUsageRow {
  /** Fingerprint identifying the gateway key. */
  fingerprint: string;
  /** Display name of the key. */
  name: string;
  /** Masked rendering of the key. */
  masked: string;
  /** Optional expiry of the key; absent when none was supplied. */
  expires?: number;
  /** Cost total for the key. */
  spent: number;
  /** Input plus output tokens for the key. */
  tokens: number;
  /** Key-scoped budget status, or null when the key is uncapped. */
  budget: KeyBudgetView | null;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Assemble the row shown for one gateway key.
 *
 * Identity fields and `expires` are passed through unchanged, `spent` is the
 * cost total, and `tokens` is input plus output tokens. When a budget status is
 * supplied, only the fields of `KeyBudgetView` are copied from it; a null
 * status yields `budget: null`.
 *
 * @param input key identity, usage totals, and the key's budget status (or null).
 * @returns the assembled row; `input` is not modified.
 */
export function buildKeyUsageRow(input: {
  fingerprint: string;
  name: string;
  masked: string;
  expires?: number;
  totals: { tokens_in: number; tokens_out: number; cost: number };
  budget: BudgetStatus | null;
}): KeyUsageRow {
  const { fingerprint, name, masked, expires, totals, budget: status } = input;

  let view: KeyBudgetView | null = null;
  if (status) {
    // Copy field by field so anything else on the status is left out of the row.
    const { unit, limit, spent, pct, window, reset_in_ms, exhausted, alert } = status;
    view = { unit, limit, spent, pct, window, reset_in_ms, exhausted, alert };
  }

  return {
    fingerprint,
    name,
    masked,
    expires,
    spent: totals.cost,
    tokens: totals.tokens_in + totals.tokens_out,
    budget: view,
  };
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-key request counter on a fixed calendar-minute window. In-memory only —
|
|
3
|
+
* counts reset on restart, which is fine for a 1-minute window. Used to rate-limit
|
|
4
|
+
* gateway keys that opt in via server.key_rpm.
|
|
5
|
+
*/
|
|
6
|
+
/** Hit tally for one key within a single minute slot. */
interface Bucket {
  /** Minute slot: epoch ms divided by 60 000, floored. */
  minute: number;
  /** Hits recorded in that slot so far. */
  count: number;
}

/**
 * In-memory request counter, one bucket per key, on fixed one-minute slots.
 * A key's bucket is replaced the first time it is hit in a different slot.
 */
export class RateLimiter {
  private readonly buckets = new Map<string, Bucket>();

  /**
   * Count one hit for `key` and report whether the key is now above `limit`
   * for the slot containing `now`. Every call is counted, including the ones
   * that come back over the limit.
   *
   * @param key   identifier the count is kept under.
   * @param limit maximum hits allowed in one slot.
   * @param now   current time in epoch ms; defaults to `Date.now()`.
   * @returns true when the count for this slot exceeds `limit`.
   */
  over(key: string, limit: number, now: number = Date.now()): boolean {
    const slot = Math.floor(now / 60_000);
    let bucket = this.buckets.get(key);
    if (bucket === undefined || bucket.minute !== slot) {
      // First hit of this slot for the key: start a fresh tally.
      bucket = { minute: slot, count: 0 };
      this.buckets.set(key, bucket);
    }
    bucket.count += 1;
    return bucket.count > limit;
  }
}
|
package/src/core/state.ts
CHANGED
|
@@ -1,15 +1,13 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Mutable holder for the live gateway config, key pool, and
|
|
2
|
+
* Mutable holder for the live gateway config, key pool, and budget tracker.
|
|
3
3
|
*
|
|
4
4
|
* Config loads once at boot, but the dashboard edits it at runtime. Routes read
|
|
5
|
-
* `state.config` / `state.pool` / `state.
|
|
5
|
+
* `state.config` / `state.pool` / `state.budget` fresh per request (never close
|
|
6
6
|
* over them), so a successful reload swaps in the new config + pool atomically —
|
|
7
7
|
* no restart.
|
|
8
8
|
*
|
|
9
9
|
* reload() validates and persists BEFORE swapping: an invalid edit throws and
|
|
10
|
-
* the old config keeps serving. The pool is rebuilt (cooldown is transient)
|
|
11
|
-
* the quota tracker is KEPT across reloads — a budget consumed this window must
|
|
12
|
-
* survive a config edit, else editing config would silently reset every quota.
|
|
10
|
+
* the old config keeps serving. The pool is rebuilt (cooldown is transient).
|
|
13
11
|
*/
|
|
14
12
|
import {
|
|
15
13
|
GatewayConfig,
|
|
@@ -21,7 +19,6 @@ import {
|
|
|
21
19
|
} from "../config.js";
|
|
22
20
|
import { clientKeyFingerprint } from "../middleware/auth.js";
|
|
23
21
|
import { KeyPool } from "./keypool.js";
|
|
24
|
-
import { QuotaTracker } from "./quota.js";
|
|
25
22
|
import { BudgetTracker } from "./budget.js";
|
|
26
23
|
|
|
27
24
|
function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string, string> }, fp: string): string {
|
|
@@ -34,18 +31,15 @@ function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string,
|
|
|
34
31
|
export class GatewayState {
|
|
35
32
|
private _config: GatewayConfig;
|
|
36
33
|
private _pool: KeyPool;
|
|
37
|
-
private readonly _quota: QuotaTracker;
|
|
38
34
|
private readonly _budget: BudgetTracker;
|
|
39
35
|
|
|
40
36
|
constructor(
|
|
41
37
|
private readonly configPath: string,
|
|
42
38
|
initial: GatewayConfig,
|
|
43
|
-
quota?: QuotaTracker,
|
|
44
39
|
budgetDb?: { totals(since: number, filter?: { provider?: string; model?: string; client_key?: string }): { tokens_in: number; tokens_out: number; cost: number } },
|
|
45
40
|
) {
|
|
46
41
|
this._config = initial;
|
|
47
42
|
this._pool = new KeyPool();
|
|
48
|
-
this._quota = quota ?? new QuotaTracker();
|
|
49
43
|
this._budget = new BudgetTracker(
|
|
50
44
|
() => this._config.raw.budgets,
|
|
51
45
|
budgetDb ?? { totals: () => ({ tokens_in: 0, tokens_out: 0, cost: 0 }) },
|
|
@@ -63,10 +57,6 @@ export class GatewayState {
|
|
|
63
57
|
return this._pool;
|
|
64
58
|
}
|
|
65
59
|
|
|
66
|
-
get quota(): QuotaTracker {
|
|
67
|
-
return this._quota;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
60
|
get budget(): BudgetTracker {
|
|
71
61
|
return this._budget;
|
|
72
62
|
}
|
|
@@ -75,7 +65,7 @@ export class GatewayState {
|
|
|
75
65
|
* Validate edited config text, restore masked secrets from the live config,
|
|
76
66
|
* persist atomically, then swap in a fresh config + pool. Throws without
|
|
77
67
|
* changing anything if validation fails or a masked key can't be resolved —
|
|
78
|
-
* the old config keeps serving.
|
|
68
|
+
* the old config keeps serving.
|
|
79
69
|
*/
|
|
80
70
|
reload(text: string): void {
|
|
81
71
|
const parsed = parseConfigText(text);
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Rolling-window engine: every budget window is a fixed-duration tumbling bucket
|
|
3
|
+
* aligned to the budget's `anchor` when one is set, otherwise to the epoch grid (no calendar/timezone math). `5h` resets every five
|
|
4
|
+
* hours, `24h` daily, `7day` weekly, `30day` monthly — each on a rolling grid
|
|
5
|
+
* rather than a calendar boundary. Shared by the budget tracker.
|
|
6
|
+
*/
|
|
7
|
+
// Millisecond lengths the window durations are built from.
const HOUR_MS = 60 * 60 * 1000;
const DAY_MS = 24 * HOUR_MS;

/** Names of the supported budget windows. */
export type WindowName = "5h" | "24h" | "7day" | "30day";

/** A window name plus the optional instant its recurring cycle is counted from. */
export type WindowSpec = {
  window: WindowName;
  /** Epoch ms the recurring cycle is anchored to. Absent ⇒ epoch-grid (legacy). */
  anchor?: number;
};

/** Length in ms of one bucket, per window name. */
const DURATION_MS: Record<WindowName, number> = {
  "5h": 5 * HOUR_MS,
  "24h": DAY_MS,
  "7day": 7 * DAY_MS,
  "30day": 30 * DAY_MS,
};
|
|
24
|
+
|
|
25
|
+
/**
 * Length of one bucket for the given window.
 *
 * @param spec window to look up; only `spec.window` is read.
 * @returns the bucket length in milliseconds.
 */
export function windowDuration(spec: WindowSpec): number {
  const { window: name } = spec;
  return DURATION_MS[name];
}
|
|
29
|
+
|
|
30
|
+
/**
 * Epoch ms at which the bucket containing `now` began.
 *
 * Without an anchor, `now` is floored to a multiple of the window length (the
 * epoch grid). With an anchor, buckets are counted from the anchor; a `now` at
 * or before the anchor returns the anchor itself.
 *
 * @param spec window name and optional anchor.
 * @param now  current time, epoch ms.
 */
export function currentWindowStart(spec: WindowSpec, now: number): number {
  const length = DURATION_MS[spec.window];
  const { anchor } = spec;

  if (anchor === undefined) {
    return Math.floor(now / length) * length;
  }
  if (now <= anchor) {
    return anchor;
  }

  const completedBuckets = Math.floor((now - anchor) / length);
  return anchor + completedBuckets * length;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Instant at which the current bucket ends: its start plus one window length.
 *
 * @param spec        window whose length is used; only `spec.window` is read.
 * @param windowStart epoch ms at which the current bucket began.
 * @param _now        accepted but not used.
 * @returns epoch ms of the next reset.
 */
export function nextResetAt(spec: WindowSpec, windowStart: number, _now: number): number {
  const length = DURATION_MS[spec.window];
  return windowStart + length;
}
|
|
43
|
+
|
|
44
|
+
// `DAY_MS` is exported for any future window math that needs a day constant.
|
|
45
|
+
export { DAY_MS };
|
package/src/db.ts
CHANGED
|
@@ -22,6 +22,7 @@ export interface UsageRow {
|
|
|
22
22
|
model: string;
|
|
23
23
|
tokens_in: number;
|
|
24
24
|
tokens_out: number;
|
|
25
|
+
reasoning_tokens: number;
|
|
25
26
|
cached_tokens: number;
|
|
26
27
|
cost: number;
|
|
27
28
|
status: number;
|
|
@@ -67,7 +68,6 @@ export class UsageDB {
|
|
|
67
68
|
private readonly db: DatabaseSync;
|
|
68
69
|
private readonly insertUsage;
|
|
69
70
|
private readonly insertLog;
|
|
70
|
-
private readonly upsertQuota;
|
|
71
71
|
private readonly now: () => number;
|
|
72
72
|
|
|
73
73
|
constructor(path: string, now: () => number = Date.now) {
|
|
@@ -82,6 +82,7 @@ export class UsageDB {
|
|
|
82
82
|
model TEXT NOT NULL,
|
|
83
83
|
tokens_in INTEGER NOT NULL DEFAULT 0,
|
|
84
84
|
tokens_out INTEGER NOT NULL DEFAULT 0,
|
|
85
|
+
reasoning_tokens INTEGER NOT NULL DEFAULT 0,
|
|
85
86
|
cached_tokens INTEGER NOT NULL DEFAULT 0,
|
|
86
87
|
cost REAL NOT NULL DEFAULT 0,
|
|
87
88
|
status INTEGER NOT NULL,
|
|
@@ -112,25 +113,21 @@ export class UsageDB {
|
|
|
112
113
|
if (!cols.some((c) => String(c.name) === "client_key")) {
|
|
113
114
|
this.db.exec(`ALTER TABLE usage ADD COLUMN client_key TEXT NOT NULL DEFAULT ''`);
|
|
114
115
|
}
|
|
116
|
+
if (!cols.some((c) => String(c.name) === "reasoning_tokens")) {
|
|
117
|
+
this.db.exec(`ALTER TABLE usage ADD COLUMN reasoning_tokens INTEGER NOT NULL DEFAULT 0`);
|
|
118
|
+
}
|
|
115
119
|
this.now = now;
|
|
116
120
|
this.insertUsage = this.db.prepare(`
|
|
117
|
-
INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream, client_key)
|
|
118
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
121
|
+
INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, reasoning_tokens, cached_tokens, cost, status, latency_ms, stream, client_key)
|
|
122
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
119
123
|
`);
|
|
120
124
|
this.insertLog = this.db.prepare(`
|
|
121
125
|
INSERT INTO logs (ts, direction, provider, status, request_summary, response_summary)
|
|
122
126
|
VALUES (?, ?, ?, ?, ?, ?)
|
|
123
127
|
`);
|
|
124
|
-
// upsert keyed on provider_id so each provider keeps one live window row.
|
|
125
|
-
this.upsertQuota = this.db.prepare(`
|
|
126
|
-
INSERT INTO quota_state (provider_id, window_start, consumed, last_reset)
|
|
127
|
-
VALUES (?, ?, ?, ?)
|
|
128
|
-
ON CONFLICT(provider_id) DO UPDATE SET window_start = excluded.window_start,
|
|
129
|
-
consumed = excluded.consumed, last_reset = excluded.last_reset
|
|
130
|
-
`);
|
|
131
128
|
}
|
|
132
129
|
|
|
133
|
-
record(row: Omit<UsageRow, "ts" | "client_key"> & { ts?: number; client_key?: string }): void {
|
|
130
|
+
record(row: Omit<UsageRow, "ts" | "client_key" | "reasoning_tokens"> & { ts?: number; client_key?: string; reasoning_tokens?: number }): void {
|
|
134
131
|
this.insertUsage.run(
|
|
135
132
|
row.ts ?? this.now(),
|
|
136
133
|
row.alias,
|
|
@@ -138,6 +135,7 @@ export class UsageDB {
|
|
|
138
135
|
row.model,
|
|
139
136
|
row.tokens_in,
|
|
140
137
|
row.tokens_out,
|
|
138
|
+
row.reasoning_tokens ?? 0,
|
|
141
139
|
row.cached_tokens,
|
|
142
140
|
row.cost,
|
|
143
141
|
row.status,
|
|
@@ -276,7 +274,7 @@ export class UsageDB {
|
|
|
276
274
|
recent(limit = 100): UsageRow[] {
|
|
277
275
|
const rows = this.db
|
|
278
276
|
.prepare(
|
|
279
|
-
`SELECT ts, alias, provider, model, tokens_in, tokens_out, cached_tokens,
|
|
277
|
+
`SELECT ts, alias, provider, model, tokens_in, tokens_out, reasoning_tokens, cached_tokens,
|
|
280
278
|
cost, status, latency_ms, stream, client_key
|
|
281
279
|
FROM usage ORDER BY id DESC LIMIT ?`,
|
|
282
280
|
)
|
|
@@ -288,6 +286,7 @@ export class UsageDB {
|
|
|
288
286
|
model: String(r.model),
|
|
289
287
|
tokens_in: num(r.tokens_in),
|
|
290
288
|
tokens_out: num(r.tokens_out),
|
|
289
|
+
reasoning_tokens: num(r.reasoning_tokens),
|
|
291
290
|
cached_tokens: num(r.cached_tokens),
|
|
292
291
|
cost: num(r.cost),
|
|
293
292
|
status: num(r.status),
|
|
@@ -297,29 +296,34 @@ export class UsageDB {
|
|
|
297
296
|
}));
|
|
298
297
|
}
|
|
299
298
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
loadQuota(): Array<{ provider_id: string; window_start: number; consumed: number }> {
|
|
303
|
-
const rows = this.db.prepare(`SELECT provider_id, window_start, consumed FROM quota_state`).all() as SqlRow[];
|
|
304
|
-
return rows.map((r) => ({
|
|
305
|
-
provider_id: String(r.provider_id),
|
|
306
|
-
window_start: num(r.window_start),
|
|
307
|
-
consumed: num(r.consumed),
|
|
308
|
-
}));
|
|
299
|
+
/** Close the database handle held in `this.db`. */
close(): void {
|
|
300
|
+
this.db.close();
|
|
309
301
|
}
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
/**
 * Compute the USD cost of one request from token counts and per-1M-token prices.
 *
 * Four components are summed, each skipped when it has no usable rate:
 *  - non-cached input: `tokensIn` minus `cachedTokens` (never below 0) at `priceIn`;
 *  - cached reads:     `cachedTokens` at `priceCachedRead`, or at `priceIn` when no
 *                      cached-read rate is supplied;
 *  - output:           `tokensOut` at `priceOut`;
 *  - reasoning:        `reasoningTokens` at `priceReasoning`, or at `priceOut` when
 *                      no reasoning rate is supplied.
 *
 * A dedicated rate that is explicitly 0 means "free" and is honoured; the
 * fallback rate applies only when the dedicated rate is absent.
 *
 * NOTE(review): reasoning tokens are billed in addition to `tokensOut`. This
 * assumes `tokensOut` does not already include them — confirm against the
 * usage adapters.
 *
 * @param tokensIn        input tokens, including any cached ones.
 * @param tokensOut       output tokens.
 * @param priceIn         USD per 1M input tokens.
 * @param priceOut        USD per 1M output tokens.
 * @param priceReasoning  USD per 1M reasoning tokens.
 * @param priceCachedRead USD per 1M cached input tokens.
 * @param cachedTokens    how many of the input tokens were cache reads.
 * @param reasoningTokens reasoning tokens.
 * @returns the total cost in USD; 0 when no price applies.
 */
export function computeCost(
  tokensIn: number,
  tokensOut: number,
  priceIn?: number,
  priceOut?: number,
  priceReasoning?: number,
  priceCachedRead?: number,
  cachedTokens?: number,
  reasoningTokens?: number,
): number {
  let cost = 0;

  // Non-cached input (input minus cache reads).
  const nonCachedInput = Math.max(0, tokensIn - (cachedTokens ?? 0));
  if (priceIn) cost += (nonCachedInput / 1_000_000) * priceIn;

  // Cached reads. `??` rather than a truthiness test, so an explicit rate of 0
  // is treated as free instead of falling back to the full input rate.
  if (cachedTokens) {
    const cachedRate = priceCachedRead ?? priceIn;
    if (cachedRate) cost += (cachedTokens / 1_000_000) * cachedRate;
  }

  // Output completion.
  if (priceOut) cost += (tokensOut / 1_000_000) * priceOut;

  // Reasoning tokens: same rule, an explicit 0 rate does not fall back to output.
  if (reasoningTokens) {
    const reasoningRate = priceReasoning ?? priceOut;
    if (reasoningRate) cost += (reasoningTokens / 1_000_000) * reasoningRate;
  }

  return cost;
}
|