aigetwey 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/CHANGELOG.md +19 -1
  2. package/README.md +28 -7
  3. package/config.example.yaml +0 -1
  4. package/dashboard/src/app/(console)/quota/page.tsx +2 -2
  5. package/dashboard/src/components/BudgetForm.tsx +15 -17
  6. package/dashboard/src/components/{QuotaView.tsx → BudgetTracker.tsx} +71 -56
  7. package/dashboard/src/components/CooldownTimer.tsx +1 -1
  8. package/dashboard/src/components/EndpointView.tsx +255 -47
  9. package/dashboard/src/components/LogTable.tsx +32 -25
  10. package/dashboard/src/components/ProviderManager.tsx +3 -28
  11. package/dashboard/src/components/Rail.tsx +1 -1
  12. package/dashboard/src/components/RoutingView.tsx +6 -2
  13. package/dashboard/src/components/TopBar.tsx +1 -1
  14. package/dashboard/src/components/ui.tsx +6 -1
  15. package/dashboard/src/lib/client.ts +6 -5
  16. package/dashboard/src/lib/gateway.ts +23 -16
  17. package/dist/config.js +86 -23
  18. package/dist/config.js.map +1 -1
  19. package/dist/core/budget.js +1 -1
  20. package/dist/core/budget.js.map +1 -1
  21. package/dist/core/fallback.js +0 -6
  22. package/dist/core/fallback.js.map +1 -1
  23. package/dist/core/handler.js +6 -5
  24. package/dist/core/handler.js.map +1 -1
  25. package/dist/core/keysUsage.js +15 -0
  26. package/dist/core/keysUsage.js.map +1 -0
  27. package/dist/core/ratelimit.js +15 -0
  28. package/dist/core/ratelimit.js.map +1 -0
  29. package/dist/core/state.js +5 -13
  30. package/dist/core/state.js.map +1 -1
  31. package/dist/core/window.js +35 -0
  32. package/dist/core/window.js.map +1 -0
  33. package/dist/db.js +0 -20
  34. package/dist/db.js.map +1 -1
  35. package/dist/routes/admin.js +55 -10
  36. package/dist/routes/admin.js.map +1 -1
  37. package/dist/routes/v1.js +14 -1
  38. package/dist/routes/v1.js.map +1 -1
  39. package/dist/server.js +1 -7
  40. package/dist/server.js.map +1 -1
  41. package/dist/stream/openai-stream.js +3 -0
  42. package/dist/stream/openai-stream.js.map +1 -1
  43. package/package.json +1 -1
  44. package/src/config.ts +89 -23
  45. package/src/core/budget.ts +1 -1
  46. package/src/core/fallback.ts +0 -9
  47. package/src/core/handler.ts +9 -7
  48. package/src/core/keysUsage.ts +49 -0
  49. package/src/core/ratelimit.ts +25 -0
  50. package/src/core/state.ts +4 -14
  51. package/src/core/window.ts +45 -0
  52. package/src/db.ts +0 -23
  53. package/src/routes/admin.ts +61 -9
  54. package/src/routes/v1.ts +18 -1
  55. package/src/server.ts +1 -8
  56. package/src/stream/openai-stream.ts +3 -1
  57. package/src/core/quota.ts +0 -253
package/src/config.ts CHANGED
@@ -18,21 +18,9 @@ export { clientKeyFingerprint } from "./middleware/auth.js";
18
18
  // Shape differs from a flat OpenAI gateway: routing lives in a top-level
19
19
  // `models[]` layer (alias -> provider chain), the endpoint block carries the
20
20
  // token-saver toggles, and providers may be free passthroughs or service-account
21
- // backed. The handler/keypool/quota phases read these fields; defining the full
21
+ // backed. The handler/keypool phases read these fields; defining the full
22
22
  // shape up front avoids reshaping config across later phases.
23
23
 
24
- /** Token quota window for a provider — drives the dashboard reset countdown. */
25
- const QuotaSchema = z.object({
26
- window: z.enum(["5h", "daily", "weekly", "monthly"]),
27
- // daily: "HH:MM" local reset; weekly: weekday name ("monday"); others: ignored.
28
- reset_at: z.string().optional(),
29
- timezone: z.string().default("UTC"),
30
- // optional ceiling for a progress bar; quota tracking works without it.
31
- limit_tokens: z.number().int().positive().optional(),
32
- // soft-alert threshold (0..1); UI flags the quota when pct >= this. Default 0.8.
33
- alert_at: z.number().gt(0).lte(1).optional(),
34
- });
35
-
36
24
  const ProviderModelSchema = z.object({
37
25
  id: z.string().min(1),
38
26
  price_in: z.number().nonnegative().optional(),
@@ -59,7 +47,6 @@ const ProviderSchema = z
59
47
  service_account: z.string().optional(),
60
48
  models: z.array(ProviderModelSchema).default([]),
61
49
  headers: z.record(z.string()).optional(),
62
- quota: QuotaSchema.optional(),
63
50
  // when true the provider is skipped in routing (kept in config, like a key's
64
51
  // disabled state but for the whole provider).
65
52
  disabled: z.boolean().optional(),
@@ -120,14 +107,20 @@ const ServerSchema = z
120
107
  // optional friendly label per key, keyed by the key itself. Kept separate so
121
108
  // api_keys stays a plain string[] (auth/masking paths untouched).
122
109
  key_names: z.record(z.string()).optional(),
110
+ // per-key model allowlist (call-strings) + rate limit (req/min), keyed by the
111
+ // raw key like key_names. Absent → unrestricted / unlimited.
112
+ key_models: z.record(z.array(z.string().min(1))).optional(),
113
+ key_rpm: z.record(z.number().int().positive()).optional(),
114
+ // per-key access expiry, epoch ms, keyed by the RAW key. Absent → never expires.
115
+ key_expires: z.record(z.number().int().positive()).optional(),
123
116
  })
124
117
  .default({ host: "127.0.0.1", port: 18080, api_keys: [] });
125
118
 
126
119
  /**
127
120
  * A spend budget scoped to the whole gateway, one provider, or one upstream
128
121
  * model. unit picks what `limit` means — USD cost or total tokens. Soft-alert at
129
- * alert_at (default 0.8), hard-stop at 100%. Window math reuses the quota
130
- * calendar engine. Opt-in: omit / empty list to disable.
122
+ * alert_at (default 0.8), hard-stop at 100%. Each window is a fixed-length tumbling
123
+ * bucket anchored at `anchor`, or on the epoch grid when unset (window.ts). Opt-in: omit / empty list to disable.
131
124
  */
132
125
  const BudgetScopeSchema = z.discriminatedUnion("type", [
133
126
  z.object({ type: z.literal("global") }),
@@ -136,13 +129,21 @@ const BudgetScopeSchema = z.discriminatedUnion("type", [
136
129
  z.object({ type: z.literal("key"), id: z.string().min(1) }),
137
130
  ]);
138
131
 
132
+ // rolling windows replaced the old calendar windows; coerce any legacy value so
133
+ // existing config.yaml budgets keep loading (daily→24h, weekly→7day, monthly→30day).
134
+ const LEGACY_WINDOW: Record<string, string> = { daily: "24h", weekly: "7day", monthly: "30day" };
135
+ const WindowSchema = z.preprocess(
136
+ (v) => (typeof v === "string" && v in LEGACY_WINDOW ? LEGACY_WINDOW[v] : v),
137
+ z.enum(["5h", "24h", "7day", "30day"]),
138
+ );
139
+
139
140
  const BudgetSchema = z.object({
140
141
  scope: BudgetScopeSchema,
141
142
  unit: z.enum(["usd", "tokens"]),
142
143
  limit: z.number().positive(),
143
- window: z.enum(["5h", "daily", "weekly", "monthly"]),
144
- reset_at: z.string().optional(),
145
- timezone: z.string().default("UTC"),
144
+ window: WindowSchema,
145
+ // epoch ms the recurring cycle is anchored to; stamped by setBudget on create.
146
+ anchor: z.number().int().nonnegative().optional(),
146
147
  alert_at: z.number().gt(0).lte(1).optional(),
147
148
  // optional free-text label so an operator remembers what a budget is for.
148
149
  note: z.string().max(200).optional(),
@@ -157,7 +158,6 @@ const ConfigSchema = z.object({
157
158
  budgets: z.array(BudgetSchema).default([]),
158
159
  });
159
160
 
160
- export type Quota = z.infer<typeof QuotaSchema>;
161
161
  export type ProviderModel = z.infer<typeof ProviderModelSchema>;
162
162
  export type Provider = z.infer<typeof ProviderSchema>;
163
163
  export type ModelRoute = z.infer<typeof ModelRouteSchema>;
@@ -790,7 +790,7 @@ export function budgetKey(scope: BudgetScope): string {
790
790
  }
791
791
 
792
792
  /** Add a budget, or replace the existing one with the same scope key. */
793
- export function setBudget(config: Config, budget: Budget): Config {
793
+ export function setBudget(config: Config, budget: Budget, now: number = Date.now()): Config {
794
794
  if (budget.scope.type === "provider") {
795
795
  const { id } = budget.scope;
796
796
  if (!config.providers.some((p) => p.id === id)) {
@@ -806,8 +806,16 @@ export function setBudget(config: Config, budget: Budget): Config {
806
806
  const next = cloneConfig(config);
807
807
  const key = budgetKey(budget.scope);
808
808
  const idx = next.budgets.findIndex((b) => budgetKey(b.scope) === key);
809
- if (idx === -1) next.budgets.push(budget);
810
- else next.budgets[idx] = budget;
809
+ if (idx === -1) {
810
+ next.budgets.push({ ...budget, anchor: budget.anchor ?? now });
811
+ } else {
812
+ const prev = next.budgets[idx]!;
813
+ // keep the running cycle on edit (preserve prev anchor as-is, including a
814
+ // legacy undefined = epoch grid, so editing a limit never resets spend);
815
+ // start a fresh cycle only when the window length actually changed.
816
+ const anchor = budget.anchor ?? (prev.window === budget.window ? prev.anchor : now);
817
+ next.budgets[idx] = { ...budget, anchor };
818
+ }
811
819
  return next;
812
820
  }
813
821
 
@@ -854,5 +862,63 @@ export function removeServerKey(config: Config, index: number): Config {
854
862
  if (removed && next.server.key_names && removed in next.server.key_names) {
855
863
  delete next.server.key_names[removed];
856
864
  }
865
+ if (removed && next.server.key_models && removed in next.server.key_models) {
866
+ delete next.server.key_models[removed];
867
+ if (Object.keys(next.server.key_models).length === 0) next.server.key_models = undefined;
868
+ }
869
+ if (removed && next.server.key_rpm && removed in next.server.key_rpm) {
870
+ delete next.server.key_rpm[removed];
871
+ if (Object.keys(next.server.key_rpm).length === 0) next.server.key_rpm = undefined;
872
+ }
873
+ if (removed && next.server.key_expires && removed in next.server.key_expires) {
874
+ delete next.server.key_expires[removed];
875
+ if (Object.keys(next.server.key_expires).length === 0) next.server.key_expires = undefined;
876
+ }
877
+ return next;
878
+ }
879
+
880
+ /**
881
+ * Set or clear a gateway key's scopes (by index, since keys are masked in the
882
+ * API). `models`/`rpm`/`expires` are each applied only when present in the patch; an empty
883
+ * list or null/0 clears that scope. Empty maps are pruned to undefined.
884
+ */
885
+ export function setServerKeyScope(
886
+ config: Config,
887
+ index: number,
888
+ patch: { models?: string[] | null; rpm?: number | null; expires?: number | null },
889
+ ): Config {
890
+ const next = cloneConfig(config);
891
+ const keys = next.server.api_keys;
892
+ if (index < 0 || index >= keys.length) throw new Error(`no gateway key at index ${index}`);
893
+ const key = keys[index]!;
894
+
895
+ if (patch.models !== undefined) {
896
+ const models = { ...(next.server.key_models ?? {}) };
897
+ const list = (patch.models ?? []).map((m) => m.trim()).filter(Boolean);
898
+ if (list.length > 0) models[key] = list;
899
+ else delete models[key];
900
+ next.server.key_models = Object.keys(models).length > 0 ? models : undefined;
901
+ }
902
+
903
+ if (patch.rpm !== undefined) {
904
+ const rpm = { ...(next.server.key_rpm ?? {}) };
905
+ if (patch.rpm && patch.rpm > 0) rpm[key] = Math.floor(patch.rpm);
906
+ else delete rpm[key];
907
+ next.server.key_rpm = Object.keys(rpm).length > 0 ? rpm : undefined;
908
+ }
909
+
910
+ if (patch.expires !== undefined) {
911
+ const exp = { ...(next.server.key_expires ?? {}) };
912
+ if (patch.expires && patch.expires > 0) exp[key] = Math.floor(patch.expires);
913
+ else delete exp[key];
914
+ next.server.key_expires = Object.keys(exp).length > 0 ? exp : undefined;
915
+ }
916
+
857
917
  return next;
858
918
  }
919
+
920
+ /** True when `rawKey` has an expiry set and `now` is strictly past it. */
921
+ export function isKeyExpired(server: Config["server"], rawKey: string, now: number): boolean {
922
+ const at = server.key_expires?.[rawKey];
923
+ return at !== undefined && now > at;
924
+ }
@@ -8,7 +8,7 @@
8
8
  */
9
9
  import type { Budget, BudgetScope } from "../config.js";
10
10
  import { budgetKey } from "../config.js";
11
- import { currentWindowStart, nextResetAt } from "./quota.js";
11
+ import { currentWindowStart, nextResetAt } from "./window.js";
12
12
 
13
13
  export interface BudgetStatus {
14
14
  scope: BudgetScope;
@@ -32,8 +32,6 @@ export interface FallbackOpts {
32
32
  onAttempt?: (log: AttemptLog) => void;
33
33
  /** which key the pool handed out for the winning attempt (handler uses it for usage). */
34
34
  onServed?: (route: ResolvedRoute, key: string) => void;
35
- /** when set, a provider this returns true for is skipped (quota exhausted). */
36
- isExhausted?: (provider: ResolvedRoute["provider"]) => boolean;
37
35
  /** captured client thinking intent, applied per-attempt in the provider's format. */
38
36
  thinkingIntent?: ThinkingConfig | null;
39
37
  }
@@ -56,13 +54,6 @@ export async function executeWithFallback(
56
54
  for (const route of routes) {
57
55
  const { provider } = route;
58
56
 
59
- // skip a provider whose token budget is spent for this window — like a key
60
- // cooling down, but for the whole provider. Falls through to the next route.
61
- if (opts.isExhausted?.(provider)) {
62
- log({ provider: provider.id, model: route.model, outcome: "skip", detail: "quota exhausted" });
63
- continue;
64
- }
65
-
66
57
  const attempts = provider.max_retries + 1;
67
58
 
68
59
  for (let i = 0; i < attempts; i++) {
@@ -20,7 +20,6 @@ import { parseSSE, encodeSSE } from "../stream/sse.js";
20
20
  import { streamAdapterFor } from "../stream/index.js";
21
21
  import type { CanonicalChunk } from "../stream/chunk.js";
22
22
  import type { KeyPool } from "./keypool.js";
23
- import type { QuotaTracker } from "./quota.js";
24
23
  import { executeWithFallback } from "./fallback.js";
25
24
  import { type UsageDB, computeCost } from "../db.js";
26
25
  import { compressMessages } from "../rtk/index.js";
@@ -50,12 +49,12 @@ export interface HandleDeps {
50
49
  config: GatewayConfig;
51
50
  pool: KeyPool;
52
51
  db?: UsageDB;
53
- quota?: QuotaTracker;
54
52
  budget?: {
55
53
  globalStatus(): { exhausted: boolean; reset_in_ms: number } | null;
56
54
  blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null;
57
55
  blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null;
58
56
  };
57
+ clientKeyModels?: string[];
59
58
  clientKeyFp?: string;
60
59
  log?: (msg: string) => void;
61
60
  now?: () => number;
@@ -71,8 +70,6 @@ function recordUsage(
71
70
  ): void {
72
71
  const tokensIn = usage?.prompt_tokens ?? 0;
73
72
  const tokensOut = usage?.completion_tokens ?? 0;
74
- // count the full request against the served provider's window budget.
75
- deps.quota?.consume(route.provider, tokensIn + tokensOut);
76
73
  if (!deps.db) return;
77
74
  // Cost: a combo/route may set explicit prices; otherwise fall back to the ported
78
75
  // aigetwey pricing table so cost auto-resolves per model instead of showing $0.
@@ -118,6 +115,13 @@ export async function handle(
118
115
  // that can't reason. Matches aigetwey's capture-before-translate flow.
119
116
  const { cleanModel, override } = parseSuffix(canonical.model);
120
117
  canonical.model = cleanModel;
118
+
119
+ // per-key allowlist: a key may be restricted to specific call-strings. Empty/
120
+ // absent → unrestricted. Match the literal clean model the client asked for.
121
+ if (deps.clientKeyModels && deps.clientKeyModels.length > 0 && !deps.clientKeyModels.includes(cleanModel)) {
122
+ throw new GatewayError(403, { error: "model not allowed for this key" });
123
+ }
124
+
121
125
  const thinkingIntent: ThinkingConfig | null =
122
126
  override ?? captureThinking(canonical as Record<string, unknown>);
123
127
 
@@ -127,8 +131,7 @@ export async function handle(
127
131
  }
128
132
 
129
133
  // Budget hard-stop. Global overrun fails fast. Provider/model budgets bar the
130
- // matching routes (like the token-quota skip); if every candidate is barred,
131
- // there's nothing to serve → 402.
134
+ // matching routes; if every candidate is barred, there's nothing to serve → 402.
132
135
  if (deps.budget) {
133
136
  const g = deps.budget.globalStatus();
134
137
  if (g?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: g.reset_in_ms });
@@ -193,7 +196,6 @@ export async function handle(
193
196
  stream: wantStream,
194
197
  signal,
195
198
  thinkingIntent,
196
- isExhausted: deps.quota ? (p) => deps.quota!.isExhausted(p) : undefined,
197
199
  onAttempt: (a) =>
198
200
  deps.log?.(`[fallback] ${a.provider}/${a.model} ${a.status ?? "-"} -> ${a.outcome}${a.detail ? ` (${a.detail})` : ""}`),
199
201
  });
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Shapes one row for the Budgets page "Keys" section: a gateway key joined with
3
+ * its all-time spend/tokens, optional expiry, and its key-scoped budget status
4
+ * (null when the key is uncapped). Pure — the admin route feeds it real data.
5
+ */
6
+ import type { BudgetStatus } from "./budget.js";
7
+
8
+ export interface KeyBudgetView {
9
+ unit: "usd" | "tokens";
10
+ limit: number;
11
+ spent: number;
12
+ pct: number;
13
+ window: BudgetStatus["window"];
14
+ reset_in_ms: number;
15
+ exhausted: boolean;
16
+ alert: boolean;
17
+ }
18
+
19
+ export interface KeyUsageRow {
20
+ fingerprint: string;
21
+ name: string;
22
+ masked: string;
23
+ expires?: number;
24
+ spent: number;
25
+ tokens: number;
26
+ budget: KeyBudgetView | null;
27
+ }
28
+
29
+ export function buildKeyUsageRow(input: {
30
+ fingerprint: string;
31
+ name: string;
32
+ masked: string;
33
+ expires?: number;
34
+ totals: { tokens_in: number; tokens_out: number; cost: number };
35
+ budget: BudgetStatus | null;
36
+ }): KeyUsageRow {
37
+ const b = input.budget;
38
+ return {
39
+ fingerprint: input.fingerprint,
40
+ name: input.name,
41
+ masked: input.masked,
42
+ expires: input.expires,
43
+ spent: input.totals.cost,
44
+ tokens: input.totals.tokens_in + input.totals.tokens_out,
45
+ budget: b
46
+ ? { unit: b.unit, limit: b.limit, spent: b.spent, pct: b.pct, window: b.window, reset_in_ms: b.reset_in_ms, exhausted: b.exhausted, alert: b.alert }
47
+ : null,
48
+ };
49
+ }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Per-key request counter on a fixed calendar-minute window. In-memory only —
3
+ * counts reset on restart, which is fine for a 1-minute window. Used to rate-limit
4
+ * gateway keys that opt in via server.key_rpm.
5
+ */
6
+ interface Bucket {
7
+ minute: number;
8
+ count: number;
9
+ }
10
+
11
+ export class RateLimiter {
12
+ private readonly buckets = new Map<string, Bucket>();
13
+
14
+ /** Record a hit for `key`; return true if it now EXCEEDS `limit` this minute. */
15
+ over(key: string, limit: number, now: number = Date.now()): boolean {
16
+ const minute = Math.floor(now / 60_000);
17
+ const b = this.buckets.get(key);
18
+ if (!b || b.minute !== minute) {
19
+ this.buckets.set(key, { minute, count: 1 });
20
+ return 1 > limit;
21
+ }
22
+ b.count += 1;
23
+ return b.count > limit;
24
+ }
25
+ }
package/src/core/state.ts CHANGED
@@ -1,15 +1,13 @@
1
1
  /**
2
- * Mutable holder for the live gateway config, key pool, and quota tracker.
2
+ * Mutable holder for the live gateway config, key pool, and budget tracker.
3
3
  *
4
4
  * Config loads once at boot, but the dashboard edits it at runtime. Routes read
5
- * `state.config` / `state.pool` / `state.quota` fresh per request (never close
5
+ * `state.config` / `state.pool` / `state.budget` fresh per request (never close
6
6
  * over them), so a successful reload swaps in the new config + pool atomically —
7
7
  * no restart.
8
8
  *
9
9
  * reload() validates and persists BEFORE swapping: an invalid edit throws and
10
- * the old config keeps serving. The pool is rebuilt (cooldown is transient), but
11
- * the quota tracker is KEPT across reloads — a budget consumed this window must
12
- * survive a config edit, else editing config would silently reset every quota.
10
+ * the old config keeps serving. The pool is rebuilt (cooldown is transient).
13
11
  */
14
12
  import {
15
13
  GatewayConfig,
@@ -21,7 +19,6 @@ import {
21
19
  } from "../config.js";
22
20
  import { clientKeyFingerprint } from "../middleware/auth.js";
23
21
  import { KeyPool } from "./keypool.js";
24
- import { QuotaTracker } from "./quota.js";
25
22
  import { BudgetTracker } from "./budget.js";
26
23
 
27
24
  function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string, string> }, fp: string): string {
@@ -34,18 +31,15 @@ function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string,
34
31
  export class GatewayState {
35
32
  private _config: GatewayConfig;
36
33
  private _pool: KeyPool;
37
- private readonly _quota: QuotaTracker;
38
34
  private readonly _budget: BudgetTracker;
39
35
 
40
36
  constructor(
41
37
  private readonly configPath: string,
42
38
  initial: GatewayConfig,
43
- quota?: QuotaTracker,
44
39
  budgetDb?: { totals(since: number, filter?: { provider?: string; model?: string; client_key?: string }): { tokens_in: number; tokens_out: number; cost: number } },
45
40
  ) {
46
41
  this._config = initial;
47
42
  this._pool = new KeyPool();
48
- this._quota = quota ?? new QuotaTracker();
49
43
  this._budget = new BudgetTracker(
50
44
  () => this._config.raw.budgets,
51
45
  budgetDb ?? { totals: () => ({ tokens_in: 0, tokens_out: 0, cost: 0 }) },
@@ -63,10 +57,6 @@ export class GatewayState {
63
57
  return this._pool;
64
58
  }
65
59
 
66
- get quota(): QuotaTracker {
67
- return this._quota;
68
- }
69
-
70
60
  get budget(): BudgetTracker {
71
61
  return this._budget;
72
62
  }
@@ -75,7 +65,7 @@ export class GatewayState {
75
65
  * Validate edited config text, restore masked secrets from the live config,
76
66
  * persist atomically, then swap in a fresh config + pool. Throws without
77
67
  * changing anything if validation fails or a masked key can't be resolved —
78
- * the old config keeps serving. The quota tracker is intentionally preserved.
68
+ * the old config keeps serving.
79
69
  */
80
70
  reload(text: string): void {
81
71
  const parsed = parseConfigText(text);
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Rolling-window engine: every budget window is a fixed-duration tumbling bucket
3
+ * aligned to `anchor` when set, else to the epoch grid (no calendar/timezone math). `5h` resets every five
4
+ * hours, `24h` daily, `7day` weekly, `30day` monthly — each on a rolling grid
5
+ * rather than a calendar boundary. Shared by the budget tracker.
6
+ */
7
+ const HOUR_MS = 3600_000;
8
+ const DAY_MS = 24 * HOUR_MS;
9
+
10
+ export type WindowName = "5h" | "24h" | "7day" | "30day";
11
+
12
+ export type WindowSpec = {
13
+ window: WindowName;
14
+ /** Epoch ms the recurring cycle is anchored to. Absent ⇒ epoch-grid (legacy). */
15
+ anchor?: number;
16
+ };
17
+
18
+ const DURATION_MS: Record<WindowName, number> = {
19
+ "5h": 5 * HOUR_MS,
20
+ "24h": 24 * HOUR_MS,
21
+ "7day": 7 * DAY_MS,
22
+ "30day": 30 * DAY_MS,
23
+ };
24
+
25
+ /** Length (ms) of one window bucket. */
26
+ export function windowDuration(spec: WindowSpec): number {
27
+ return DURATION_MS[spec.window];
28
+ }
29
+
30
+ /** Epoch ms of the START of the bucket containing `now`. Anchored to `spec.anchor`
31
+ * when present (cycles tumble from the anchor); otherwise floored to the epoch grid. */
32
+ export function currentWindowStart(spec: WindowSpec, now: number): number {
33
+ const dur = DURATION_MS[spec.window];
34
+ if (spec.anchor === undefined) return Math.floor(now / dur) * dur;
35
+ if (now <= spec.anchor) return spec.anchor;
36
+ return spec.anchor + Math.floor((now - spec.anchor) / dur) * dur;
37
+ }
38
+
39
+ /** Next reset instant: the end of the current bucket (windowStart + duration). */
40
+ export function nextResetAt(spec: WindowSpec, windowStart: number, _now: number): number {
41
+ return windowStart + DURATION_MS[spec.window];
42
+ }
43
+
44
+ // `DAY_MS` is exported for any future window math that needs a day constant.
45
+ export { DAY_MS };
package/src/db.ts CHANGED
@@ -67,7 +67,6 @@ export class UsageDB {
67
67
  private readonly db: DatabaseSync;
68
68
  private readonly insertUsage;
69
69
  private readonly insertLog;
70
- private readonly upsertQuota;
71
70
  private readonly now: () => number;
72
71
 
73
72
  constructor(path: string, now: () => number = Date.now) {
@@ -121,13 +120,6 @@ export class UsageDB {
121
120
  INSERT INTO logs (ts, direction, provider, status, request_summary, response_summary)
122
121
  VALUES (?, ?, ?, ?, ?, ?)
123
122
  `);
124
- // upsert keyed on provider_id so each provider keeps one live window row.
125
- this.upsertQuota = this.db.prepare(`
126
- INSERT INTO quota_state (provider_id, window_start, consumed, last_reset)
127
- VALUES (?, ?, ?, ?)
128
- ON CONFLICT(provider_id) DO UPDATE SET window_start = excluded.window_start,
129
- consumed = excluded.consumed, last_reset = excluded.last_reset
130
- `);
131
123
  }
132
124
 
133
125
  record(row: Omit<UsageRow, "ts" | "client_key"> & { ts?: number; client_key?: string }): void {
@@ -297,21 +289,6 @@ export class UsageDB {
297
289
  }));
298
290
  }
299
291
 
300
- // ---- QuotaStore: one live window row per provider (survives restart) ----
301
-
302
- loadQuota(): Array<{ provider_id: string; window_start: number; consumed: number }> {
303
- const rows = this.db.prepare(`SELECT provider_id, window_start, consumed FROM quota_state`).all() as SqlRow[];
304
- return rows.map((r) => ({
305
- provider_id: String(r.provider_id),
306
- window_start: num(r.window_start),
307
- consumed: num(r.consumed),
308
- }));
309
- }
310
-
311
- saveQuota(providerId: string, windowStart: number, consumed: number): void {
312
- this.upsertQuota.run(providerId, windowStart, consumed, this.now());
313
- }
314
-
315
292
  close(): void {
316
293
  this.db.close();
317
294
  }
@@ -16,6 +16,7 @@ import type { FastifyInstance, FastifyRequest, FastifyReply } from "fastify";
16
16
  import type { GatewayState } from "../core/state.js";
17
17
  import type { UsageDB } from "../db.js";
18
18
  import { checkAdminAuth, clientKeyFingerprint, type AdminVerifier } from "../middleware/auth.js";
19
+ import { buildKeyUsageRow } from "../core/keysUsage.js";
19
20
  import {
20
21
  maskKey,
21
22
  serializeConfig,
@@ -44,6 +45,7 @@ import {
44
45
  addServerKey,
45
46
  editServerKey,
46
47
  removeServerKey,
48
+ setServerKeyScope,
47
49
  setBudget,
48
50
  clearBudget,
49
51
  type Config,
@@ -87,6 +89,23 @@ function maskedConfig(config: Config): Config {
87
89
  Object.entries(clone.server.key_names).map(([k, name]) => [maskKey(k), name]),
88
90
  );
89
91
  }
92
+ // key_models / key_rpm / key_expires are keyed by the RAW key — re-key to the masked form so
93
+ // real keys never leak through /admin/config.
94
+ if (clone.server.key_models) {
95
+ clone.server.key_models = Object.fromEntries(
96
+ Object.entries(clone.server.key_models).map(([k, v]) => [maskKey(k), v]),
97
+ );
98
+ }
99
+ if (clone.server.key_rpm) {
100
+ clone.server.key_rpm = Object.fromEntries(
101
+ Object.entries(clone.server.key_rpm).map(([k, v]) => [maskKey(k), v]),
102
+ );
103
+ }
104
+ if (clone.server.key_expires) {
105
+ clone.server.key_expires = Object.fromEntries(
106
+ Object.entries(clone.server.key_expires).map(([k, v]) => [maskKey(k), v]),
107
+ );
108
+ }
90
109
  return clone;
91
110
  }
92
111
 
@@ -143,12 +162,9 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
143
162
  reply.send({ providers: deps.state.pool.snapshot(deps.state.config.listProviders()) });
144
163
  });
145
164
 
146
- // per-provider quota: consumed, limit, and ms until the next scheduled reset.
147
- app.get("/admin/quota", requireAdmin, (_req, reply) => {
148
- reply.send({
149
- quota: deps.state.quota.snapshot(deps.state.config.listProviders()),
150
- budgets: deps.state.budget.statuses(),
151
- });
165
+ // budget statuses: consumed, limit, and ms until the next scheduled reset.
166
+ app.get("/admin/budgets", requireAdmin, (_req, reply) => {
167
+ reply.send({ budgets: deps.state.budget.statuses() });
152
168
  });
153
169
 
154
170
  // add or replace a budget (keyed by scope). Body = Budget; invalid shape or an
@@ -403,7 +419,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
403
419
  });
404
420
 
405
421
  // Test ONE model end-to-end (aigetwey's per-model science button). Routes through
406
- // the real pipeline via handle(), so the ping lands in usage/quota exactly like
422
+ // the real pipeline via handle(), so the ping lands in usage exactly like
407
423
  // a normal call — and it catches "model not found / not entitled" a /models
408
424
  // ping can't. Model id travels as ?model= to survive slashes through the proxy.
409
425
  app.post("/admin/providers/:id/models/test", requireAdmin, async (req, reply) => {
@@ -414,7 +430,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
414
430
  if (!provider) return reply.code(404).send({ error: `provider "${id}" not found` });
415
431
  try {
416
432
  await handle(
417
- { config: deps.state.config, pool: deps.state.pool, db: deps.db, quota: deps.state.quota },
433
+ { config: deps.state.config, pool: deps.state.pool, db: deps.db },
418
434
  "openai",
419
435
  { model: `${id}/${modelId}`, messages: [{ role: "user", content: "ping" }], max_tokens: 1, stream: false },
420
436
  );
@@ -564,6 +580,15 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
564
580
  applyMutation(reply, (c) => editServerKey(c, i, { name: b?.name }));
565
581
  });
566
582
 
583
+ // set/clear ONE gateway key's scopes (model allowlist + rpm + expiry), by index.
584
+ app.put("/admin/endpoint/keys/:index/scope", requireAdmin, (req, reply) => {
585
+ const { index } = req.params as { index: string };
586
+ const i = Number(index);
587
+ if (!Number.isInteger(i)) return reply.code(400).send({ error: "index must be an integer" });
588
+ const b = (req.body ?? {}) as { models?: string[]; rpm?: number | null; expires?: number | null };
589
+ applyMutation(reply, (c) => setServerKeyScope(c, i, { models: b.models, rpm: b.rpm, expires: b.expires }));
590
+ });
591
+
567
592
  app.delete("/admin/endpoint/keys/:index", requireAdmin, (req, reply) => {
568
593
  const { index } = req.params as { index: string };
569
594
  const i = Number(index);
@@ -584,6 +609,26 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
584
609
  );
585
610
  });
586
611
 
612
+ // per-key spend for the Budgets page "Keys" section: every gateway key, its
613
+ // all-time usage, expiry, and key-scoped budget status (null when uncapped).
614
+ app.get("/admin/keys/usage", requireAdmin, (_req, reply) => {
615
+ if (!deps.db) return reply.code(503).send({ error: "usage tracking disabled" });
616
+ const cfg = deps.state.config.raw;
617
+ const statuses = deps.state.budget.statuses();
618
+ const keys = cfg.server.api_keys.map((k) => {
619
+ const fp = clientKeyFingerprint(k);
620
+ return buildKeyUsageRow({
621
+ fingerprint: fp,
622
+ name: cfg.server.key_names?.[k] ?? maskKey(k),
623
+ masked: maskKey(k),
624
+ expires: cfg.server.key_expires?.[k],
625
+ totals: deps.db!.totals(0, { client_key: fp }),
626
+ budget: statuses.find((s) => s.scope.type === "key" && s.scope.id === fp) ?? null,
627
+ });
628
+ });
629
+ reply.send({ keys });
630
+ });
631
+
587
632
  // reveal ONE raw gateway key (the "show key" button on the Endpoint page).
588
633
  app.get("/admin/endpoint/keys/:index/reveal", requireAdmin, (req, reply) => {
589
634
  const { index } = req.params as { index: string };
@@ -749,6 +794,13 @@ function endpointPayload(config: Config) {
749
794
  caveman: config.endpoint.caveman,
750
795
  ponytail: config.endpoint.ponytail,
751
796
  headroom: config.endpoint.headroom,
752
- keys: config.server.api_keys.map((k) => ({ key: maskKey(k), name: config.server.key_names?.[k] })),
797
+ keys: config.server.api_keys.map((k) => ({
798
+ key: maskKey(k),
799
+ fingerprint: clientKeyFingerprint(k),
800
+ name: config.server.key_names?.[k],
801
+ models: config.server.key_models?.[k],
802
+ rpm: config.server.key_rpm?.[k],
803
+ expires: config.server.key_expires?.[k],
804
+ })),
753
805
  };
754
806
  }