aigetwey 1.2.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/CHANGELOG.md +41 -1
  2. package/README.md +30 -7
  3. package/assets/screenshot.png +0 -0
  4. package/config.example.yaml +0 -1
  5. package/dashboard/src/app/(console)/quota/page.tsx +2 -2
  6. package/dashboard/src/app/layout.tsx +3 -2
  7. package/dashboard/src/components/BudgetForm.tsx +15 -17
  8. package/dashboard/src/components/{QuotaView.tsx → BudgetTracker.tsx} +71 -56
  9. package/dashboard/src/components/CooldownTimer.tsx +1 -1
  10. package/dashboard/src/components/EndpointView.tsx +255 -47
  11. package/dashboard/src/components/LogTable.tsx +36 -26
  12. package/dashboard/src/components/ProviderManager.tsx +3 -28
  13. package/dashboard/src/components/Rail.tsx +1 -1
  14. package/dashboard/src/components/RoutingView.tsx +6 -2
  15. package/dashboard/src/components/TopBar.tsx +1 -1
  16. package/dashboard/src/components/ui.tsx +6 -1
  17. package/dashboard/src/lib/client.ts +6 -5
  18. package/dashboard/src/lib/gateway.ts +24 -16
  19. package/dist/adapters/gemini.js +1 -0
  20. package/dist/adapters/gemini.js.map +1 -1
  21. package/dist/adapters/openai.js +13 -1
  22. package/dist/adapters/openai.js.map +1 -1
  23. package/dist/config.js +86 -23
  24. package/dist/config.js.map +1 -1
  25. package/dist/core/budget.js +1 -1
  26. package/dist/core/budget.js.map +1 -1
  27. package/dist/core/fallback.js +0 -6
  28. package/dist/core/fallback.js.map +1 -1
  29. package/dist/core/handler.js +13 -7
  30. package/dist/core/handler.js.map +1 -1
  31. package/dist/core/keysUsage.js +15 -0
  32. package/dist/core/keysUsage.js.map +1 -0
  33. package/dist/core/ratelimit.js +15 -0
  34. package/dist/core/ratelimit.js.map +1 -0
  35. package/dist/core/state.js +5 -13
  36. package/dist/core/state.js.map +1 -1
  37. package/dist/core/window.js +35 -0
  38. package/dist/core/window.js.map +1 -0
  39. package/dist/db.js +34 -29
  40. package/dist/db.js.map +1 -1
  41. package/dist/routes/admin.js +55 -10
  42. package/dist/routes/admin.js.map +1 -1
  43. package/dist/routes/v1.js +14 -1
  44. package/dist/routes/v1.js.map +1 -1
  45. package/dist/server.js +1 -7
  46. package/dist/server.js.map +1 -1
  47. package/dist/stream/anthropic-stream.js +7 -0
  48. package/dist/stream/anthropic-stream.js.map +1 -1
  49. package/dist/stream/gemini-stream.js +2 -1
  50. package/dist/stream/gemini-stream.js.map +1 -1
  51. package/dist/stream/openai-stream.js +10 -0
  52. package/dist/stream/openai-stream.js.map +1 -1
  53. package/package.json +1 -1
  54. package/src/adapters/gemini.ts +2 -0
  55. package/src/adapters/openai.ts +18 -1
  56. package/src/config.ts +89 -23
  57. package/src/core/budget.ts +1 -1
  58. package/src/core/fallback.ts +0 -9
  59. package/src/core/handler.ts +16 -9
  60. package/src/core/keysUsage.ts +49 -0
  61. package/src/core/ratelimit.ts +25 -0
  62. package/src/core/state.ts +4 -14
  63. package/src/core/window.ts +45 -0
  64. package/src/db.ts +35 -31
  65. package/src/routes/admin.ts +61 -9
  66. package/src/routes/v1.ts +18 -1
  67. package/src/server.ts +1 -8
  68. package/src/stream/anthropic-stream.ts +10 -1
  69. package/src/stream/chunk.ts +2 -0
  70. package/src/stream/gemini-stream.ts +3 -2
  71. package/src/stream/openai-stream.ts +12 -1
  72. package/src/core/quota.ts +0 -253
package/src/config.ts CHANGED
@@ -18,21 +18,9 @@ export { clientKeyFingerprint } from "./middleware/auth.js";
18
18
  // Shape differs from a flat OpenAI gateway: routing lives in a top-level
19
19
  // `models[]` layer (alias -> provider chain), the endpoint block carries the
20
20
  // token-saver toggles, and providers may be free passthroughs or service-account
21
- // backed. The handler/keypool/quota phases read these fields; defining the full
21
+ // backed. The handler/keypool phases read these fields; defining the full
22
22
  // shape up front avoids reshaping config across later phases.
23
23
 
24
- /** Token quota window for a provider — drives the dashboard reset countdown. */
25
- const QuotaSchema = z.object({
26
- window: z.enum(["5h", "daily", "weekly", "monthly"]),
27
- // daily: "HH:MM" local reset; weekly: weekday name ("monday"); others: ignored.
28
- reset_at: z.string().optional(),
29
- timezone: z.string().default("UTC"),
30
- // optional ceiling for a progress bar; quota tracking works without it.
31
- limit_tokens: z.number().int().positive().optional(),
32
- // soft-alert threshold (0..1); UI flags the quota when pct >= this. Default 0.8.
33
- alert_at: z.number().gt(0).lte(1).optional(),
34
- });
35
-
36
24
  const ProviderModelSchema = z.object({
37
25
  id: z.string().min(1),
38
26
  price_in: z.number().nonnegative().optional(),
@@ -59,7 +47,6 @@ const ProviderSchema = z
59
47
  service_account: z.string().optional(),
60
48
  models: z.array(ProviderModelSchema).default([]),
61
49
  headers: z.record(z.string()).optional(),
62
- quota: QuotaSchema.optional(),
63
50
  // when true the provider is skipped in routing (kept in config, like a key's
64
51
  // disabled state but for the whole provider).
65
52
  disabled: z.boolean().optional(),
@@ -120,14 +107,20 @@ const ServerSchema = z
120
107
  // optional friendly label per key, keyed by the key itself. Kept separate so
121
108
  // api_keys stays a plain string[] (auth/masking paths untouched).
122
109
  key_names: z.record(z.string()).optional(),
110
+ // per-key model allowlist (call-strings) + rate limit (req/min), keyed by the
111
+ // raw key like key_names. Absent → unrestricted / unlimited.
112
+ key_models: z.record(z.array(z.string().min(1))).optional(),
113
+ key_rpm: z.record(z.number().int().positive()).optional(),
114
+ // per-key access expiry, epoch ms, keyed by the RAW key. Absent → never expires.
115
+ key_expires: z.record(z.number().int().positive()).optional(),
123
116
  })
124
117
  .default({ host: "127.0.0.1", port: 18080, api_keys: [] });
125
118
 
126
119
  /**
127
120
  * A spend budget scoped to the whole gateway, one provider, or one upstream
128
121
  * model. unit picks what `limit` means — USD cost or total tokens. Soft-alert at
129
- * alert_at (default 0.8), hard-stop at 100%. Window math reuses the quota
130
- * calendar engine. Opt-in: omit / empty list to disable.
122
+ * alert_at (default 0.8), hard-stop at 100%. Each window is a fixed-length tumbling
123
+ * bucket anchored at `anchor`, else on the epoch grid (window.ts). Opt-in: omit / empty list to disable.
131
124
  */
132
125
  const BudgetScopeSchema = z.discriminatedUnion("type", [
133
126
  z.object({ type: z.literal("global") }),
@@ -136,13 +129,21 @@ const BudgetScopeSchema = z.discriminatedUnion("type", [
136
129
  z.object({ type: z.literal("key"), id: z.string().min(1) }),
137
130
  ]);
138
131
 
132
+ // rolling windows replaced the old calendar windows; coerce any legacy value so
133
+ // existing config.yaml budgets keep loading (daily→24h, weekly→7day, monthly→30day).
134
+ const LEGACY_WINDOW: Record<string, string> = { daily: "24h", weekly: "7day", monthly: "30day" };
135
+ const WindowSchema = z.preprocess(
136
+ (v) => (typeof v === "string" && v in LEGACY_WINDOW ? LEGACY_WINDOW[v] : v),
137
+ z.enum(["5h", "24h", "7day", "30day"]),
138
+ );
139
+
139
140
  const BudgetSchema = z.object({
140
141
  scope: BudgetScopeSchema,
141
142
  unit: z.enum(["usd", "tokens"]),
142
143
  limit: z.number().positive(),
143
- window: z.enum(["5h", "daily", "weekly", "monthly"]),
144
- reset_at: z.string().optional(),
145
- timezone: z.string().default("UTC"),
144
+ window: WindowSchema,
145
+ // epoch ms the recurring cycle is anchored to; stamped by setBudget on create.
146
+ anchor: z.number().int().nonnegative().optional(),
146
147
  alert_at: z.number().gt(0).lte(1).optional(),
147
148
  // optional free-text label so an operator remembers what a budget is for.
148
149
  note: z.string().max(200).optional(),
@@ -157,7 +158,6 @@ const ConfigSchema = z.object({
157
158
  budgets: z.array(BudgetSchema).default([]),
158
159
  });
159
160
 
160
- export type Quota = z.infer<typeof QuotaSchema>;
161
161
  export type ProviderModel = z.infer<typeof ProviderModelSchema>;
162
162
  export type Provider = z.infer<typeof ProviderSchema>;
163
163
  export type ModelRoute = z.infer<typeof ModelRouteSchema>;
@@ -790,7 +790,7 @@ export function budgetKey(scope: BudgetScope): string {
790
790
  }
791
791
 
792
792
  /** Add a budget, or replace the existing one with the same scope key. */
793
- export function setBudget(config: Config, budget: Budget): Config {
793
+ export function setBudget(config: Config, budget: Budget, now: number = Date.now()): Config {
794
794
  if (budget.scope.type === "provider") {
795
795
  const { id } = budget.scope;
796
796
  if (!config.providers.some((p) => p.id === id)) {
@@ -806,8 +806,16 @@ export function setBudget(config: Config, budget: Budget): Config {
806
806
  const next = cloneConfig(config);
807
807
  const key = budgetKey(budget.scope);
808
808
  const idx = next.budgets.findIndex((b) => budgetKey(b.scope) === key);
809
- if (idx === -1) next.budgets.push(budget);
810
- else next.budgets[idx] = budget;
809
+ if (idx === -1) {
810
+ next.budgets.push({ ...budget, anchor: budget.anchor ?? now });
811
+ } else {
812
+ const prev = next.budgets[idx]!;
813
+ // keep the running cycle on edit (preserve prev anchor as-is, including a
814
+ // legacy undefined = epoch grid, so editing a limit never resets spend);
815
+ // start a fresh cycle only when the window length actually changed.
816
+ const anchor = budget.anchor ?? (prev.window === budget.window ? prev.anchor : now);
817
+ next.budgets[idx] = { ...budget, anchor };
818
+ }
811
819
  return next;
812
820
  }
813
821
 
@@ -854,5 +862,63 @@ export function removeServerKey(config: Config, index: number): Config {
854
862
  if (removed && next.server.key_names && removed in next.server.key_names) {
855
863
  delete next.server.key_names[removed];
856
864
  }
865
+ if (removed && next.server.key_models && removed in next.server.key_models) {
866
+ delete next.server.key_models[removed];
867
+ if (Object.keys(next.server.key_models).length === 0) next.server.key_models = undefined;
868
+ }
869
+ if (removed && next.server.key_rpm && removed in next.server.key_rpm) {
870
+ delete next.server.key_rpm[removed];
871
+ if (Object.keys(next.server.key_rpm).length === 0) next.server.key_rpm = undefined;
872
+ }
873
+ if (removed && next.server.key_expires && removed in next.server.key_expires) {
874
+ delete next.server.key_expires[removed];
875
+ if (Object.keys(next.server.key_expires).length === 0) next.server.key_expires = undefined;
876
+ }
877
+ return next;
878
+ }
879
+
880
+ /**
881
+ * Set or clear a gateway key's scopes (by index, since keys are masked in the
882
+ * API). `models`/`rpm`/`expires` are each applied only when present in the patch; an empty
883
+ * list or null/0 clears that scope. Empty maps are pruned to undefined.
884
+ */
885
+ export function setServerKeyScope(
886
+ config: Config,
887
+ index: number,
888
+ patch: { models?: string[] | null; rpm?: number | null; expires?: number | null },
889
+ ): Config {
890
+ const next = cloneConfig(config);
891
+ const keys = next.server.api_keys;
892
+ if (index < 0 || index >= keys.length) throw new Error(`no gateway key at index ${index}`);
893
+ const key = keys[index]!;
894
+
895
+ if (patch.models !== undefined) {
896
+ const models = { ...(next.server.key_models ?? {}) };
897
+ const list = (patch.models ?? []).map((m) => m.trim()).filter(Boolean);
898
+ if (list.length > 0) models[key] = list;
899
+ else delete models[key];
900
+ next.server.key_models = Object.keys(models).length > 0 ? models : undefined;
901
+ }
902
+
903
+ if (patch.rpm !== undefined) {
904
+ const rpm = { ...(next.server.key_rpm ?? {}) };
905
+ if (patch.rpm && patch.rpm > 0) rpm[key] = Math.floor(patch.rpm);
906
+ else delete rpm[key];
907
+ next.server.key_rpm = Object.keys(rpm).length > 0 ? rpm : undefined;
908
+ }
909
+
910
+ if (patch.expires !== undefined) {
911
+ const exp = { ...(next.server.key_expires ?? {}) };
912
+ if (patch.expires && patch.expires > 0) exp[key] = Math.floor(patch.expires);
913
+ else delete exp[key];
914
+ next.server.key_expires = Object.keys(exp).length > 0 ? exp : undefined;
915
+ }
916
+
857
917
  return next;
858
918
  }
919
+
920
+ /** True when `rawKey` has an expiry set and `now` is strictly past it. */
921
+ export function isKeyExpired(server: Config["server"], rawKey: string, now: number): boolean {
922
+ const at = server.key_expires?.[rawKey];
923
+ return at !== undefined && now > at;
924
+ }
@@ -8,7 +8,7 @@
8
8
  */
9
9
  import type { Budget, BudgetScope } from "../config.js";
10
10
  import { budgetKey } from "../config.js";
11
- import { currentWindowStart, nextResetAt } from "./quota.js";
11
+ import { currentWindowStart, nextResetAt } from "./window.js";
12
12
 
13
13
  export interface BudgetStatus {
14
14
  scope: BudgetScope;
@@ -32,8 +32,6 @@ export interface FallbackOpts {
32
32
  onAttempt?: (log: AttemptLog) => void;
33
33
  /** which key the pool handed out for the winning attempt (handler uses it for usage). */
34
34
  onServed?: (route: ResolvedRoute, key: string) => void;
35
- /** when set, a provider this returns true for is skipped (quota exhausted). */
36
- isExhausted?: (provider: ResolvedRoute["provider"]) => boolean;
37
35
  /** captured client thinking intent, applied per-attempt in the provider's format. */
38
36
  thinkingIntent?: ThinkingConfig | null;
39
37
  }
@@ -56,13 +54,6 @@ export async function executeWithFallback(
56
54
  for (const route of routes) {
57
55
  const { provider } = route;
58
56
 
59
- // skip a provider whose token budget is spent for this window — like a key
60
- // cooling down, but for the whole provider. Falls through to the next route.
61
- if (opts.isExhausted?.(provider)) {
62
- log({ provider: provider.id, model: route.model, outcome: "skip", detail: "quota exhausted" });
63
- continue;
64
- }
65
-
66
57
  const attempts = provider.max_retries + 1;
67
58
 
68
59
  for (let i = 0; i < attempts; i++) {
@@ -20,7 +20,6 @@ import { parseSSE, encodeSSE } from "../stream/sse.js";
20
20
  import { streamAdapterFor } from "../stream/index.js";
21
21
  import type { CanonicalChunk } from "../stream/chunk.js";
22
22
  import type { KeyPool } from "./keypool.js";
23
- import type { QuotaTracker } from "./quota.js";
24
23
  import { executeWithFallback } from "./fallback.js";
25
24
  import { type UsageDB, computeCost } from "../db.js";
26
25
  import { compressMessages } from "../rtk/index.js";
@@ -50,12 +49,12 @@ export interface HandleDeps {
50
49
  config: GatewayConfig;
51
50
  pool: KeyPool;
52
51
  db?: UsageDB;
53
- quota?: QuotaTracker;
54
52
  budget?: {
55
53
  globalStatus(): { exhausted: boolean; reset_in_ms: number } | null;
56
54
  blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null;
57
55
  blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null;
58
56
  };
57
+ clientKeyModels?: string[];
59
58
  clientKeyFp?: string;
60
59
  log?: (msg: string) => void;
61
60
  now?: () => number;
@@ -71,22 +70,25 @@ function recordUsage(
71
70
  ): void {
72
71
  const tokensIn = usage?.prompt_tokens ?? 0;
73
72
  const tokensOut = usage?.completion_tokens ?? 0;
74
- // count the full request against the served provider's window budget.
75
- deps.quota?.consume(route.provider, tokensIn + tokensOut);
73
+ const reasoningTokens = usage?.reasoning_tokens ?? 0;
74
+ const cachedTokens = usage?.cached_tokens ?? 0;
76
75
  if (!deps.db) return;
77
76
  // Cost: a combo/route may set explicit prices; otherwise fall back to the ported
78
77
  // aigetwey pricing table so cost auto-resolves per model instead of showing $0.
79
78
  const pricing = getPricingForModel(route.provider.id, route.model);
80
79
  const priceIn = route.price_in ?? pricing?.input;
81
80
  const priceOut = route.price_out ?? pricing?.output;
81
+ const priceCachedRead = pricing?.cached;
82
+ const priceReasoning = pricing?.reasoning;
82
83
  deps.db.record({
83
84
  alias: route.alias,
84
85
  provider: route.provider.id,
85
86
  model: route.model,
86
87
  tokens_in: tokensIn,
87
88
  tokens_out: tokensOut,
88
- cached_tokens: usage?.cached_tokens ?? 0,
89
- cost: computeCost(tokensIn, tokensOut, priceIn, priceOut),
89
+ reasoning_tokens: reasoningTokens,
90
+ cached_tokens: cachedTokens,
91
+ cost: computeCost(tokensIn, tokensOut, priceIn, priceOut, priceReasoning, priceCachedRead, cachedTokens, reasoningTokens),
90
92
  status,
91
93
  latency_ms: latencyMs,
92
94
  stream: stream ? 1 : 0,
@@ -118,6 +120,13 @@ export async function handle(
118
120
  // that can't reason. Matches aigetwey's capture-before-translate flow.
119
121
  const { cleanModel, override } = parseSuffix(canonical.model);
120
122
  canonical.model = cleanModel;
123
+
124
+ // per-key allowlist: a key may be restricted to specific call-strings. Empty/
125
+ // absent → unrestricted. Match the literal clean model the client asked for.
126
+ if (deps.clientKeyModels && deps.clientKeyModels.length > 0 && !deps.clientKeyModels.includes(cleanModel)) {
127
+ throw new GatewayError(403, { error: "model not allowed for this key" });
128
+ }
129
+
121
130
  const thinkingIntent: ThinkingConfig | null =
122
131
  override ?? captureThinking(canonical as Record<string, unknown>);
123
132
 
@@ -127,8 +136,7 @@ export async function handle(
127
136
  }
128
137
 
129
138
  // Budget hard-stop. Global overrun fails fast. Provider/model budgets bar the
130
- // matching routes (like the token-quota skip); if every candidate is barred,
131
- // there's nothing to serve → 402.
139
+ // matching routes; if every candidate is barred, there's nothing to serve → 402.
132
140
  if (deps.budget) {
133
141
  const g = deps.budget.globalStatus();
134
142
  if (g?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: g.reset_in_ms });
@@ -193,7 +201,6 @@ export async function handle(
193
201
  stream: wantStream,
194
202
  signal,
195
203
  thinkingIntent,
196
- isExhausted: deps.quota ? (p) => deps.quota!.isExhausted(p) : undefined,
197
204
  onAttempt: (a) =>
198
205
  deps.log?.(`[fallback] ${a.provider}/${a.model} ${a.status ?? "-"} -> ${a.outcome}${a.detail ? ` (${a.detail})` : ""}`),
199
206
  });
@@ -0,0 +1,49 @@
1
+ /**
2
+ * Shapes one row for the Budgets page "Keys" section: a gateway key joined with
3
+ * its all-time spend/tokens, optional expiry, and its key-scoped budget status
4
+ * (null when the key is uncapped). Pure — the admin route feeds it real data.
5
+ */
6
+ import type { BudgetStatus } from "./budget.js";
7
+
8
+ export interface KeyBudgetView {
9
+ unit: "usd" | "tokens";
10
+ limit: number;
11
+ spent: number;
12
+ pct: number;
13
+ window: BudgetStatus["window"];
14
+ reset_in_ms: number;
15
+ exhausted: boolean;
16
+ alert: boolean;
17
+ }
18
+
19
+ export interface KeyUsageRow {
20
+ fingerprint: string;
21
+ name: string;
22
+ masked: string;
23
+ expires?: number;
24
+ spent: number;
25
+ tokens: number;
26
+ budget: KeyBudgetView | null;
27
+ }
28
+
29
+ export function buildKeyUsageRow(input: {
30
+ fingerprint: string;
31
+ name: string;
32
+ masked: string;
33
+ expires?: number;
34
+ totals: { tokens_in: number; tokens_out: number; cost: number };
35
+ budget: BudgetStatus | null;
36
+ }): KeyUsageRow {
37
+ const b = input.budget;
38
+ return {
39
+ fingerprint: input.fingerprint,
40
+ name: input.name,
41
+ masked: input.masked,
42
+ expires: input.expires,
43
+ spent: input.totals.cost,
44
+ tokens: input.totals.tokens_in + input.totals.tokens_out,
45
+ budget: b
46
+ ? { unit: b.unit, limit: b.limit, spent: b.spent, pct: b.pct, window: b.window, reset_in_ms: b.reset_in_ms, exhausted: b.exhausted, alert: b.alert }
47
+ : null,
48
+ };
49
+ }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Per-key request counter on a fixed calendar-minute window. In-memory only —
3
+ * counts reset on restart, which is fine for a 1-minute window. Used to rate-limit
4
+ * gateway keys that opt in via server.key_rpm.
5
+ */
6
+ interface Bucket {
7
+ minute: number;
8
+ count: number;
9
+ }
10
+
11
+ export class RateLimiter {
12
+ private readonly buckets = new Map<string, Bucket>();
13
+
14
+ /** Record a hit for `key`; return true if it now EXCEEDS `limit` this minute. */
15
+ over(key: string, limit: number, now: number = Date.now()): boolean {
16
+ const minute = Math.floor(now / 60_000);
17
+ const b = this.buckets.get(key);
18
+ if (!b || b.minute !== minute) {
19
+ this.buckets.set(key, { minute, count: 1 });
20
+ return 1 > limit;
21
+ }
22
+ b.count += 1;
23
+ return b.count > limit;
24
+ }
25
+ }
package/src/core/state.ts CHANGED
@@ -1,15 +1,13 @@
1
1
  /**
2
- * Mutable holder for the live gateway config, key pool, and quota tracker.
2
+ * Mutable holder for the live gateway config, key pool, and budget tracker.
3
3
  *
4
4
  * Config loads once at boot, but the dashboard edits it at runtime. Routes read
5
- * `state.config` / `state.pool` / `state.quota` fresh per request (never close
5
+ * `state.config` / `state.pool` / `state.budget` fresh per request (never close
6
6
  * over them), so a successful reload swaps in the new config + pool atomically —
7
7
  * no restart.
8
8
  *
9
9
  * reload() validates and persists BEFORE swapping: an invalid edit throws and
10
- * the old config keeps serving. The pool is rebuilt (cooldown is transient), but
11
- * the quota tracker is KEPT across reloads — a budget consumed this window must
12
- * survive a config edit, else editing config would silently reset every quota.
10
+ * the old config keeps serving. The pool is rebuilt (cooldown is transient).
13
11
  */
14
12
  import {
15
13
  GatewayConfig,
@@ -21,7 +19,6 @@ import {
21
19
  } from "../config.js";
22
20
  import { clientKeyFingerprint } from "../middleware/auth.js";
23
21
  import { KeyPool } from "./keypool.js";
24
- import { QuotaTracker } from "./quota.js";
25
22
  import { BudgetTracker } from "./budget.js";
26
23
 
27
24
  function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string, string> }, fp: string): string {
@@ -34,18 +31,15 @@ function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string,
34
31
  export class GatewayState {
35
32
  private _config: GatewayConfig;
36
33
  private _pool: KeyPool;
37
- private readonly _quota: QuotaTracker;
38
34
  private readonly _budget: BudgetTracker;
39
35
 
40
36
  constructor(
41
37
  private readonly configPath: string,
42
38
  initial: GatewayConfig,
43
- quota?: QuotaTracker,
44
39
  budgetDb?: { totals(since: number, filter?: { provider?: string; model?: string; client_key?: string }): { tokens_in: number; tokens_out: number; cost: number } },
45
40
  ) {
46
41
  this._config = initial;
47
42
  this._pool = new KeyPool();
48
- this._quota = quota ?? new QuotaTracker();
49
43
  this._budget = new BudgetTracker(
50
44
  () => this._config.raw.budgets,
51
45
  budgetDb ?? { totals: () => ({ tokens_in: 0, tokens_out: 0, cost: 0 }) },
@@ -63,10 +57,6 @@ export class GatewayState {
63
57
  return this._pool;
64
58
  }
65
59
 
66
- get quota(): QuotaTracker {
67
- return this._quota;
68
- }
69
-
70
60
  get budget(): BudgetTracker {
71
61
  return this._budget;
72
62
  }
@@ -75,7 +65,7 @@ export class GatewayState {
75
65
  * Validate edited config text, restore masked secrets from the live config,
76
66
  * persist atomically, then swap in a fresh config + pool. Throws without
77
67
  * changing anything if validation fails or a masked key can't be resolved —
78
- * the old config keeps serving. The quota tracker is intentionally preserved.
68
+ * the old config keeps serving.
79
69
  */
80
70
  reload(text: string): void {
81
71
  const parsed = parseConfigText(text);
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Rolling-window engine: every budget window is a fixed-duration tumbling bucket
3
+ * aligned to its `anchor`, or to the epoch grid when unanchored (no calendar/timezone math). `5h` resets every five
4
+ * hours, `24h` daily, `7day` weekly, `30day` monthly — each on a rolling grid
5
+ * rather than a calendar boundary. Shared by the budget tracker.
6
+ */
7
+ const HOUR_MS = 3600_000;
8
+ const DAY_MS = 24 * HOUR_MS;
9
+
10
+ export type WindowName = "5h" | "24h" | "7day" | "30day";
11
+
12
+ export type WindowSpec = {
13
+ window: WindowName;
14
+ /** Epoch ms the recurring cycle is anchored to. Absent ⇒ epoch-grid (legacy). */
15
+ anchor?: number;
16
+ };
17
+
18
+ const DURATION_MS: Record<WindowName, number> = {
19
+ "5h": 5 * HOUR_MS,
20
+ "24h": 24 * HOUR_MS,
21
+ "7day": 7 * DAY_MS,
22
+ "30day": 30 * DAY_MS,
23
+ };
24
+
25
+ /** Length (ms) of one window bucket. */
26
+ export function windowDuration(spec: WindowSpec): number {
27
+ return DURATION_MS[spec.window];
28
+ }
29
+
30
+ /** Epoch ms of the START of the bucket containing `now`. Anchored to `spec.anchor`
31
+ * when present (cycles tumble from the anchor); otherwise floored to the epoch grid. */
32
+ export function currentWindowStart(spec: WindowSpec, now: number): number {
33
+ const dur = DURATION_MS[spec.window];
34
+ if (spec.anchor === undefined) return Math.floor(now / dur) * dur;
35
+ if (now <= spec.anchor) return spec.anchor;
36
+ return spec.anchor + Math.floor((now - spec.anchor) / dur) * dur;
37
+ }
38
+
39
+ /** Next reset instant: the end of the current bucket (windowStart + duration). */
40
+ export function nextResetAt(spec: WindowSpec, windowStart: number, _now: number): number {
41
+ return windowStart + DURATION_MS[spec.window];
42
+ }
43
+
44
+ // `DAY_MS` is exported for any future window math that needs a day constant.
45
+ export { DAY_MS };
package/src/db.ts CHANGED
@@ -22,6 +22,7 @@ export interface UsageRow {
22
22
  model: string;
23
23
  tokens_in: number;
24
24
  tokens_out: number;
25
+ reasoning_tokens: number;
25
26
  cached_tokens: number;
26
27
  cost: number;
27
28
  status: number;
@@ -67,7 +68,6 @@ export class UsageDB {
67
68
  private readonly db: DatabaseSync;
68
69
  private readonly insertUsage;
69
70
  private readonly insertLog;
70
- private readonly upsertQuota;
71
71
  private readonly now: () => number;
72
72
 
73
73
  constructor(path: string, now: () => number = Date.now) {
@@ -82,6 +82,7 @@ export class UsageDB {
82
82
  model TEXT NOT NULL,
83
83
  tokens_in INTEGER NOT NULL DEFAULT 0,
84
84
  tokens_out INTEGER NOT NULL DEFAULT 0,
85
+ reasoning_tokens INTEGER NOT NULL DEFAULT 0,
85
86
  cached_tokens INTEGER NOT NULL DEFAULT 0,
86
87
  cost REAL NOT NULL DEFAULT 0,
87
88
  status INTEGER NOT NULL,
@@ -112,25 +113,21 @@ export class UsageDB {
112
113
  if (!cols.some((c) => String(c.name) === "client_key")) {
113
114
  this.db.exec(`ALTER TABLE usage ADD COLUMN client_key TEXT NOT NULL DEFAULT ''`);
114
115
  }
116
+ if (!cols.some((c) => String(c.name) === "reasoning_tokens")) {
117
+ this.db.exec(`ALTER TABLE usage ADD COLUMN reasoning_tokens INTEGER NOT NULL DEFAULT 0`);
118
+ }
115
119
  this.now = now;
116
120
  this.insertUsage = this.db.prepare(`
117
- INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream, client_key)
118
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
121
+ INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, reasoning_tokens, cached_tokens, cost, status, latency_ms, stream, client_key)
122
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
119
123
  `);
120
124
  this.insertLog = this.db.prepare(`
121
125
  INSERT INTO logs (ts, direction, provider, status, request_summary, response_summary)
122
126
  VALUES (?, ?, ?, ?, ?, ?)
123
127
  `);
124
- // upsert keyed on provider_id so each provider keeps one live window row.
125
- this.upsertQuota = this.db.prepare(`
126
- INSERT INTO quota_state (provider_id, window_start, consumed, last_reset)
127
- VALUES (?, ?, ?, ?)
128
- ON CONFLICT(provider_id) DO UPDATE SET window_start = excluded.window_start,
129
- consumed = excluded.consumed, last_reset = excluded.last_reset
130
- `);
131
128
  }
132
129
 
133
- record(row: Omit<UsageRow, "ts" | "client_key"> & { ts?: number; client_key?: string }): void {
130
+ record(row: Omit<UsageRow, "ts" | "client_key" | "reasoning_tokens"> & { ts?: number; client_key?: string; reasoning_tokens?: number }): void {
134
131
  this.insertUsage.run(
135
132
  row.ts ?? this.now(),
136
133
  row.alias,
@@ -138,6 +135,7 @@ export class UsageDB {
138
135
  row.model,
139
136
  row.tokens_in,
140
137
  row.tokens_out,
138
+ row.reasoning_tokens ?? 0,
141
139
  row.cached_tokens,
142
140
  row.cost,
143
141
  row.status,
@@ -276,7 +274,7 @@ export class UsageDB {
276
274
  recent(limit = 100): UsageRow[] {
277
275
  const rows = this.db
278
276
  .prepare(
279
- `SELECT ts, alias, provider, model, tokens_in, tokens_out, cached_tokens,
277
+ `SELECT ts, alias, provider, model, tokens_in, tokens_out, reasoning_tokens, cached_tokens,
280
278
  cost, status, latency_ms, stream, client_key
281
279
  FROM usage ORDER BY id DESC LIMIT ?`,
282
280
  )
@@ -288,6 +286,7 @@ export class UsageDB {
288
286
  model: String(r.model),
289
287
  tokens_in: num(r.tokens_in),
290
288
  tokens_out: num(r.tokens_out),
289
+ reasoning_tokens: num(r.reasoning_tokens),
291
290
  cached_tokens: num(r.cached_tokens),
292
291
  cost: num(r.cost),
293
292
  status: num(r.status),
@@ -297,29 +296,34 @@ export class UsageDB {
297
296
  }));
298
297
  }
299
298
 
300
- // ---- QuotaStore: one live window row per provider (survives restart) ----
301
-
302
- loadQuota(): Array<{ provider_id: string; window_start: number; consumed: number }> {
303
- const rows = this.db.prepare(`SELECT provider_id, window_start, consumed FROM quota_state`).all() as SqlRow[];
304
- return rows.map((r) => ({
305
- provider_id: String(r.provider_id),
306
- window_start: num(r.window_start),
307
- consumed: num(r.consumed),
308
- }));
299
+ close(): void {
300
+ this.db.close();
309
301
  }
302
+ }
303
+
304
+ /** Compute USD cost from token counts and per-1M prices. Separate rates for input (non-cache), cache_read, output, reasoning. */
305
+ export function computeCost(tokensIn: number, tokensOut: number, priceIn?: number, priceOut?: number, priceReasoning?: number, priceCachedRead?: number, cachedTokens?: number, reasoningTokens?: number): number {
306
+ let cost = 0;
310
307
 
311
- saveQuota(providerId: string, windowStart: number, consumed: number): void {
312
- this.upsertQuota.run(providerId, windowStart, consumed, this.now());
308
+ // Non-cached input (input minus cache_read)
309
+ const nonCachedInput = Math.max(0, tokensIn - (cachedTokens ?? 0));
310
+ if (priceIn) cost += (nonCachedInput / 1_000_000) * priceIn;
311
+
312
+ // Cached read — uses separate rate or falls back to input rate
313
+ if (cachedTokens && priceCachedRead) {
314
+ cost += (cachedTokens / 1_000_000) * priceCachedRead;
315
+ } else if (cachedTokens && priceIn) {
316
+ cost += (cachedTokens / 1_000_000) * priceIn;
313
317
  }
314
318
 
315
- close(): void {
316
- this.db.close();
319
+ // Output completion
320
+ if (priceOut) cost += (tokensOut / 1_000_000) * priceOut;
321
+
322
+ // Reasoning tokens — uses dedicated rate or falls back to output rate
323
+ if (reasoningTokens) {
324
+ if (priceReasoning) cost += (reasoningTokens / 1_000_000) * priceReasoning;
325
+ else if (priceOut) cost += (reasoningTokens / 1_000_000) * priceOut;
317
326
  }
318
- }
319
327
 
320
- /** Compute USD cost from token counts and per-1M prices. */
321
- export function computeCost(tokensIn: number, tokensOut: number, priceIn?: number, priceOut?: number): number {
322
- const ci = priceIn ? (tokensIn / 1_000_000) * priceIn : 0;
323
- const co = priceOut ? (tokensOut / 1_000_000) * priceOut : 0;
324
- return ci + co;
328
+ return cost;
325
329
  }