aigetwey 1.2.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/CHANGELOG.md +41 -1
  2. package/README.md +30 -7
  3. package/assets/screenshot.png +0 -0
  4. package/config.example.yaml +0 -1
  5. package/dashboard/src/app/(console)/quota/page.tsx +2 -2
  6. package/dashboard/src/app/layout.tsx +3 -2
  7. package/dashboard/src/components/BudgetForm.tsx +15 -17
  8. package/dashboard/src/components/{QuotaView.tsx → BudgetTracker.tsx} +71 -56
  9. package/dashboard/src/components/CooldownTimer.tsx +1 -1
  10. package/dashboard/src/components/EndpointView.tsx +255 -47
  11. package/dashboard/src/components/LogTable.tsx +36 -26
  12. package/dashboard/src/components/ProviderManager.tsx +3 -28
  13. package/dashboard/src/components/Rail.tsx +1 -1
  14. package/dashboard/src/components/RoutingView.tsx +6 -2
  15. package/dashboard/src/components/TopBar.tsx +1 -1
  16. package/dashboard/src/components/ui.tsx +6 -1
  17. package/dashboard/src/lib/client.ts +6 -5
  18. package/dashboard/src/lib/gateway.ts +24 -16
  19. package/dist/adapters/gemini.js +1 -0
  20. package/dist/adapters/gemini.js.map +1 -1
  21. package/dist/adapters/openai.js +13 -1
  22. package/dist/adapters/openai.js.map +1 -1
  23. package/dist/config.js +86 -23
  24. package/dist/config.js.map +1 -1
  25. package/dist/core/budget.js +1 -1
  26. package/dist/core/budget.js.map +1 -1
  27. package/dist/core/fallback.js +0 -6
  28. package/dist/core/fallback.js.map +1 -1
  29. package/dist/core/handler.js +13 -7
  30. package/dist/core/handler.js.map +1 -1
  31. package/dist/core/keysUsage.js +15 -0
  32. package/dist/core/keysUsage.js.map +1 -0
  33. package/dist/core/ratelimit.js +15 -0
  34. package/dist/core/ratelimit.js.map +1 -0
  35. package/dist/core/state.js +5 -13
  36. package/dist/core/state.js.map +1 -1
  37. package/dist/core/window.js +35 -0
  38. package/dist/core/window.js.map +1 -0
  39. package/dist/db.js +34 -29
  40. package/dist/db.js.map +1 -1
  41. package/dist/routes/admin.js +55 -10
  42. package/dist/routes/admin.js.map +1 -1
  43. package/dist/routes/v1.js +14 -1
  44. package/dist/routes/v1.js.map +1 -1
  45. package/dist/server.js +1 -7
  46. package/dist/server.js.map +1 -1
  47. package/dist/stream/anthropic-stream.js +7 -0
  48. package/dist/stream/anthropic-stream.js.map +1 -1
  49. package/dist/stream/gemini-stream.js +2 -1
  50. package/dist/stream/gemini-stream.js.map +1 -1
  51. package/dist/stream/openai-stream.js +10 -0
  52. package/dist/stream/openai-stream.js.map +1 -1
  53. package/package.json +1 -1
  54. package/src/adapters/gemini.ts +2 -0
  55. package/src/adapters/openai.ts +18 -1
  56. package/src/config.ts +89 -23
  57. package/src/core/budget.ts +1 -1
  58. package/src/core/fallback.ts +0 -9
  59. package/src/core/handler.ts +16 -9
  60. package/src/core/keysUsage.ts +49 -0
  61. package/src/core/ratelimit.ts +25 -0
  62. package/src/core/state.ts +4 -14
  63. package/src/core/window.ts +45 -0
  64. package/src/db.ts +35 -31
  65. package/src/routes/admin.ts +61 -9
  66. package/src/routes/v1.ts +18 -1
  67. package/src/server.ts +1 -8
  68. package/src/stream/anthropic-stream.ts +10 -1
  69. package/src/stream/chunk.ts +2 -0
  70. package/src/stream/gemini-stream.ts +3 -2
  71. package/src/stream/openai-stream.ts +12 -1
  72. package/src/core/quota.ts +0 -253
@@ -16,6 +16,7 @@ import type { FastifyInstance, FastifyRequest, FastifyReply } from "fastify";
16
16
  import type { GatewayState } from "../core/state.js";
17
17
  import type { UsageDB } from "../db.js";
18
18
  import { checkAdminAuth, clientKeyFingerprint, type AdminVerifier } from "../middleware/auth.js";
19
+ import { buildKeyUsageRow } from "../core/keysUsage.js";
19
20
  import {
20
21
  maskKey,
21
22
  serializeConfig,
@@ -44,6 +45,7 @@ import {
44
45
  addServerKey,
45
46
  editServerKey,
46
47
  removeServerKey,
48
+ setServerKeyScope,
47
49
  setBudget,
48
50
  clearBudget,
49
51
  type Config,
@@ -87,6 +89,23 @@ function maskedConfig(config: Config): Config {
87
89
  Object.entries(clone.server.key_names).map(([k, name]) => [maskKey(k), name]),
88
90
  );
89
91
  }
92
+ // key_models / key_rpm / key_expires are keyed by the RAW key — re-key to the masked form so
93
+ // real keys never leak through /admin/config.
94
+ if (clone.server.key_models) {
95
+ clone.server.key_models = Object.fromEntries(
96
+ Object.entries(clone.server.key_models).map(([k, v]) => [maskKey(k), v]),
97
+ );
98
+ }
99
+ if (clone.server.key_rpm) {
100
+ clone.server.key_rpm = Object.fromEntries(
101
+ Object.entries(clone.server.key_rpm).map(([k, v]) => [maskKey(k), v]),
102
+ );
103
+ }
104
+ if (clone.server.key_expires) {
105
+ clone.server.key_expires = Object.fromEntries(
106
+ Object.entries(clone.server.key_expires).map(([k, v]) => [maskKey(k), v]),
107
+ );
108
+ }
90
109
  return clone;
91
110
  }
92
111
 
@@ -143,12 +162,9 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
143
162
  reply.send({ providers: deps.state.pool.snapshot(deps.state.config.listProviders()) });
144
163
  });
145
164
 
146
- // per-provider quota: consumed, limit, and ms until the next scheduled reset.
147
- app.get("/admin/quota", requireAdmin, (_req, reply) => {
148
- reply.send({
149
- quota: deps.state.quota.snapshot(deps.state.config.listProviders()),
150
- budgets: deps.state.budget.statuses(),
151
- });
165
+ // budget statuses: consumed, limit, and ms until the next scheduled reset.
166
+ app.get("/admin/budgets", requireAdmin, (_req, reply) => {
167
+ reply.send({ budgets: deps.state.budget.statuses() });
152
168
  });
153
169
 
154
170
  // add or replace a budget (keyed by scope). Body = Budget; invalid shape or an
@@ -403,7 +419,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
403
419
  });
404
420
 
405
421
  // Test ONE model end-to-end (aigetwey's per-model science button). Routes through
406
- // the real pipeline via handle(), so the ping lands in usage/quota exactly like
422
+ // the real pipeline via handle(), so the ping lands in usage exactly like
407
423
  // a normal call — and it catches "model not found / not entitled" a /models
408
424
  // ping can't. Model id travels as ?model= to survive slashes through the proxy.
409
425
  app.post("/admin/providers/:id/models/test", requireAdmin, async (req, reply) => {
@@ -414,7 +430,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
414
430
  if (!provider) return reply.code(404).send({ error: `provider "${id}" not found` });
415
431
  try {
416
432
  await handle(
417
- { config: deps.state.config, pool: deps.state.pool, db: deps.db, quota: deps.state.quota },
433
+ { config: deps.state.config, pool: deps.state.pool, db: deps.db },
418
434
  "openai",
419
435
  { model: `${id}/${modelId}`, messages: [{ role: "user", content: "ping" }], max_tokens: 1, stream: false },
420
436
  );
@@ -564,6 +580,15 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
564
580
  applyMutation(reply, (c) => editServerKey(c, i, { name: b?.name }));
565
581
  });
566
582
 
583
+ // set/clear ONE gateway key's scopes (model allowlist, rpm, expiry), by index.
584
+ app.put("/admin/endpoint/keys/:index/scope", requireAdmin, (req, reply) => {
585
+ const { index } = req.params as { index: string };
586
+ const i = Number(index);
587
+ if (!Number.isInteger(i)) return reply.code(400).send({ error: "index must be an integer" });
588
+ const b = (req.body ?? {}) as { models?: string[]; rpm?: number | null; expires?: number | null };
589
+ applyMutation(reply, (c) => setServerKeyScope(c, i, { models: b.models, rpm: b.rpm, expires: b.expires }));
590
+ });
591
+
567
592
  app.delete("/admin/endpoint/keys/:index", requireAdmin, (req, reply) => {
568
593
  const { index } = req.params as { index: string };
569
594
  const i = Number(index);
@@ -584,6 +609,26 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
584
609
  );
585
610
  });
586
611
 
612
+ // per-key spend for the Budgets page "Keys" section: every gateway key, its
613
+ // all-time usage, expiry, and key-scoped budget status (null when uncapped).
614
+ app.get("/admin/keys/usage", requireAdmin, (_req, reply) => {
615
+ if (!deps.db) return reply.code(503).send({ error: "usage tracking disabled" });
616
+ const cfg = deps.state.config.raw;
617
+ const statuses = deps.state.budget.statuses();
618
+ const keys = cfg.server.api_keys.map((k) => {
619
+ const fp = clientKeyFingerprint(k);
620
+ return buildKeyUsageRow({
621
+ fingerprint: fp,
622
+ name: cfg.server.key_names?.[k] ?? maskKey(k),
623
+ masked: maskKey(k),
624
+ expires: cfg.server.key_expires?.[k],
625
+ totals: deps.db!.totals(0, { client_key: fp }),
626
+ budget: statuses.find((s) => s.scope.type === "key" && s.scope.id === fp) ?? null,
627
+ });
628
+ });
629
+ reply.send({ keys });
630
+ });
631
+
587
632
  // reveal ONE raw gateway key (the "show key" button on the Endpoint page).
588
633
  app.get("/admin/endpoint/keys/:index/reveal", requireAdmin, (req, reply) => {
589
634
  const { index } = req.params as { index: string };
@@ -749,6 +794,13 @@ function endpointPayload(config: Config) {
749
794
  caveman: config.endpoint.caveman,
750
795
  ponytail: config.endpoint.ponytail,
751
796
  headroom: config.endpoint.headroom,
752
- keys: config.server.api_keys.map((k) => ({ key: maskKey(k), name: config.server.key_names?.[k] })),
797
+ keys: config.server.api_keys.map((k) => ({
798
+ key: maskKey(k),
799
+ fingerprint: clientKeyFingerprint(k),
800
+ name: config.server.key_names?.[k],
801
+ models: config.server.key_models?.[k],
802
+ rpm: config.server.key_rpm?.[k],
803
+ expires: config.server.key_expires?.[k],
804
+ })),
753
805
  };
754
806
  }
package/src/routes/v1.ts CHANGED
@@ -1,9 +1,11 @@
1
1
  import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
2
2
  import { checkAuth, extractKey, clientKeyFingerprint } from "../middleware/auth.js";
3
+ import { isKeyExpired } from "../config.js";
3
4
  import type { GatewayState } from "../core/state.js";
4
5
  import { handle, GatewayError, type HandleDeps } from "../core/handler.js";
5
6
  import type { WireFormat } from "../core/canonical.js";
6
7
  import type { UsageDB } from "../db.js";
8
+ import { RateLimiter } from "../core/ratelimit.js";
7
9
 
8
10
  /**
9
11
  * /v1 proxy surface. Auth-gates on the gateway's own keys (read from state each
@@ -11,6 +13,8 @@ import type { UsageDB } from "../db.js";
11
13
  * pipeline (non-stream JSON or SSE stream).
12
14
  */
13
15
  export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?: UsageDB): void {
16
+ const limiter = new RateLimiter();
17
+
14
18
  const requireAuth = {
15
19
  preHandler: (req: FastifyRequest, reply: FastifyReply, done: (err?: Error) => void) => {
16
20
  const res = checkAuth(req, state.config.server.api_keys);
@@ -18,6 +22,19 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
18
22
  reply.code(res.status ?? 401).send({ error: res.error });
19
23
  return; // skip done() to short-circuit the route
20
24
  }
25
+
26
+ const presented = extractKey(req);
27
+ if (presented && isKeyExpired(state.config.server, presented, Date.now())) {
28
+ reply.code(403).send({ error: "key expired" });
29
+ return; // short-circuit
30
+ }
31
+
32
+ const rpm = presented ? state.config.server.key_rpm?.[presented] : undefined;
33
+ if (presented && rpm && limiter.over(clientKeyFingerprint(presented), rpm)) {
34
+ reply.code(429).send({ error: "rate limit exceeded" });
35
+ return; // short-circuit
36
+ }
37
+
21
38
  done();
22
39
  },
23
40
  };
@@ -28,9 +45,9 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
28
45
  return {
29
46
  config: state.config,
30
47
  pool: state.pool,
31
- quota: state.quota,
32
48
  budget: state.budget,
33
49
  db,
50
+ clientKeyModels: presented ? state.config.server.key_models?.[presented] : undefined,
34
51
  clientKeyFp: presented ? clientKeyFingerprint(presented) : undefined,
35
52
  log: (msg) => app.log.info(msg),
36
53
  };
package/src/server.ts CHANGED
@@ -4,7 +4,6 @@ import { loadConfig } from "./config.js";
4
4
  import { registerRoutes } from "./routes/index.js";
5
5
  import { GatewayState } from "./core/state.js";
6
6
  import { UsageDB } from "./db.js";
7
- import { QuotaTracker } from "./core/quota.js";
8
7
  import { AuthStore } from "./core/authStore.js";
9
8
  import { consoleBuffer } from "./core/console-buffer.js";
10
9
 
@@ -52,14 +51,8 @@ async function main(): Promise<void> {
52
51
  const dataDir = resolve(process.env.AIGETWEY_DATA_DIR ?? "data");
53
52
  const db = new UsageDB(join(dataDir, "usage.sqlite"));
54
53
 
55
- // quota counts persist via the DB so a restart within a window keeps the budget.
56
- const quota = new QuotaTracker(Date.now, {
57
- load: () => db.loadQuota(),
58
- save: (id, start, consumed) => db.saveQuota(id, start, consumed),
59
- });
60
-
61
54
  // holder enables runtime config edits (hot-reload) from the dashboard.
62
- const state = new GatewayState(configPath, config, quota, db);
55
+ const state = new GatewayState(configPath, config, db);
63
56
  // admin password lives in a hash store (seeded from the env on first run,
64
57
  // changeable at runtime from the dashboard).
65
58
  const auth = AuthStore.open(dataDir, process.env.AIGETWEY_ADMIN_PASSWORD);
@@ -25,8 +25,10 @@ interface AnthStreamState {
25
25
  toolIndexByBlock: Map<number, number>;
26
26
  nextToolIndex: number;
27
27
  promptTokens: number;
28
+ completionTokens: number;
28
29
  cachedTokens?: number;
29
30
  cacheCreationTokens?: number;
31
+ reasoningTokens?: number;
30
32
  }
31
33
 
32
34
  export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): AsyncGenerator<CanonicalChunk> {
@@ -36,6 +38,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
36
38
  toolIndexByBlock: new Map(),
37
39
  nextToolIndex: 0,
38
40
  promptTokens: 0,
41
+ completionTokens: 0,
39
42
  };
40
43
 
41
44
  for await (const ev of events) {
@@ -60,14 +63,18 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
60
63
  const u = message?.usage;
61
64
  if (u) {
62
65
  state.promptTokens = u.input_tokens ?? 0;
66
+ state.completionTokens = u.output_tokens ?? 0;
67
+ state.reasoningTokens = u.thinking_tokens ?? 0;
63
68
  state.cachedTokens = u.cache_read_input_tokens;
64
69
  state.cacheCreationTokens = u.cache_creation_input_tokens;
65
70
  }
66
71
  const startChunk = baseChunk(state, { role: "assistant", content: "" }, null);
67
72
  startChunk.usage = {
68
73
  prompt_tokens: state.promptTokens,
74
+ completion_tokens: state.completionTokens,
69
75
  cached_tokens: state.cachedTokens,
70
76
  cache_creation_tokens: state.cacheCreationTokens,
77
+ reasoning_tokens: state.reasoningTokens,
71
78
  };
72
79
  yield startChunk;
73
80
  break;
@@ -99,6 +106,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
99
106
  if (delta?.type === "text_delta") {
100
107
  yield baseChunk(state, { content: delta.text ?? "" }, null);
101
108
  } else if (delta?.type === "thinking_delta") {
109
+ state.reasoningTokens = (state.reasoningTokens ?? 0) + 1;
102
110
  yield baseChunk(state, { reasoning: delta.thinking ?? "" }, null);
103
111
  } else if (delta?.type === "input_json_delta") {
104
112
  const toolIndex = state.toolIndexByBlock.get(index);
@@ -115,7 +123,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
115
123
 
116
124
  case "message_delta": {
117
125
  const delta = msg.delta as { stop_reason?: string | null } | undefined;
118
- const usage = msg.usage as { output_tokens?: number } | undefined;
126
+ const usage = msg.usage as { output_tokens?: number; thinking_tokens?: number } | undefined;
119
127
  const finish = mapStopReason(delta?.stop_reason);
120
128
  const chunk = baseChunk(state, {}, finish);
121
129
  chunk.usage = {
@@ -123,6 +131,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
123
131
  completion_tokens: usage?.output_tokens ?? 0,
124
132
  cached_tokens: state.cachedTokens,
125
133
  cache_creation_tokens: state.cacheCreationTokens,
134
+ reasoning_tokens: usage?.thinking_tokens ?? state.reasoningTokens ?? 0,
126
135
  };
127
136
  yield chunk;
128
137
  break;
@@ -29,6 +29,8 @@ export interface CanonicalChunkUsage {
29
29
  cached_tokens?: number;
30
30
  cache_creation_tokens?: number;
31
31
  reasoning_tokens?: number;
32
+ prompt_tokens_details?: { cached_tokens?: number };
33
+ completion_tokens_details?: { reasoning_tokens?: number };
32
34
  }
33
35
 
34
36
  export type ChunkFinishReason = "stop" | "length" | "tool_calls" | "content_filter" | null;
@@ -76,7 +76,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
76
76
  }
77
77
 
78
78
  const usageMetadata = msg.usageMetadata as
79
- | { promptTokenCount?: number; candidatesTokenCount?: number; cachedContentTokenCount?: number }
79
+ | { promptTokenCount?: number; candidatesTokenCount?: number; cachedContentTokenCount?: number; thoughtsTokenCount?: number }
80
80
  | undefined;
81
81
  if (cand?.finishReason || usageMetadata) {
82
82
  const chunk = base({}, mapFinish(cand?.finishReason));
@@ -84,7 +84,8 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
84
84
  chunk.usage = {
85
85
  prompt_tokens: usageMetadata.promptTokenCount ?? 0,
86
86
  completion_tokens: usageMetadata.candidatesTokenCount ?? 0,
87
- cached_tokens: usageMetadata.cachedContentTokenCount,
87
+ cached_tokens: usageMetadata.cachedContentTokenCount ?? 0,
88
+ reasoning_tokens: usageMetadata.thoughtsTokenCount ?? 0,
88
89
  };
89
90
  }
90
91
  yield chunk;
@@ -24,12 +24,23 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
24
24
  /** Lift vendor reasoning fields into the canonical `delta.reasoning`. */
25
25
  function normalize(chunk: CanonicalChunk): CanonicalChunk {
26
26
  for (const choice of chunk.choices ?? []) {
27
- const d = choice.delta as Record<string, unknown> & { reasoning?: string };
27
+ const d = choice.delta as (Record<string, unknown> & { reasoning?: string }) | undefined;
28
+ if (!d) continue;
28
29
  if (d.reasoning === undefined) {
29
30
  const vendor = (d["reasoning_content"] as string | undefined) ?? (d["reasoning"] as string | undefined);
30
31
  if (vendor) d.reasoning = vendor;
31
32
  }
32
33
  }
34
+
35
+ // Extract reasoning_tokens from OpenAI response.usage.completion_tokens_details.reasoning_tokens
36
+ if (chunk.usage?.completion_tokens_details?.reasoning_tokens !== undefined) {
37
+ chunk.usage.reasoning_tokens = chunk.usage.completion_tokens_details.reasoning_tokens;
38
+ }
39
+ // Extract cached_tokens from OpenAI response.usage.prompt_tokens_details.cached_tokens
40
+ if (chunk.usage?.prompt_tokens_details?.cached_tokens !== undefined) {
41
+ chunk.usage.cached_tokens = chunk.usage.prompt_tokens_details.cached_tokens;
42
+ }
43
+
33
44
  return chunk;
34
45
  }
35
46
 
package/src/core/quota.ts DELETED
@@ -1,253 +0,0 @@
1
- /**
2
- * Per-provider token quota tracking with scheduled window resets.
3
- *
4
- * Distinct from the key-pool cooldown: a cooldown is a transient penalty after a
5
- * 429; a quota is a budget that refills on a schedule (a 5-hour rolling window, a
6
- * daily/weekly/monthly calendar boundary). When a provider's `limit_tokens` is
7
- * reached before its window resets, routing skips it — like a key that's cooling
8
- * down, but for the whole provider.
9
- *
10
- * State is in-memory, optionally persisted so counts survive a restart within
11
- * the same window. Calendar boundaries are computed in the provider's timezone.
12
- */
13
- import type { Provider, Quota } from "../config.js";
14
-
15
- const HOUR_MS = 3600_000;
16
- const DAY_MS = 24 * HOUR_MS;
17
-
18
- const WEEKDAYS = ["sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"];
19
-
20
- /** Optional persistence hook so counts survive a restart within a window. */
21
- export interface QuotaStore {
22
- load(): Array<{ provider_id: string; window_start: number; consumed: number }>;
23
- save(providerId: string, windowStart: number, consumed: number): void;
24
- }
25
-
26
- interface QuotaState {
27
- windowStart: number;
28
- consumed: number;
29
- }
30
-
31
- export interface QuotaSnapshot {
32
- provider: string;
33
- window: Quota["window"];
34
- consumed: number;
35
- limit_tokens?: number;
36
- /** ms until the next scheduled reset */
37
- reset_in_ms: number;
38
- /** 0..1 fraction of the limit used, if a limit is set */
39
- pct?: number;
40
- exhausted: boolean;
41
- /** true when a limit is set and pct >= the quota's alert_at (default 0.8) */
42
- alert: boolean;
43
- }
44
-
45
- // ---- timezone-aware calendar math -----------------------------------------
46
-
47
- /** Wall-clock offset (ms) of `tz` at instant `date`: tzWallAsUTC - actualUTC. */
48
- function tzOffsetMs(date: Date, tz: string): number {
49
- const dtf = new Intl.DateTimeFormat("en-US", {
50
- timeZone: tz,
51
- hourCycle: "h23",
52
- year: "numeric",
53
- month: "2-digit",
54
- day: "2-digit",
55
- hour: "2-digit",
56
- minute: "2-digit",
57
- second: "2-digit",
58
- });
59
- const parts = Object.fromEntries(dtf.formatToParts(date).map((p) => [p.type, p.value]));
60
- const asUTC = Date.UTC(
61
- Number(parts.year),
62
- Number(parts.month) - 1,
63
- Number(parts.day),
64
- Number(parts.hour),
65
- Number(parts.minute),
66
- Number(parts.second),
67
- );
68
- return asUTC - date.getTime();
69
- }
70
-
71
- /** Convert a desired wall-clock time in `tz` to an epoch ms. DST-corrected once. */
72
- function zonedWallToEpoch(y: number, mo: number, d: number, h: number, mi: number, tz: string): number {
73
- const guessUTC = Date.UTC(y, mo, d, h, mi);
74
- const offset = tzOffsetMs(new Date(guessUTC), tz);
75
- let epoch = guessUTC - offset;
76
- // re-check once: the offset can differ across a DST boundary
77
- const offset2 = tzOffsetMs(new Date(epoch), tz);
78
- if (offset2 !== offset) epoch = guessUTC - offset2;
79
- return epoch;
80
- }
81
-
82
- /** Wall-clock parts of `nowMs` in `tz`. */
83
- function zonedParts(nowMs: number, tz: string) {
84
- const dtf = new Intl.DateTimeFormat("en-US", {
85
- timeZone: tz,
86
- hourCycle: "h23",
87
- weekday: "long",
88
- year: "numeric",
89
- month: "2-digit",
90
- day: "2-digit",
91
- hour: "2-digit",
92
- minute: "2-digit",
93
- });
94
- const p = Object.fromEntries(dtf.formatToParts(nowMs).map((x) => [x.type, x.value]));
95
- return {
96
- year: Number(p.year),
97
- month: Number(p.month) - 1,
98
- day: Number(p.day),
99
- hour: Number(p.hour),
100
- minute: Number(p.minute),
101
- weekday: String(p.weekday).toLowerCase(),
102
- };
103
- }
104
-
105
- function parseHHMM(reset_at: string | undefined): { h: number; m: number } {
106
- const m = /^(\d{1,2}):(\d{2})$/.exec(reset_at ?? "");
107
- if (!m) return { h: 0, m: 0 };
108
- return { h: Math.min(23, Number(m[1])), m: Math.min(59, Number(m[2])) };
109
- }
110
-
111
- /**
112
- * Next reset instant (epoch ms) strictly after `now` for a quota schedule.
113
- * - 5h: rolling — windowStart + 5h.
114
- * - daily: next `reset_at` (HH:MM, default 00:00) wall-clock in tz.
115
- * - weekly: next `reset_at` weekday (default monday) at 00:00 in tz.
116
- * - monthly: next 1st of month at 00:00 in tz.
117
- */
118
- export type WindowSpec = Pick<Quota, "window" | "reset_at" | "timezone">;
119
-
120
- export function nextResetAt(quota: WindowSpec, windowStart: number, now: number): number {
121
- const tz = quota.timezone || "UTC";
122
- if (quota.window === "5h") return windowStart + 5 * HOUR_MS;
123
-
124
- const p = zonedParts(now, tz);
125
-
126
- if (quota.window === "daily") {
127
- const { h, m } = parseHHMM(quota.reset_at);
128
- let candidate = zonedWallToEpoch(p.year, p.month, p.day, h, m, tz);
129
- if (candidate <= now) candidate = zonedWallToEpoch(p.year, p.month, p.day + 1, h, m, tz);
130
- return candidate;
131
- }
132
-
133
- if (quota.window === "weekly") {
134
- const target = WEEKDAYS.indexOf((quota.reset_at ?? "monday").toLowerCase());
135
- const targetIdx = target === -1 ? 1 : target;
136
- const curIdx = WEEKDAYS.indexOf(p.weekday);
137
- let daysAhead = (targetIdx - curIdx + 7) % 7;
138
- let candidate = zonedWallToEpoch(p.year, p.month, p.day + daysAhead, 0, 0, tz);
139
- if (candidate <= now) candidate = zonedWallToEpoch(p.year, p.month, p.day + daysAhead + 7, 0, 0, tz);
140
- return candidate;
141
- }
142
-
143
- // monthly: first of next month at 00:00
144
- return zonedWallToEpoch(p.year, p.month + 1, 1, 0, 0, tz);
145
- }
146
-
147
- /**
148
- * Epoch ms of the START of the window containing `now`.
149
- * - 5h: fixed 5-hour grid floor (stateless; no per-provider anchor).
150
- * - daily: today's reset_at in tz, or yesterday's if that's still ahead.
151
- * - weekly: the most recent occurrence of the target weekday at 00:00 in tz.
152
- * - monthly: the 1st of the current month at 00:00 in tz.
153
- */
154
- export function currentWindowStart(spec: WindowSpec, now: number): number {
155
- const tz = spec.timezone || "UTC";
156
- if (spec.window === "5h") return Math.floor(now / (5 * HOUR_MS)) * (5 * HOUR_MS);
157
-
158
- const p = zonedParts(now, tz);
159
-
160
- if (spec.window === "daily") {
161
- const { h, m } = parseHHMM(spec.reset_at);
162
- let start = zonedWallToEpoch(p.year, p.month, p.day, h, m, tz);
163
- if (start > now) start = zonedWallToEpoch(p.year, p.month, p.day - 1, h, m, tz);
164
- return start;
165
- }
166
-
167
- if (spec.window === "weekly") {
168
- const target = WEEKDAYS.indexOf((spec.reset_at ?? "monday").toLowerCase());
169
- const targetIdx = target === -1 ? 1 : target;
170
- const curIdx = WEEKDAYS.indexOf(p.weekday);
171
- const daysBehind = (curIdx - targetIdx + 7) % 7;
172
- let start = zonedWallToEpoch(p.year, p.month, p.day - daysBehind, 0, 0, tz);
173
- if (start > now) start = zonedWallToEpoch(p.year, p.month, p.day - daysBehind - 7, 0, 0, tz);
174
- return start;
175
- }
176
-
177
- // monthly
178
- return zonedWallToEpoch(p.year, p.month, 1, 0, 0, tz);
179
- }
180
-
181
- export class QuotaTracker {
182
- private readonly states = new Map<string, QuotaState>();
183
-
184
- constructor(
185
- private readonly now: () => number = Date.now,
186
- private readonly store?: QuotaStore,
187
- ) {
188
- if (store) {
189
- for (const row of store.load()) {
190
- this.states.set(row.provider_id, { windowStart: row.window_start, consumed: row.consumed });
191
- }
192
- }
193
- }
194
-
195
- /**
196
- * Return the live state for a provider, rolling the window over (resetting
197
- * consumed to 0) if `now` has crossed the scheduled reset boundary.
198
- */
199
- private current(provider: Provider): QuotaState | null {
200
- if (!provider.quota) return null;
201
- const t = this.now();
202
- const state = this.states.get(provider.id) ?? { windowStart: t, consumed: 0 };
203
- if (!this.states.has(provider.id)) this.states.set(provider.id, state);
204
- // boundary is the first reset AFTER this window opened — computed from
205
- // windowStart, not `now`. Computing it from `now` would always return the
206
- // NEXT future boundary and so never detect that we've crossed one.
207
- const reset = nextResetAt(provider.quota, state.windowStart, state.windowStart);
208
- if (t >= reset) {
209
- state.windowStart = t;
210
- state.consumed = 0;
211
- this.store?.save(provider.id, state.windowStart, state.consumed);
212
- }
213
- return state;
214
- }
215
-
216
- /** Add consumed tokens for a provider (no-op if it has no quota config). */
217
- consume(provider: Provider, tokens: number): void {
218
- const state = this.current(provider);
219
- if (!state) return;
220
- state.consumed += Math.max(0, tokens);
221
- this.store?.save(provider.id, state.windowStart, state.consumed);
222
- }
223
-
224
- /** True when a token limit is set AND it's been reached in the current window. */
225
- isExhausted(provider: Provider): boolean {
226
- const state = this.current(provider);
227
- if (!state || !provider.quota?.limit_tokens) return false;
228
- return state.consumed >= provider.quota.limit_tokens;
229
- }
230
-
231
- /** Dashboard view: window, consumed, countdown, and progress for each provider. */
232
- snapshot(providers: Provider[]): QuotaSnapshot[] {
233
- const t = this.now();
234
- return providers.flatMap((provider) => {
235
- if (!provider.quota) return [];
236
- const state = this.current(provider)!;
237
- const reset = nextResetAt(provider.quota, state.windowStart, t);
238
- const limit = provider.quota.limit_tokens;
239
- return [
240
- {
241
- provider: provider.id,
242
- window: provider.quota.window,
243
- consumed: state.consumed,
244
- limit_tokens: limit,
245
- reset_in_ms: Math.max(0, reset - t),
246
- pct: limit ? Math.min(1, state.consumed / limit) : undefined,
247
- exhausted: limit ? state.consumed >= limit : false,
248
- alert: limit ? state.consumed / limit >= (provider.quota.alert_at ?? 0.8) : false,
249
- },
250
- ];
251
- });
252
- }
253
- }