aigetwey 1.2.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +41 -1
- package/README.md +30 -7
- package/assets/screenshot.png +0 -0
- package/config.example.yaml +0 -1
- package/dashboard/src/app/(console)/quota/page.tsx +2 -2
- package/dashboard/src/app/layout.tsx +3 -2
- package/dashboard/src/components/BudgetForm.tsx +15 -17
- package/dashboard/src/components/{QuotaView.tsx → BudgetTracker.tsx} +71 -56
- package/dashboard/src/components/CooldownTimer.tsx +1 -1
- package/dashboard/src/components/EndpointView.tsx +255 -47
- package/dashboard/src/components/LogTable.tsx +36 -26
- package/dashboard/src/components/ProviderManager.tsx +3 -28
- package/dashboard/src/components/Rail.tsx +1 -1
- package/dashboard/src/components/RoutingView.tsx +6 -2
- package/dashboard/src/components/TopBar.tsx +1 -1
- package/dashboard/src/components/ui.tsx +6 -1
- package/dashboard/src/lib/client.ts +6 -5
- package/dashboard/src/lib/gateway.ts +24 -16
- package/dist/adapters/gemini.js +1 -0
- package/dist/adapters/gemini.js.map +1 -1
- package/dist/adapters/openai.js +13 -1
- package/dist/adapters/openai.js.map +1 -1
- package/dist/config.js +86 -23
- package/dist/config.js.map +1 -1
- package/dist/core/budget.js +1 -1
- package/dist/core/budget.js.map +1 -1
- package/dist/core/fallback.js +0 -6
- package/dist/core/fallback.js.map +1 -1
- package/dist/core/handler.js +13 -7
- package/dist/core/handler.js.map +1 -1
- package/dist/core/keysUsage.js +15 -0
- package/dist/core/keysUsage.js.map +1 -0
- package/dist/core/ratelimit.js +15 -0
- package/dist/core/ratelimit.js.map +1 -0
- package/dist/core/state.js +5 -13
- package/dist/core/state.js.map +1 -1
- package/dist/core/window.js +35 -0
- package/dist/core/window.js.map +1 -0
- package/dist/db.js +34 -29
- package/dist/db.js.map +1 -1
- package/dist/routes/admin.js +55 -10
- package/dist/routes/admin.js.map +1 -1
- package/dist/routes/v1.js +14 -1
- package/dist/routes/v1.js.map +1 -1
- package/dist/server.js +1 -7
- package/dist/server.js.map +1 -1
- package/dist/stream/anthropic-stream.js +7 -0
- package/dist/stream/anthropic-stream.js.map +1 -1
- package/dist/stream/gemini-stream.js +2 -1
- package/dist/stream/gemini-stream.js.map +1 -1
- package/dist/stream/openai-stream.js +10 -0
- package/dist/stream/openai-stream.js.map +1 -1
- package/package.json +1 -1
- package/src/adapters/gemini.ts +2 -0
- package/src/adapters/openai.ts +18 -1
- package/src/config.ts +89 -23
- package/src/core/budget.ts +1 -1
- package/src/core/fallback.ts +0 -9
- package/src/core/handler.ts +16 -9
- package/src/core/keysUsage.ts +49 -0
- package/src/core/ratelimit.ts +25 -0
- package/src/core/state.ts +4 -14
- package/src/core/window.ts +45 -0
- package/src/db.ts +35 -31
- package/src/routes/admin.ts +61 -9
- package/src/routes/v1.ts +18 -1
- package/src/server.ts +1 -8
- package/src/stream/anthropic-stream.ts +10 -1
- package/src/stream/chunk.ts +2 -0
- package/src/stream/gemini-stream.ts +3 -2
- package/src/stream/openai-stream.ts +12 -1
- package/src/core/quota.ts +0 -253
package/src/routes/admin.ts
CHANGED
|
@@ -16,6 +16,7 @@ import type { FastifyInstance, FastifyRequest, FastifyReply } from "fastify";
|
|
|
16
16
|
import type { GatewayState } from "../core/state.js";
|
|
17
17
|
import type { UsageDB } from "../db.js";
|
|
18
18
|
import { checkAdminAuth, clientKeyFingerprint, type AdminVerifier } from "../middleware/auth.js";
|
|
19
|
+
import { buildKeyUsageRow } from "../core/keysUsage.js";
|
|
19
20
|
import {
|
|
20
21
|
maskKey,
|
|
21
22
|
serializeConfig,
|
|
@@ -44,6 +45,7 @@ import {
|
|
|
44
45
|
addServerKey,
|
|
45
46
|
editServerKey,
|
|
46
47
|
removeServerKey,
|
|
48
|
+
setServerKeyScope,
|
|
47
49
|
setBudget,
|
|
48
50
|
clearBudget,
|
|
49
51
|
type Config,
|
|
@@ -87,6 +89,23 @@ function maskedConfig(config: Config): Config {
|
|
|
87
89
|
Object.entries(clone.server.key_names).map(([k, name]) => [maskKey(k), name]),
|
|
88
90
|
);
|
|
89
91
|
}
|
|
92
|
+
// key_models / key_rpm are keyed by the RAW key — re-key to the masked form so
|
|
93
|
+
// real keys never leak through /admin/config.
|
|
94
|
+
if (clone.server.key_models) {
|
|
95
|
+
clone.server.key_models = Object.fromEntries(
|
|
96
|
+
Object.entries(clone.server.key_models).map(([k, v]) => [maskKey(k), v]),
|
|
97
|
+
);
|
|
98
|
+
}
|
|
99
|
+
if (clone.server.key_rpm) {
|
|
100
|
+
clone.server.key_rpm = Object.fromEntries(
|
|
101
|
+
Object.entries(clone.server.key_rpm).map(([k, v]) => [maskKey(k), v]),
|
|
102
|
+
);
|
|
103
|
+
}
|
|
104
|
+
if (clone.server.key_expires) {
|
|
105
|
+
clone.server.key_expires = Object.fromEntries(
|
|
106
|
+
Object.entries(clone.server.key_expires).map(([k, v]) => [maskKey(k), v]),
|
|
107
|
+
);
|
|
108
|
+
}
|
|
90
109
|
return clone;
|
|
91
110
|
}
|
|
92
111
|
|
|
@@ -143,12 +162,9 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
143
162
|
reply.send({ providers: deps.state.pool.snapshot(deps.state.config.listProviders()) });
|
|
144
163
|
});
|
|
145
164
|
|
|
146
|
-
//
|
|
147
|
-
app.get("/admin/
|
|
148
|
-
reply.send({
|
|
149
|
-
quota: deps.state.quota.snapshot(deps.state.config.listProviders()),
|
|
150
|
-
budgets: deps.state.budget.statuses(),
|
|
151
|
-
});
|
|
165
|
+
// budget statuses: consumed, limit, and ms until the next scheduled reset.
|
|
166
|
+
app.get("/admin/budgets", requireAdmin, (_req, reply) => {
|
|
167
|
+
reply.send({ budgets: deps.state.budget.statuses() });
|
|
152
168
|
});
|
|
153
169
|
|
|
154
170
|
// add or replace a budget (keyed by scope). Body = Budget; invalid shape or an
|
|
@@ -403,7 +419,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
403
419
|
});
|
|
404
420
|
|
|
405
421
|
// Test ONE model end-to-end (aigetwey's per-model science button). Routes through
|
|
406
|
-
// the real pipeline via handle(), so the ping lands in usage
|
|
422
|
+
// the real pipeline via handle(), so the ping lands in usage exactly like
|
|
407
423
|
// a normal call — and it catches "model not found / not entitled" a /models
|
|
408
424
|
// ping can't. Model id travels as ?model= to survive slashes through the proxy.
|
|
409
425
|
app.post("/admin/providers/:id/models/test", requireAdmin, async (req, reply) => {
|
|
@@ -414,7 +430,7 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
414
430
|
if (!provider) return reply.code(404).send({ error: `provider "${id}" not found` });
|
|
415
431
|
try {
|
|
416
432
|
await handle(
|
|
417
|
-
{ config: deps.state.config, pool: deps.state.pool, db: deps.db
|
|
433
|
+
{ config: deps.state.config, pool: deps.state.pool, db: deps.db },
|
|
418
434
|
"openai",
|
|
419
435
|
{ model: `${id}/${modelId}`, messages: [{ role: "user", content: "ping" }], max_tokens: 1, stream: false },
|
|
420
436
|
);
|
|
@@ -564,6 +580,15 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
564
580
|
applyMutation(reply, (c) => editServerKey(c, i, { name: b?.name }));
|
|
565
581
|
});
|
|
566
582
|
|
|
583
|
+
// set/clear ONE gateway key's scopes (model allowlist + rpm), by index.
|
|
584
|
+
app.put("/admin/endpoint/keys/:index/scope", requireAdmin, (req, reply) => {
|
|
585
|
+
const { index } = req.params as { index: string };
|
|
586
|
+
const i = Number(index);
|
|
587
|
+
if (!Number.isInteger(i)) return reply.code(400).send({ error: "index must be an integer" });
|
|
588
|
+
const b = (req.body ?? {}) as { models?: string[]; rpm?: number | null; expires?: number | null };
|
|
589
|
+
applyMutation(reply, (c) => setServerKeyScope(c, i, { models: b.models, rpm: b.rpm, expires: b.expires }));
|
|
590
|
+
});
|
|
591
|
+
|
|
567
592
|
app.delete("/admin/endpoint/keys/:index", requireAdmin, (req, reply) => {
|
|
568
593
|
const { index } = req.params as { index: string };
|
|
569
594
|
const i = Number(index);
|
|
@@ -584,6 +609,26 @@ export function registerAdminRoutes(app: FastifyInstance, deps: AdminDeps): void
|
|
|
584
609
|
);
|
|
585
610
|
});
|
|
586
611
|
|
|
612
|
+
// per-key spend for the Budgets page "Keys" section: every gateway key, its
|
|
613
|
+
// all-time usage, expiry, and key-scoped budget status (null when uncapped).
|
|
614
|
+
app.get("/admin/keys/usage", requireAdmin, (_req, reply) => {
|
|
615
|
+
if (!deps.db) return reply.code(503).send({ error: "usage tracking disabled" });
|
|
616
|
+
const cfg = deps.state.config.raw;
|
|
617
|
+
const statuses = deps.state.budget.statuses();
|
|
618
|
+
const keys = cfg.server.api_keys.map((k) => {
|
|
619
|
+
const fp = clientKeyFingerprint(k);
|
|
620
|
+
return buildKeyUsageRow({
|
|
621
|
+
fingerprint: fp,
|
|
622
|
+
name: cfg.server.key_names?.[k] ?? maskKey(k),
|
|
623
|
+
masked: maskKey(k),
|
|
624
|
+
expires: cfg.server.key_expires?.[k],
|
|
625
|
+
totals: deps.db!.totals(0, { client_key: fp }),
|
|
626
|
+
budget: statuses.find((s) => s.scope.type === "key" && s.scope.id === fp) ?? null,
|
|
627
|
+
});
|
|
628
|
+
});
|
|
629
|
+
reply.send({ keys });
|
|
630
|
+
});
|
|
631
|
+
|
|
587
632
|
// reveal ONE raw gateway key (the "show key" button on the Endpoint page).
|
|
588
633
|
app.get("/admin/endpoint/keys/:index/reveal", requireAdmin, (req, reply) => {
|
|
589
634
|
const { index } = req.params as { index: string };
|
|
@@ -749,6 +794,13 @@ function endpointPayload(config: Config) {
|
|
|
749
794
|
caveman: config.endpoint.caveman,
|
|
750
795
|
ponytail: config.endpoint.ponytail,
|
|
751
796
|
headroom: config.endpoint.headroom,
|
|
752
|
-
keys: config.server.api_keys.map((k) => ({
|
|
797
|
+
keys: config.server.api_keys.map((k) => ({
|
|
798
|
+
key: maskKey(k),
|
|
799
|
+
fingerprint: clientKeyFingerprint(k),
|
|
800
|
+
name: config.server.key_names?.[k],
|
|
801
|
+
models: config.server.key_models?.[k],
|
|
802
|
+
rpm: config.server.key_rpm?.[k],
|
|
803
|
+
expires: config.server.key_expires?.[k],
|
|
804
|
+
})),
|
|
753
805
|
};
|
|
754
806
|
}
|
package/src/routes/v1.ts
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
import type { FastifyInstance, FastifyReply, FastifyRequest } from "fastify";
|
|
2
2
|
import { checkAuth, extractKey, clientKeyFingerprint } from "../middleware/auth.js";
|
|
3
|
+
import { isKeyExpired } from "../config.js";
|
|
3
4
|
import type { GatewayState } from "../core/state.js";
|
|
4
5
|
import { handle, GatewayError, type HandleDeps } from "../core/handler.js";
|
|
5
6
|
import type { WireFormat } from "../core/canonical.js";
|
|
6
7
|
import type { UsageDB } from "../db.js";
|
|
8
|
+
import { RateLimiter } from "../core/ratelimit.js";
|
|
7
9
|
|
|
8
10
|
/**
|
|
9
11
|
* /v1 proxy surface. Auth-gates on the gateway's own keys (read from state each
|
|
@@ -11,6 +13,8 @@ import type { UsageDB } from "../db.js";
|
|
|
11
13
|
* pipeline (non-stream JSON or SSE stream).
|
|
12
14
|
*/
|
|
13
15
|
export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?: UsageDB): void {
|
|
16
|
+
const limiter = new RateLimiter();
|
|
17
|
+
|
|
14
18
|
const requireAuth = {
|
|
15
19
|
preHandler: (req: FastifyRequest, reply: FastifyReply, done: (err?: Error) => void) => {
|
|
16
20
|
const res = checkAuth(req, state.config.server.api_keys);
|
|
@@ -18,6 +22,19 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
|
|
|
18
22
|
reply.code(res.status ?? 401).send({ error: res.error });
|
|
19
23
|
return; // skip done() to short-circuit the route
|
|
20
24
|
}
|
|
25
|
+
|
|
26
|
+
const presented = extractKey(req);
|
|
27
|
+
if (presented && isKeyExpired(state.config.server, presented, Date.now())) {
|
|
28
|
+
reply.code(403).send({ error: "key expired" });
|
|
29
|
+
return; // short-circuit
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const rpm = presented ? state.config.server.key_rpm?.[presented] : undefined;
|
|
33
|
+
if (presented && rpm && limiter.over(clientKeyFingerprint(presented), rpm)) {
|
|
34
|
+
reply.code(429).send({ error: "rate limit exceeded" });
|
|
35
|
+
return; // short-circuit
|
|
36
|
+
}
|
|
37
|
+
|
|
21
38
|
done();
|
|
22
39
|
},
|
|
23
40
|
};
|
|
@@ -28,9 +45,9 @@ export function registerV1Routes(app: FastifyInstance, state: GatewayState, db?:
|
|
|
28
45
|
return {
|
|
29
46
|
config: state.config,
|
|
30
47
|
pool: state.pool,
|
|
31
|
-
quota: state.quota,
|
|
32
48
|
budget: state.budget,
|
|
33
49
|
db,
|
|
50
|
+
clientKeyModels: presented ? state.config.server.key_models?.[presented] : undefined,
|
|
34
51
|
clientKeyFp: presented ? clientKeyFingerprint(presented) : undefined,
|
|
35
52
|
log: (msg) => app.log.info(msg),
|
|
36
53
|
};
|
package/src/server.ts
CHANGED
|
@@ -4,7 +4,6 @@ import { loadConfig } from "./config.js";
|
|
|
4
4
|
import { registerRoutes } from "./routes/index.js";
|
|
5
5
|
import { GatewayState } from "./core/state.js";
|
|
6
6
|
import { UsageDB } from "./db.js";
|
|
7
|
-
import { QuotaTracker } from "./core/quota.js";
|
|
8
7
|
import { AuthStore } from "./core/authStore.js";
|
|
9
8
|
import { consoleBuffer } from "./core/console-buffer.js";
|
|
10
9
|
|
|
@@ -52,14 +51,8 @@ async function main(): Promise<void> {
|
|
|
52
51
|
const dataDir = resolve(process.env.AIGETWEY_DATA_DIR ?? "data");
|
|
53
52
|
const db = new UsageDB(join(dataDir, "usage.sqlite"));
|
|
54
53
|
|
|
55
|
-
// quota counts persist via the DB so a restart within a window keeps the budget.
|
|
56
|
-
const quota = new QuotaTracker(Date.now, {
|
|
57
|
-
load: () => db.loadQuota(),
|
|
58
|
-
save: (id, start, consumed) => db.saveQuota(id, start, consumed),
|
|
59
|
-
});
|
|
60
|
-
|
|
61
54
|
// holder enables runtime config edits (hot-reload) from the dashboard.
|
|
62
|
-
const state = new GatewayState(configPath, config,
|
|
55
|
+
const state = new GatewayState(configPath, config, db);
|
|
63
56
|
// admin password lives in a hash store (seeded from the env on first run,
|
|
64
57
|
// changeable at runtime from the dashboard).
|
|
65
58
|
const auth = AuthStore.open(dataDir, process.env.AIGETWEY_ADMIN_PASSWORD);
|
package/src/stream/anthropic-stream.ts
CHANGED
|
@@ -25,8 +25,10 @@ interface AnthStreamState {
|
|
|
25
25
|
toolIndexByBlock: Map<number, number>;
|
|
26
26
|
nextToolIndex: number;
|
|
27
27
|
promptTokens: number;
|
|
28
|
+
completionTokens: number;
|
|
28
29
|
cachedTokens?: number;
|
|
29
30
|
cacheCreationTokens?: number;
|
|
31
|
+
reasoningTokens?: number;
|
|
30
32
|
}
|
|
31
33
|
|
|
32
34
|
export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): AsyncGenerator<CanonicalChunk> {
|
|
@@ -36,6 +38,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
36
38
|
toolIndexByBlock: new Map(),
|
|
37
39
|
nextToolIndex: 0,
|
|
38
40
|
promptTokens: 0,
|
|
41
|
+
completionTokens: 0,
|
|
39
42
|
};
|
|
40
43
|
|
|
41
44
|
for await (const ev of events) {
|
|
@@ -60,14 +63,18 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
60
63
|
const u = message?.usage;
|
|
61
64
|
if (u) {
|
|
62
65
|
state.promptTokens = u.input_tokens ?? 0;
|
|
66
|
+
state.completionTokens = u.output_tokens ?? 0;
|
|
67
|
+
state.reasoningTokens = u.thinking_tokens ?? 0;
|
|
63
68
|
state.cachedTokens = u.cache_read_input_tokens;
|
|
64
69
|
state.cacheCreationTokens = u.cache_creation_input_tokens;
|
|
65
70
|
}
|
|
66
71
|
const startChunk = baseChunk(state, { role: "assistant", content: "" }, null);
|
|
67
72
|
startChunk.usage = {
|
|
68
73
|
prompt_tokens: state.promptTokens,
|
|
74
|
+
completion_tokens: state.completionTokens,
|
|
69
75
|
cached_tokens: state.cachedTokens,
|
|
70
76
|
cache_creation_tokens: state.cacheCreationTokens,
|
|
77
|
+
reasoning_tokens: state.reasoningTokens,
|
|
71
78
|
};
|
|
72
79
|
yield startChunk;
|
|
73
80
|
break;
|
|
@@ -99,6 +106,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
99
106
|
if (delta?.type === "text_delta") {
|
|
100
107
|
yield baseChunk(state, { content: delta.text ?? "" }, null);
|
|
101
108
|
} else if (delta?.type === "thinking_delta") {
|
|
109
|
+
state.reasoningTokens = (state.reasoningTokens ?? 0) + 1;
|
|
102
110
|
yield baseChunk(state, { reasoning: delta.thinking ?? "" }, null);
|
|
103
111
|
} else if (delta?.type === "input_json_delta") {
|
|
104
112
|
const toolIndex = state.toolIndexByBlock.get(index);
|
|
@@ -115,7 +123,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
115
123
|
|
|
116
124
|
case "message_delta": {
|
|
117
125
|
const delta = msg.delta as { stop_reason?: string | null } | undefined;
|
|
118
|
-
const usage = msg.usage as { output_tokens?: number } | undefined;
|
|
126
|
+
const usage = msg.usage as { output_tokens?: number; thinking_tokens?: number } | undefined;
|
|
119
127
|
const finish = mapStopReason(delta?.stop_reason);
|
|
120
128
|
const chunk = baseChunk(state, {}, finish);
|
|
121
129
|
chunk.usage = {
|
|
@@ -123,6 +131,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
123
131
|
completion_tokens: usage?.output_tokens ?? 0,
|
|
124
132
|
cached_tokens: state.cachedTokens,
|
|
125
133
|
cache_creation_tokens: state.cacheCreationTokens,
|
|
134
|
+
reasoning_tokens: usage?.thinking_tokens ?? state.reasoningTokens ?? 0,
|
|
126
135
|
};
|
|
127
136
|
yield chunk;
|
|
128
137
|
break;
|
package/src/stream/chunk.ts
CHANGED
|
@@ -29,6 +29,8 @@ export interface CanonicalChunkUsage {
|
|
|
29
29
|
cached_tokens?: number;
|
|
30
30
|
cache_creation_tokens?: number;
|
|
31
31
|
reasoning_tokens?: number;
|
|
32
|
+
prompt_tokens_details?: { cached_tokens?: number };
|
|
33
|
+
completion_tokens_details?: { reasoning_tokens?: number };
|
|
32
34
|
}
|
|
33
35
|
|
|
34
36
|
export type ChunkFinishReason = "stop" | "length" | "tool_calls" | "content_filter" | null;
|
package/src/stream/gemini-stream.ts
CHANGED
|
@@ -76,7 +76,7 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
76
76
|
}
|
|
77
77
|
|
|
78
78
|
const usageMetadata = msg.usageMetadata as
|
|
79
|
-
| { promptTokenCount?: number; candidatesTokenCount?: number; cachedContentTokenCount?: number }
|
|
79
|
+
| { promptTokenCount?: number; candidatesTokenCount?: number; cachedContentTokenCount?: number; thoughtsTokenCount?: number }
|
|
80
80
|
| undefined;
|
|
81
81
|
if (cand?.finishReason || usageMetadata) {
|
|
82
82
|
const chunk = base({}, mapFinish(cand?.finishReason));
|
|
@@ -84,7 +84,8 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
84
84
|
chunk.usage = {
|
|
85
85
|
prompt_tokens: usageMetadata.promptTokenCount ?? 0,
|
|
86
86
|
completion_tokens: usageMetadata.candidatesTokenCount ?? 0,
|
|
87
|
-
cached_tokens: usageMetadata.cachedContentTokenCount,
|
|
87
|
+
cached_tokens: usageMetadata.cachedContentTokenCount ?? 0,
|
|
88
|
+
reasoning_tokens: usageMetadata.thoughtsTokenCount ?? 0,
|
|
88
89
|
};
|
|
89
90
|
}
|
|
90
91
|
yield chunk;
|
package/src/stream/openai-stream.ts
CHANGED
|
@@ -24,12 +24,23 @@ export async function* streamToCanonical(events: AsyncIterable<SSEEvent>): Async
|
|
|
24
24
|
/** Lift vendor reasoning fields into the canonical `delta.reasoning`. */
|
|
25
25
|
function normalize(chunk: CanonicalChunk): CanonicalChunk {
|
|
26
26
|
for (const choice of chunk.choices ?? []) {
|
|
27
|
-
const d = choice.delta as Record<string, unknown> & { reasoning?: string };
|
|
27
|
+
const d = choice.delta as (Record<string, unknown> & { reasoning?: string }) | undefined;
|
|
28
|
+
if (!d) continue;
|
|
28
29
|
if (d.reasoning === undefined) {
|
|
29
30
|
const vendor = (d["reasoning_content"] as string | undefined) ?? (d["reasoning"] as string | undefined);
|
|
30
31
|
if (vendor) d.reasoning = vendor;
|
|
31
32
|
}
|
|
32
33
|
}
|
|
34
|
+
|
|
35
|
+
// Extract reasoning_tokens from OpenAI response.usage.completion_tokens_details.reasoning_tokens
|
|
36
|
+
if (chunk.usage?.completion_tokens_details?.reasoning_tokens !== undefined) {
|
|
37
|
+
chunk.usage.reasoning_tokens = chunk.usage.completion_tokens_details.reasoning_tokens;
|
|
38
|
+
}
|
|
39
|
+
// Extract cached_tokens from OpenAI response.usage.prompt_tokens_details.cached_tokens
|
|
40
|
+
if (chunk.usage?.prompt_tokens_details?.cached_tokens !== undefined) {
|
|
41
|
+
chunk.usage.cached_tokens = chunk.usage.prompt_tokens_details.cached_tokens;
|
|
42
|
+
}
|
|
43
|
+
|
|
33
44
|
return chunk;
|
|
34
45
|
}
|
|
35
46
|
|
package/src/core/quota.ts
DELETED
|
@@ -1,253 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Per-provider token quota tracking with scheduled window resets.
|
|
3
|
-
*
|
|
4
|
-
* Distinct from the key-pool cooldown: a cooldown is a transient penalty after a
|
|
5
|
-
* 429; a quota is a budget that refills on a schedule (a 5-hour rolling window, a
|
|
6
|
-
* daily/weekly/monthly calendar boundary). When a provider's `limit_tokens` is
|
|
7
|
-
* reached before its window resets, routing skips it — like a key that's cooling
|
|
8
|
-
* down, but for the whole provider.
|
|
9
|
-
*
|
|
10
|
-
* State is in-memory, optionally persisted so counts survive a restart within
|
|
11
|
-
* the same window. Calendar boundaries are computed in the provider's timezone.
|
|
12
|
-
*/
|
|
13
|
-
import type { Provider, Quota } from "../config.js";
|
|
14
|
-
|
|
15
|
-
const HOUR_MS = 3600_000;
|
|
16
|
-
const DAY_MS = 24 * HOUR_MS;
|
|
17
|
-
|
|
18
|
-
const WEEKDAYS = ["sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday"];
|
|
19
|
-
|
|
20
|
-
/** Optional persistence hook so counts survive a restart within a window. */
|
|
21
|
-
export interface QuotaStore {
|
|
22
|
-
load(): Array<{ provider_id: string; window_start: number; consumed: number }>;
|
|
23
|
-
save(providerId: string, windowStart: number, consumed: number): void;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
interface QuotaState {
|
|
27
|
-
windowStart: number;
|
|
28
|
-
consumed: number;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
export interface QuotaSnapshot {
|
|
32
|
-
provider: string;
|
|
33
|
-
window: Quota["window"];
|
|
34
|
-
consumed: number;
|
|
35
|
-
limit_tokens?: number;
|
|
36
|
-
/** ms until the next scheduled reset */
|
|
37
|
-
reset_in_ms: number;
|
|
38
|
-
/** 0..1 fraction of the limit used, if a limit is set */
|
|
39
|
-
pct?: number;
|
|
40
|
-
exhausted: boolean;
|
|
41
|
-
/** true when a limit is set and pct >= the quota's alert_at (default 0.8) */
|
|
42
|
-
alert: boolean;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
// ---- timezone-aware calendar math -----------------------------------------
|
|
46
|
-
|
|
47
|
-
/** Wall-clock offset (ms) of `tz` at instant `date`: tzWallAsUTC - actualUTC. */
|
|
48
|
-
function tzOffsetMs(date: Date, tz: string): number {
|
|
49
|
-
const dtf = new Intl.DateTimeFormat("en-US", {
|
|
50
|
-
timeZone: tz,
|
|
51
|
-
hourCycle: "h23",
|
|
52
|
-
year: "numeric",
|
|
53
|
-
month: "2-digit",
|
|
54
|
-
day: "2-digit",
|
|
55
|
-
hour: "2-digit",
|
|
56
|
-
minute: "2-digit",
|
|
57
|
-
second: "2-digit",
|
|
58
|
-
});
|
|
59
|
-
const parts = Object.fromEntries(dtf.formatToParts(date).map((p) => [p.type, p.value]));
|
|
60
|
-
const asUTC = Date.UTC(
|
|
61
|
-
Number(parts.year),
|
|
62
|
-
Number(parts.month) - 1,
|
|
63
|
-
Number(parts.day),
|
|
64
|
-
Number(parts.hour),
|
|
65
|
-
Number(parts.minute),
|
|
66
|
-
Number(parts.second),
|
|
67
|
-
);
|
|
68
|
-
return asUTC - date.getTime();
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
/** Convert a desired wall-clock time in `tz` to an epoch ms. DST-corrected once. */
|
|
72
|
-
function zonedWallToEpoch(y: number, mo: number, d: number, h: number, mi: number, tz: string): number {
|
|
73
|
-
const guessUTC = Date.UTC(y, mo, d, h, mi);
|
|
74
|
-
const offset = tzOffsetMs(new Date(guessUTC), tz);
|
|
75
|
-
let epoch = guessUTC - offset;
|
|
76
|
-
// re-check once: the offset can differ across a DST boundary
|
|
77
|
-
const offset2 = tzOffsetMs(new Date(epoch), tz);
|
|
78
|
-
if (offset2 !== offset) epoch = guessUTC - offset2;
|
|
79
|
-
return epoch;
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
/** Wall-clock parts of `nowMs` in `tz`. */
|
|
83
|
-
function zonedParts(nowMs: number, tz: string) {
|
|
84
|
-
const dtf = new Intl.DateTimeFormat("en-US", {
|
|
85
|
-
timeZone: tz,
|
|
86
|
-
hourCycle: "h23",
|
|
87
|
-
weekday: "long",
|
|
88
|
-
year: "numeric",
|
|
89
|
-
month: "2-digit",
|
|
90
|
-
day: "2-digit",
|
|
91
|
-
hour: "2-digit",
|
|
92
|
-
minute: "2-digit",
|
|
93
|
-
});
|
|
94
|
-
const p = Object.fromEntries(dtf.formatToParts(nowMs).map((x) => [x.type, x.value]));
|
|
95
|
-
return {
|
|
96
|
-
year: Number(p.year),
|
|
97
|
-
month: Number(p.month) - 1,
|
|
98
|
-
day: Number(p.day),
|
|
99
|
-
hour: Number(p.hour),
|
|
100
|
-
minute: Number(p.minute),
|
|
101
|
-
weekday: String(p.weekday).toLowerCase(),
|
|
102
|
-
};
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
function parseHHMM(reset_at: string | undefined): { h: number; m: number } {
|
|
106
|
-
const m = /^(\d{1,2}):(\d{2})$/.exec(reset_at ?? "");
|
|
107
|
-
if (!m) return { h: 0, m: 0 };
|
|
108
|
-
return { h: Math.min(23, Number(m[1])), m: Math.min(59, Number(m[2])) };
|
|
109
|
-
}
|
|
110
|
-
|
|
111
|
-
/**
|
|
112
|
-
* Next reset instant (epoch ms) strictly after `now` for a quota schedule.
|
|
113
|
-
* - 5h: rolling — windowStart + 5h.
|
|
114
|
-
* - daily: next `reset_at` (HH:MM, default 00:00) wall-clock in tz.
|
|
115
|
-
* - weekly: next `reset_at` weekday (default monday) at 00:00 in tz.
|
|
116
|
-
* - monthly: next 1st of month at 00:00 in tz.
|
|
117
|
-
*/
|
|
118
|
-
export type WindowSpec = Pick<Quota, "window" | "reset_at" | "timezone">;
|
|
119
|
-
|
|
120
|
-
export function nextResetAt(quota: WindowSpec, windowStart: number, now: number): number {
|
|
121
|
-
const tz = quota.timezone || "UTC";
|
|
122
|
-
if (quota.window === "5h") return windowStart + 5 * HOUR_MS;
|
|
123
|
-
|
|
124
|
-
const p = zonedParts(now, tz);
|
|
125
|
-
|
|
126
|
-
if (quota.window === "daily") {
|
|
127
|
-
const { h, m } = parseHHMM(quota.reset_at);
|
|
128
|
-
let candidate = zonedWallToEpoch(p.year, p.month, p.day, h, m, tz);
|
|
129
|
-
if (candidate <= now) candidate = zonedWallToEpoch(p.year, p.month, p.day + 1, h, m, tz);
|
|
130
|
-
return candidate;
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
if (quota.window === "weekly") {
|
|
134
|
-
const target = WEEKDAYS.indexOf((quota.reset_at ?? "monday").toLowerCase());
|
|
135
|
-
const targetIdx = target === -1 ? 1 : target;
|
|
136
|
-
const curIdx = WEEKDAYS.indexOf(p.weekday);
|
|
137
|
-
let daysAhead = (targetIdx - curIdx + 7) % 7;
|
|
138
|
-
let candidate = zonedWallToEpoch(p.year, p.month, p.day + daysAhead, 0, 0, tz);
|
|
139
|
-
if (candidate <= now) candidate = zonedWallToEpoch(p.year, p.month, p.day + daysAhead + 7, 0, 0, tz);
|
|
140
|
-
return candidate;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
// monthly: first of next month at 00:00
|
|
144
|
-
return zonedWallToEpoch(p.year, p.month + 1, 1, 0, 0, tz);
|
|
145
|
-
}
|
|
146
|
-
|
|
147
|
-
/**
|
|
148
|
-
* Epoch ms of the START of the window containing `now`.
|
|
149
|
-
* - 5h: fixed 5-hour grid floor (stateless; no per-provider anchor).
|
|
150
|
-
* - daily: today's reset_at in tz, or yesterday's if that's still ahead.
|
|
151
|
-
* - weekly: the most recent occurrence of the target weekday at 00:00 in tz.
|
|
152
|
-
* - monthly: the 1st of the current month at 00:00 in tz.
|
|
153
|
-
*/
|
|
154
|
-
export function currentWindowStart(spec: WindowSpec, now: number): number {
|
|
155
|
-
const tz = spec.timezone || "UTC";
|
|
156
|
-
if (spec.window === "5h") return Math.floor(now / (5 * HOUR_MS)) * (5 * HOUR_MS);
|
|
157
|
-
|
|
158
|
-
const p = zonedParts(now, tz);
|
|
159
|
-
|
|
160
|
-
if (spec.window === "daily") {
|
|
161
|
-
const { h, m } = parseHHMM(spec.reset_at);
|
|
162
|
-
let start = zonedWallToEpoch(p.year, p.month, p.day, h, m, tz);
|
|
163
|
-
if (start > now) start = zonedWallToEpoch(p.year, p.month, p.day - 1, h, m, tz);
|
|
164
|
-
return start;
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
if (spec.window === "weekly") {
|
|
168
|
-
const target = WEEKDAYS.indexOf((spec.reset_at ?? "monday").toLowerCase());
|
|
169
|
-
const targetIdx = target === -1 ? 1 : target;
|
|
170
|
-
const curIdx = WEEKDAYS.indexOf(p.weekday);
|
|
171
|
-
const daysBehind = (curIdx - targetIdx + 7) % 7;
|
|
172
|
-
let start = zonedWallToEpoch(p.year, p.month, p.day - daysBehind, 0, 0, tz);
|
|
173
|
-
if (start > now) start = zonedWallToEpoch(p.year, p.month, p.day - daysBehind - 7, 0, 0, tz);
|
|
174
|
-
return start;
|
|
175
|
-
}
|
|
176
|
-
|
|
177
|
-
// monthly
|
|
178
|
-
return zonedWallToEpoch(p.year, p.month, 1, 0, 0, tz);
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
export class QuotaTracker {
|
|
182
|
-
private readonly states = new Map<string, QuotaState>();
|
|
183
|
-
|
|
184
|
-
constructor(
|
|
185
|
-
private readonly now: () => number = Date.now,
|
|
186
|
-
private readonly store?: QuotaStore,
|
|
187
|
-
) {
|
|
188
|
-
if (store) {
|
|
189
|
-
for (const row of store.load()) {
|
|
190
|
-
this.states.set(row.provider_id, { windowStart: row.window_start, consumed: row.consumed });
|
|
191
|
-
}
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
/**
|
|
196
|
-
* Return the live state for a provider, rolling the window over (resetting
|
|
197
|
-
* consumed to 0) if `now` has crossed the scheduled reset boundary.
|
|
198
|
-
*/
|
|
199
|
-
private current(provider: Provider): QuotaState | null {
|
|
200
|
-
if (!provider.quota) return null;
|
|
201
|
-
const t = this.now();
|
|
202
|
-
const state = this.states.get(provider.id) ?? { windowStart: t, consumed: 0 };
|
|
203
|
-
if (!this.states.has(provider.id)) this.states.set(provider.id, state);
|
|
204
|
-
// boundary is the first reset AFTER this window opened — computed from
|
|
205
|
-
// windowStart, not `now`. Computing it from `now` would always return the
|
|
206
|
-
// NEXT future boundary and so never detect that we've crossed one.
|
|
207
|
-
const reset = nextResetAt(provider.quota, state.windowStart, state.windowStart);
|
|
208
|
-
if (t >= reset) {
|
|
209
|
-
state.windowStart = t;
|
|
210
|
-
state.consumed = 0;
|
|
211
|
-
this.store?.save(provider.id, state.windowStart, state.consumed);
|
|
212
|
-
}
|
|
213
|
-
return state;
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
/** Add consumed tokens for a provider (no-op if it has no quota config). */
|
|
217
|
-
consume(provider: Provider, tokens: number): void {
|
|
218
|
-
const state = this.current(provider);
|
|
219
|
-
if (!state) return;
|
|
220
|
-
state.consumed += Math.max(0, tokens);
|
|
221
|
-
this.store?.save(provider.id, state.windowStart, state.consumed);
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
/** True when a token limit is set AND it's been reached in the current window. */
|
|
225
|
-
isExhausted(provider: Provider): boolean {
|
|
226
|
-
const state = this.current(provider);
|
|
227
|
-
if (!state || !provider.quota?.limit_tokens) return false;
|
|
228
|
-
return state.consumed >= provider.quota.limit_tokens;
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
/** Dashboard view: window, consumed, countdown, and progress for each provider. */
|
|
232
|
-
snapshot(providers: Provider[]): QuotaSnapshot[] {
|
|
233
|
-
const t = this.now();
|
|
234
|
-
return providers.flatMap((provider) => {
|
|
235
|
-
if (!provider.quota) return [];
|
|
236
|
-
const state = this.current(provider)!;
|
|
237
|
-
const reset = nextResetAt(provider.quota, state.windowStart, t);
|
|
238
|
-
const limit = provider.quota.limit_tokens;
|
|
239
|
-
return [
|
|
240
|
-
{
|
|
241
|
-
provider: provider.id,
|
|
242
|
-
window: provider.quota.window,
|
|
243
|
-
consumed: state.consumed,
|
|
244
|
-
limit_tokens: limit,
|
|
245
|
-
reset_in_ms: Math.max(0, reset - t),
|
|
246
|
-
pct: limit ? Math.min(1, state.consumed / limit) : undefined,
|
|
247
|
-
exhausted: limit ? state.consumed >= limit : false,
|
|
248
|
-
alert: limit ? state.consumed / limit >= (provider.quota.alert_at ?? 0.8) : false,
|
|
249
|
-
},
|
|
250
|
-
];
|
|
251
|
-
});
|
|
252
|
-
}
|
|
253
|
-
}
|