aigetwey 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -3
- package/README.md +4 -4
- package/config.example.yaml +6 -5
- package/dashboard/next.config.ts +6 -0
- package/dashboard/src/app/globals.css +47 -0
- package/dashboard/src/components/BudgetForm.tsx +258 -0
- package/dashboard/src/components/EndpointView.tsx +30 -0
- package/dashboard/src/components/LogTable.tsx +90 -25
- package/dashboard/src/components/ModelPicker.tsx +15 -7
- package/dashboard/src/components/ProviderDetail.tsx +27 -29
- package/dashboard/src/components/ProviderManager.tsx +36 -3
- package/dashboard/src/components/QuotaView.tsx +95 -81
- package/dashboard/src/components/Rail.tsx +1 -1
- package/dashboard/src/components/RoutingView.tsx +2 -2
- package/dashboard/src/components/ToolDetail.tsx +5 -3
- package/dashboard/src/components/TopBar.tsx +1 -1
- package/dashboard/src/components/UsageView.tsx +25 -6
- package/dashboard/src/lib/cliTools.ts +0 -43
- package/dashboard/src/lib/client.ts +9 -3
- package/dashboard/src/lib/gateway.ts +12 -1
- package/dashboard/src/{middleware.ts → proxy.ts} +8 -6
- package/dist/cli.js +43 -8
- package/dist/cli.js.map +1 -1
- package/dist/config.js +56 -10
- package/dist/config.js.map +1 -1
- package/dist/core/budget.js +61 -16
- package/dist/core/budget.js.map +1 -1
- package/dist/core/handler.js +20 -6
- package/dist/core/handler.js.map +1 -1
- package/dist/core/state.js +10 -2
- package/dist/core/state.js.map +1 -1
- package/dist/db.js +39 -5
- package/dist/db.js.map +1 -1
- package/dist/middleware/auth.js +15 -8
- package/dist/middleware/auth.js.map +1 -1
- package/dist/routes/admin.js +26 -8
- package/dist/routes/admin.js.map +1 -1
- package/dist/routes/v1.js +15 -11
- package/dist/routes/v1.js.map +1 -1
- package/dist/server.js +4 -0
- package/dist/server.js.map +1 -1
- package/dist/upstream/client.js +9 -0
- package/dist/upstream/client.js.map +1 -1
- package/package.json +3 -4
- package/src/cli.ts +44 -8
- package/src/config.ts +57 -10
- package/src/core/budget.ts +77 -24
- package/src/core/handler.ts +24 -7
- package/src/core/state.ts +17 -2
- package/src/db.ts +50 -5
- package/src/middleware/auth.ts +18 -8
- package/src/routes/admin.ts +33 -12
- package/src/routes/v1.ts +15 -11
- package/src/server.ts +4 -0
- package/src/upstream/client.ts +9 -0
- package/dashboard/src/components/BudgetEditor.tsx +0 -97
package/dist/upstream/client.js
CHANGED
|
@@ -57,6 +57,15 @@ function buildBody(provider, req, model, stream, thinkingIntent) {
|
|
|
57
57
|
const adapter = adapterFor(provider.format);
|
|
58
58
|
const upstreamReq = { ...req, model, stream };
|
|
59
59
|
const out = adapter.requestFromCanonical(upstreamReq);
|
|
60
|
+
// OpenAI-compatible streams omit usage entirely unless you opt in — without this
|
|
61
|
+
// every streamed call through an openai-format provider logs 0 tokens in/out
|
|
62
|
+
// (anthropic/gemini report usage inline, so they're unaffected). Ask for the
|
|
63
|
+
// final usage chunk; the handler taps it for accounting. Preserve a usage opt-in
|
|
64
|
+
// the client already set.
|
|
65
|
+
if (stream && provider.format === "openai") {
|
|
66
|
+
const existing = (out.stream_options ?? {});
|
|
67
|
+
out.stream_options = { ...existing, include_usage: true };
|
|
68
|
+
}
|
|
60
69
|
// Normalize thinking into THIS provider's native format, keyed by the upstream
|
|
61
70
|
// model's capabilities. No-op for non-reasoning models. Runs per-attempt so each
|
|
62
71
|
// provider in a fallback chain gets the right shape.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/upstream/client.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,OAAO,EAAE,MAAM,QAAQ,CAAC;AAGjC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAuB,MAAM,kCAAkC,CAAC;AAStF;;;;;GAKG;AACH,SAAS,iBAAiB,CAAC,MAA0B;IACnD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC,CAAC,kCAAkC;IACzE,IAAI,MAAM,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IAC/B,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,GAAuB;IAC/D,MAAM,OAAO,GAA2B;QACtC,cAAc,EAAE,kBAAkB;QAClC,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC;KAC5B,CAAC;IACF,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;QACpC,IAAI,GAAG;YAAE,OAAO,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC;QACpC,OAAO,CAAC,mBAAmB,CAAC,KAAK,YAAY,CAAC;IAChD,CAAC;SAAM,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACxC,IAAI,GAAG;YAAE,OAAO,CAAC,gBAAgB,CAAC,GAAG,GAAG,CAAC;IAC3C,CAAC;SAAM,CAAC;QACN,IAAI,GAAG;YAAE,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,GAAG,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;GAGG;AACH,SAAS,QAAQ,CAAC,QAAkB,EAAE,KAAa,EAAE,MAAe;IAClE,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAClD,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,+BAA+B,CAAC,CAAC,CAAC,iBAAiB,CAAC;QAC5E,OAAO,GAAG,IAAI,WAAW,kBAAkB,CAAC,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;IACjE,CAAC;IACD,OAAO,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC;AACtF,CAAC;AAED,SAAS,SAAS,CAChB,QAAkB,EAClB,GAAqB,EACrB,KAAa,EACb,MAAe,EACf,cAAsC;IAEtC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,WAAW,GAAqB,EAAE,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAChE,MAAM,GAAG,GAAG,OAAO,CAAC,oBAAoB,CAAC,WAAW,CAA4B,CAAC;IACjF,+EAA+E;IAC/E,iFAAiF;IACjF,qDAAqD;IACrD,aAAa,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,cAAc,CAAC,CAAC;IACxE,OAAO,GAAG,CAAC;AACb,CAAC;AAWD,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,QAAkB,EAClB,GAAqB,EACrB,KAAa,EACb,IAAqG;IAErG,MAAM,GAAG,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACnD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;IAE/E,IAAI,GAAG,CAAC;IACR,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE;YACvB,MAAM,EAAE,MAAM;YACd,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,0DAA0D;YAC1D,cAAc,EAAE,OAAO;YACvB,WAAW,EAAE,OAAO;SACrB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,YAAY,QAAQ,CAAC,EAAE,oBAAqB,CAAW,CAAC,OAAO,EAAE,CAAkB,CAAC;QAC1G,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC;QACrB,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,YAAY,QAAQ,CAAC,EAAE,aAAa,GAAG,CAAC,UAAU,EAAE,CAAkB,CAAC;QAC7F,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC;QAC5B,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC;QAChB,GAAG,CAAC,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAClD,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAEzD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC5C,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,CAAC,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;AACxE,CAAC;AASD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAkB,EAAE,GAAuB;IAC5E,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,GAAG,IAAI,SAAS,CAAC;IAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC,CAAC;QACxG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACtB,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,EAAE,EAAE,GAAG,CAAC,UAAU,IAAI,GAAG,IAAI,GAAG,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;IACxG,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAG,CAAW,CAAC,OAAO,EAAE,CAAC;IACtE,CAAC;AACH,CAAC;AAED,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,CAAC"}
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/upstream/client.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,OAAO,EAAE,MAAM,QAAQ,CAAC;AAGjC,OAAO,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAC;AAClD,OAAO,EAAE,aAAa,EAAuB,MAAM,kCAAkC,CAAC;AAStF;;;;;GAKG;AACH,SAAS,iBAAiB,CAAC,MAA0B;IACnD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC,CAAC,kCAAkC;IACzE,IAAI,MAAM,KAAK,GAAG;QAAE,OAAO,IAAI,CAAC;IAChC,IAAI,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IAC/B,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,GAAuB;IAC/D,MAAM,OAAO,GAA2B;QACtC,cAAc,EAAE,kBAAkB;QAClC,GAAG,CAAC,QAAQ,CAAC,OAAO,IAAI,EAAE,CAAC;KAC5B,CAAC;IACF,IAAI,QAAQ,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;QACpC,IAAI,GAAG;YAAE,OAAO,CAAC,WAAW,CAAC,GAAG,GAAG,CAAC;QACpC,OAAO,CAAC,mBAAmB,CAAC,KAAK,YAAY,CAAC;IAChD,CAAC;SAAM,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACxC,IAAI,GAAG;YAAE,OAAO,CAAC,gBAAgB,CAAC,GAAG,GAAG,CAAC;IAC3C,CAAC;SAAM,CAAC;QACN,IAAI,GAAG;YAAE,OAAO,CAAC,eAAe,CAAC,GAAG,UAAU,GAAG,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;GAGG;AACH,SAAS,QAAQ,CAAC,QAAkB,EAAE,KAAa,EAAE,MAAe;IAClE,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAClD,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QACjC,MAAM,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,+BAA+B,CAAC,CAAC,CAAC,iBAAiB,CAAC;QAC5E,OAAO,GAAG,IAAI,WAAW,kBAAkB,CAAC,KAAK,CAAC,IAAI,MAAM,EAAE,CAAC;IACjE,CAAC;IACD,OAAO,IAAI,GAAG,CAAC,QAAQ,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,mBAAmB,CAAC,CAAC;AACtF,CAAC;AAED,SAAS,SAAS,CAChB,QAAkB,EAClB,GAAqB,EACrB,KAAa,EACb,MAAe,EACf,cAAsC;IAEtC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC5C,MAAM,WAAW,GAAqB,EAAE,GAAG,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAChE,MAAM,GAAG,GAAG,OAAO,CAAC,oBAAoB,CAAC,WAAW,CAA4B,CAAC;IACjF,iFAAiF;IACjF,6EAA6E;IAC7E,6EAA6E;IAC7E,iFAAiF;IACjF,0BAA0B;IAC1B,IAAI,MAAM,IAAI,QAAQ,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC3C,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,cAAc,IAAI,EAAE,CAA4B,CAAC;QACvE,GAAG,CAAC,cAAc,GAAG,EAAE,GAAG,QAAQ,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;IAC5D,CAAC;IACD,+EAA+E;IAC/E,iFAAiF;IACjF,qDAAqD;IACrD,aAAa,CAAC,QAAQ,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,QAAQ,CAAC,EAAE,EAAE,cAAc,CAAC,CAAC;IACxE,OAAO,GAAG,CAAC;AACb,CAAC;AAWD,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,QAAkB,EAClB,GAAqB,EACrB,KAAa,EACb,IAAqG;IAErG,MAAM,GAAG,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IACnD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,SAAS,CAAC,QAAQ,EAAE,GAAG,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,CAAC;IAE/E,IAAI,GAAG,CAAC;IACR,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE;YACvB,MAAM,EAAE,MAAM;YACd,OAAO;YACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;YAC1B,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,0DAA0D;YAC1D,cAAc,EAAE,OAAO;YACvB,WAAW,EAAE,OAAO;SACrB,CAAC,CAAC;IACL,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,YAAY,QAAQ,CAAC,EAAE,oBAAqB,CAAW,CAAC,OAAO,EAAE,CAAkB,CAAC;QAC1G,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC;QACrB,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,IAAI,GAAG,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;QAC1B,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,GAAG,GAAG,IAAI,KAAK,CAAC,YAAY,QAAQ,CAAC,EAAE,aAAa,GAAG,CAAC,UAAU,EAAE,CAAkB,CAAC;QAC7F,GAAG,CAAC,MAAM,GAAG,GAAG,CAAC,UAAU,CAAC;QAC5B,GAAG,CAAC,IAAI,GAAG,IAAI,CAAC;QAChB,GAAG,CAAC,SAAS,GAAG,iBAAiB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QAClD,MAAM,GAAG,CAAC;IACZ,CAAC;IAED,IAAI,IAAI,CAAC,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;IAEzD,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;IACnC,MAAM,OAAO,GAAG,UAAU,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;IAC5C,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,CAAC,mBAAmB,CAAC,IAAI,CAAC,EAAE,CAAC;AACxE,CAAC;AASD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAkB,EAAE,GAAuB;IAC5E,MAAM,IAAI,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,GAAG,IAAI,SAAS,CAAC;IAC7B,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC5C,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,EAAE,CAAC,CAAC;QACxG,MAAM,GAAG,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QACtB,OAAO,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,CAAC,UAAU,EAAE,EAAE,EAAE,GAAG,CAAC,UAAU,IAAI,GAAG,IAAI,GAAG,CAAC,UAAU,GAAG,GAAG,EAAE,CAAC;IACxG,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAG,CAAW,CAAC,OAAO,EAAE,CAAC;IACtE,CAAC;AACH,CAAC;AAED,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "aigetwey",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.2.0",
|
|
4
4
|
"type": "module",
|
|
5
|
-
"description": "Personal AI gateway — route, translate and track requests across Anthropic
|
|
5
|
+
"description": "Personal AI gateway — route, translate and track requests across Anthropic and OpenAI-compatible providers, with a built-in dashboard.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"ai",
|
|
8
8
|
"gateway",
|
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
"proxy",
|
|
11
11
|
"anthropic",
|
|
12
12
|
"openai",
|
|
13
|
-
"gemini",
|
|
14
13
|
"router",
|
|
15
14
|
"claude",
|
|
16
15
|
"dashboard"
|
|
@@ -29,7 +28,7 @@
|
|
|
29
28
|
"node": ">=22"
|
|
30
29
|
},
|
|
31
30
|
"bin": {
|
|
32
|
-
"aigetwey": "
|
|
31
|
+
"aigetwey": "dist/cli.js"
|
|
33
32
|
},
|
|
34
33
|
"files": [
|
|
35
34
|
"dist",
|
package/src/cli.ts
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
*/
|
|
15
15
|
import { spawn, execSync, type ChildProcess } from "node:child_process";
|
|
16
16
|
import { randomBytes } from "node:crypto";
|
|
17
|
-
import { existsSync, copyFileSync } from "node:fs";
|
|
17
|
+
import { existsSync, copyFileSync, readFileSync, writeFileSync, mkdirSync } from "node:fs";
|
|
18
18
|
import { resolve, dirname, join } from "node:path";
|
|
19
19
|
import { fileURLToPath } from "node:url";
|
|
20
20
|
import { createInterface } from "node:readline";
|
|
@@ -65,24 +65,57 @@ const HELP = `
|
|
|
65
65
|
const GATEWAY_PORT = opts.port ?? Number(process.env.AIGETWEY_PORT ?? 18080);
|
|
66
66
|
const DASHBOARD_PORT = Number(process.env.DASHBOARD_PORT ?? 3000);
|
|
67
67
|
|
|
68
|
-
// reuse env secrets if present, otherwise generate (admin) /
|
|
68
|
+
// reuse env secrets if present, otherwise generate (admin) / persist (session).
|
|
69
69
|
const adminPassword = process.env.AIGETWEY_ADMIN_PASSWORD ?? randomBytes(6).toString("hex");
|
|
70
|
-
const sessionSecret = process.env.SESSION_SECRET ?? randomBytes(24).toString("hex");
|
|
71
70
|
const generatedPw = !process.env.AIGETWEY_ADMIN_PASSWORD;
|
|
72
71
|
|
|
72
|
+
/**
|
|
73
|
+
* The dashboard session cookie is signed+encrypted with SESSION_SECRET. A fresh
|
|
74
|
+
* random secret each boot would invalidate every cookie on restart — the symptom
|
|
75
|
+
* being "re-enter the password after a relaunch" — so persist a generated one to
|
|
76
|
+
* the data dir (alongside auth.json) and reuse it. An explicit env var wins.
|
|
77
|
+
*/
|
|
78
|
+
function loadOrCreateSessionSecret(): string {
|
|
79
|
+
if (process.env.SESSION_SECRET) return process.env.SESSION_SECRET;
|
|
80
|
+
const dataDir = resolve(process.env.AIGETWEY_DATA_DIR ?? join(root, "data"));
|
|
81
|
+
const file = join(dataDir, "session-secret");
|
|
82
|
+
try {
|
|
83
|
+
const existing = readFileSync(file, "utf8").trim();
|
|
84
|
+
if (existing) return existing;
|
|
85
|
+
} catch {
|
|
86
|
+
// not created yet — fall through and generate.
|
|
87
|
+
}
|
|
88
|
+
const secret = randomBytes(24).toString("hex");
|
|
89
|
+
try {
|
|
90
|
+
mkdirSync(dataDir, { recursive: true });
|
|
91
|
+
writeFileSync(file, secret, { mode: 0o600 });
|
|
92
|
+
} catch {
|
|
93
|
+
// unwritable data dir — fall back to an ephemeral secret (cookies won't
|
|
94
|
+
// survive this boot, but the gateway still runs).
|
|
95
|
+
}
|
|
96
|
+
return secret;
|
|
97
|
+
}
|
|
98
|
+
const sessionSecret = loadOrCreateSessionSecret();
|
|
99
|
+
|
|
73
100
|
function openBrowser(url: string): void {
|
|
74
101
|
const cmd =
|
|
75
102
|
process.platform === "darwin" ? "open" : process.platform === "win32" ? "start" : "xdg-open";
|
|
76
103
|
spawn(cmd, [url], { stdio: "ignore", detached: true, shell: process.platform === "win32" }).unref();
|
|
77
104
|
}
|
|
78
105
|
|
|
79
|
-
async function waitForGateway(
|
|
106
|
+
async function waitForGateway(
|
|
107
|
+
url: string,
|
|
108
|
+
timeoutMs = 20000,
|
|
109
|
+
ready: (status: number) => boolean = (s) => s > 0,
|
|
110
|
+
): Promise<boolean> {
|
|
80
111
|
const deadline = Date.now() + timeoutMs;
|
|
81
112
|
while (Date.now() < deadline) {
|
|
82
113
|
try {
|
|
83
114
|
const res = await fetch(url, { method: "GET" });
|
|
84
|
-
// any HTTP answer (even 401/503) means the port is up
|
|
85
|
-
|
|
115
|
+
// default: any HTTP answer (even 401/503) means the port is up. A caller
|
|
116
|
+
// can demand more — e.g. a non-5xx, to wait past a proxy's boot-time 502/500
|
|
117
|
+
// while the upstream it fronts is still coming up.
|
|
118
|
+
if (ready(res.status)) return true;
|
|
86
119
|
} catch {
|
|
87
120
|
// not up yet
|
|
88
121
|
}
|
|
@@ -349,9 +382,12 @@ async function main(): Promise<void> {
|
|
|
349
382
|
});
|
|
350
383
|
|
|
351
384
|
// one URL for everything — the gateway reverse-proxies the dashboard. Wait for
|
|
352
|
-
// the dashboard to
|
|
385
|
+
// the dashboard to be READY before opening the browser. Probe it directly on
|
|
386
|
+
// its own port (not through the proxy) and require a non-5xx answer: a proxy
|
|
387
|
+
// hit during boot returns 500 (ECONNREFUSED upstream), which a bare "port up"
|
|
388
|
+
// check would mistake for ready and open the browser into a wall of 500s.
|
|
353
389
|
const appUrl = `http://127.0.0.1:${GATEWAY_PORT}`;
|
|
354
|
-
await waitForGateway(
|
|
390
|
+
await waitForGateway(`http://127.0.0.1:${DASHBOARD_PORT}/login`, 30000, (s) => s > 0 && s < 500);
|
|
355
391
|
console.log(`\n aigetwey ${appUrl} (dashboard + API, one URL)`);
|
|
356
392
|
if (generatedPw) {
|
|
357
393
|
console.log(`\n admin password (generated): ${adminPassword}`);
|
package/src/config.ts
CHANGED
|
@@ -9,6 +9,9 @@ import {
|
|
|
9
9
|
import { dirname } from "node:path";
|
|
10
10
|
import { parse as parseYaml, stringify as stringifyYaml } from "yaml";
|
|
11
11
|
import { z } from "zod";
|
|
12
|
+
import { clientKeyFingerprint } from "./middleware/auth.js";
|
|
13
|
+
|
|
14
|
+
export { clientKeyFingerprint } from "./middleware/auth.js";
|
|
12
15
|
|
|
13
16
|
// ---- schema (PLAN §8) -------------------------------------------------------
|
|
14
17
|
//
|
|
@@ -121,17 +124,28 @@ const ServerSchema = z
|
|
|
121
124
|
.default({ host: "127.0.0.1", port: 18080, api_keys: [] });
|
|
122
125
|
|
|
123
126
|
/**
|
|
124
|
-
*
|
|
125
|
-
*
|
|
126
|
-
* at 100%. Window math reuses the quota
|
|
127
|
+
* A spend budget scoped to the whole gateway, one provider, or one upstream
|
|
128
|
+
* model. unit picks what `limit` means — USD cost or total tokens. Soft-alert at
|
|
129
|
+
* alert_at (default 0.8), hard-stop at 100%. Window math reuses the quota
|
|
130
|
+
* calendar engine. Opt-in: omit / empty list to disable.
|
|
127
131
|
*/
|
|
132
|
+
const BudgetScopeSchema = z.discriminatedUnion("type", [
|
|
133
|
+
z.object({ type: z.literal("global") }),
|
|
134
|
+
z.object({ type: z.literal("provider"), id: z.string().min(1) }),
|
|
135
|
+
z.object({ type: z.literal("model"), id: z.string().min(1) }),
|
|
136
|
+
z.object({ type: z.literal("key"), id: z.string().min(1) }),
|
|
137
|
+
]);
|
|
138
|
+
|
|
128
139
|
const BudgetSchema = z.object({
|
|
140
|
+
scope: BudgetScopeSchema,
|
|
129
141
|
unit: z.enum(["usd", "tokens"]),
|
|
130
142
|
limit: z.number().positive(),
|
|
131
143
|
window: z.enum(["5h", "daily", "weekly", "monthly"]),
|
|
132
144
|
reset_at: z.string().optional(),
|
|
133
145
|
timezone: z.string().default("UTC"),
|
|
134
146
|
alert_at: z.number().gt(0).lte(1).optional(),
|
|
147
|
+
// optional free-text label so an operator remembers what a budget is for.
|
|
148
|
+
note: z.string().max(200).optional(),
|
|
135
149
|
});
|
|
136
150
|
|
|
137
151
|
const ConfigSchema = z.object({
|
|
@@ -140,7 +154,7 @@ const ConfigSchema = z.object({
|
|
|
140
154
|
providers: z.array(ProviderSchema).default([]),
|
|
141
155
|
// the routing layer. Each entry is a "combo": an alias + a provider chain.
|
|
142
156
|
models: z.array(ModelRouteSchema).default([]),
|
|
143
|
-
|
|
157
|
+
budgets: z.array(BudgetSchema).default([]),
|
|
144
158
|
});
|
|
145
159
|
|
|
146
160
|
export type Quota = z.infer<typeof QuotaSchema>;
|
|
@@ -148,6 +162,7 @@ export type ProviderModel = z.infer<typeof ProviderModelSchema>;
|
|
|
148
162
|
export type Provider = z.infer<typeof ProviderSchema>;
|
|
149
163
|
export type ModelRoute = z.infer<typeof ModelRouteSchema>;
|
|
150
164
|
export type EndpointSettings = z.infer<typeof EndpointSchema>;
|
|
165
|
+
export type BudgetScope = z.infer<typeof BudgetScopeSchema>;
|
|
151
166
|
export type Budget = z.infer<typeof BudgetSchema>;
|
|
152
167
|
export type Config = z.infer<typeof ConfigSchema>;
|
|
153
168
|
|
|
@@ -277,6 +292,16 @@ export class GatewayConfig {
|
|
|
277
292
|
|
|
278
293
|
/** Validate an already-parsed config object. Throws with readable issues. */
|
|
279
294
|
export function validateConfig(parsed: unknown): GatewayConfig {
|
|
295
|
+
// migrate the legacy single `budget` (pre-scoped) into a global-scoped entry
|
|
296
|
+
// before zod runs — zod would otherwise strip the unknown `budget` key.
|
|
297
|
+
if (parsed && typeof parsed === "object") {
|
|
298
|
+
const raw = parsed as Record<string, unknown>;
|
|
299
|
+
if (raw.budget && !raw.budgets) {
|
|
300
|
+
const legacy = raw.budget as Record<string, unknown>;
|
|
301
|
+
raw.budgets = [{ scope: { type: "global" }, ...legacy }];
|
|
302
|
+
}
|
|
303
|
+
delete raw.budget;
|
|
304
|
+
}
|
|
280
305
|
const result = ConfigSchema.safeParse(parsed ?? {});
|
|
281
306
|
if (!result.success) {
|
|
282
307
|
const issues = result.error.issues
|
|
@@ -757,19 +782,41 @@ export function setHeadroom(
|
|
|
757
782
|
return next;
|
|
758
783
|
}
|
|
759
784
|
|
|
760
|
-
// ----
|
|
785
|
+
// ---- scoped budgets --------------------------------------------------------
|
|
786
|
+
|
|
787
|
+
/** Stable identity key for a budget's scope. */
|
|
788
|
+
export function budgetKey(scope: BudgetScope): string {
|
|
789
|
+
return scope.type === "global" ? "global" : `${scope.type}:${scope.id}`;
|
|
790
|
+
}
|
|
761
791
|
|
|
762
|
-
/**
|
|
792
|
+
/** Add a budget, or replace the existing one with the same scope key. */
|
|
763
793
|
export function setBudget(config: Config, budget: Budget): Config {
|
|
794
|
+
if (budget.scope.type === "provider") {
|
|
795
|
+
const { id } = budget.scope;
|
|
796
|
+
if (!config.providers.some((p) => p.id === id)) {
|
|
797
|
+
throw new Error(`unknown provider "${id}" for budget scope`);
|
|
798
|
+
}
|
|
799
|
+
}
|
|
800
|
+
if (budget.scope.type === "key") {
|
|
801
|
+
const { id } = budget.scope;
|
|
802
|
+
if (!config.server.api_keys.some((k) => clientKeyFingerprint(k) === id)) {
|
|
803
|
+
throw new Error(`unknown API key fingerprint "${id}" for budget scope`);
|
|
804
|
+
}
|
|
805
|
+
}
|
|
764
806
|
const next = cloneConfig(config);
|
|
765
|
-
|
|
807
|
+
const key = budgetKey(budget.scope);
|
|
808
|
+
const idx = next.budgets.findIndex((b) => budgetKey(b.scope) === key);
|
|
809
|
+
if (idx === -1) next.budgets.push(budget);
|
|
810
|
+
else next.budgets[idx] = budget;
|
|
766
811
|
return next;
|
|
767
812
|
}
|
|
768
813
|
|
|
769
|
-
/** Remove
|
|
770
|
-
export function clearBudget(config: Config): Config {
|
|
814
|
+
/** Remove a budget by its scope key (global | provider:<id> | model:<id> | key:<fp>). */
|
|
815
|
+
export function clearBudget(config: Config, key: string): Config {
|
|
771
816
|
const next = cloneConfig(config);
|
|
772
|
-
|
|
817
|
+
const idx = next.budgets.findIndex((b) => budgetKey(b.scope) === key);
|
|
818
|
+
if (idx === -1) throw new Error(`no budget with scope "${key}"`);
|
|
819
|
+
next.budgets.splice(idx, 1);
|
|
773
820
|
return next;
|
|
774
821
|
}
|
|
775
822
|
|
package/src/core/budget.ts
CHANGED
|
@@ -1,71 +1,124 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
* truth) rather than a parallel counter.
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
2
|
+
* Scoped spend budgets, derived from the usage table (the single source of
|
|
3
|
+
* truth) rather than a parallel counter. Each budget targets the whole gateway,
|
|
4
|
+
* one provider, or one upstream model. statuses() computes every budget's spend
|
|
5
|
+
* over its window; the result list is cached a few seconds so the per-request
|
|
6
|
+
* hard-stop check stays cheap. blocks() answers "is a route to this
|
|
7
|
+
* provider/model barred by an exhausted budget?".
|
|
7
8
|
*/
|
|
8
|
-
import type { Budget } from "../config.js";
|
|
9
|
+
import type { Budget, BudgetScope } from "../config.js";
|
|
10
|
+
import { budgetKey } from "../config.js";
|
|
9
11
|
import { currentWindowStart, nextResetAt } from "./quota.js";
|
|
10
12
|
|
|
11
13
|
export interface BudgetStatus {
|
|
14
|
+
scope: BudgetScope;
|
|
15
|
+
key: string;
|
|
16
|
+
label: string;
|
|
17
|
+
note?: string;
|
|
12
18
|
unit: "usd" | "tokens";
|
|
13
19
|
limit: number;
|
|
14
20
|
spent: number;
|
|
15
21
|
pct: number;
|
|
16
22
|
alert: boolean;
|
|
23
|
+
alert_at: number;
|
|
17
24
|
exhausted: boolean;
|
|
18
|
-
/** estimate in the converse unit (tokens if unit=usd, usd if unit=tokens); null when no usage yet */
|
|
19
25
|
est_converse: number | null;
|
|
20
26
|
reset_in_ms: number;
|
|
21
27
|
window: Budget["window"];
|
|
22
28
|
}
|
|
23
29
|
|
|
24
|
-
interface
|
|
25
|
-
|
|
30
|
+
interface TotalsReader {
|
|
31
|
+
totals(sinceMs: number, filter?: { provider?: string; model?: string; client_key?: string }): {
|
|
32
|
+
tokens_in: number;
|
|
33
|
+
tokens_out: number;
|
|
34
|
+
cost: number;
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function scopeLabel(scope: BudgetScope, keyName: (fp: string) => string): string {
|
|
39
|
+
if (scope.type === "global") return "Global";
|
|
40
|
+
if (scope.type === "key") return keyName(scope.id);
|
|
41
|
+
return scope.id;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
function scopeFilter(scope: BudgetScope): { provider?: string; model?: string; client_key?: string } | undefined {
|
|
45
|
+
if (scope.type === "provider") return { provider: scope.id };
|
|
46
|
+
if (scope.type === "model") return { model: scope.id };
|
|
47
|
+
if (scope.type === "key") return { client_key: scope.id };
|
|
48
|
+
return undefined;
|
|
26
49
|
}
|
|
27
50
|
|
|
28
51
|
export class BudgetTracker {
|
|
29
|
-
private cached?: { at: number;
|
|
52
|
+
private cached?: { at: number; list: BudgetStatus[] };
|
|
30
53
|
|
|
31
54
|
constructor(
|
|
32
|
-
private readonly
|
|
33
|
-
private readonly db:
|
|
55
|
+
private readonly getBudgets: () => Budget[],
|
|
56
|
+
private readonly db: TotalsReader,
|
|
34
57
|
private readonly now: () => number = Date.now,
|
|
35
58
|
private readonly cacheMs = 5000,
|
|
59
|
+
private readonly keyName: (fp: string) => string = (fp) => `key …${fp}`,
|
|
36
60
|
) {}
|
|
37
61
|
|
|
38
|
-
/** Flush the cached status — call after a config reload that may have changed the budget spec. */
|
|
39
62
|
clearCache(): void {
|
|
40
63
|
this.cached = undefined;
|
|
41
64
|
}
|
|
42
65
|
|
|
43
|
-
|
|
66
|
+
statuses(): BudgetStatus[] {
|
|
44
67
|
const t = this.now();
|
|
45
|
-
if (this.cached && t - this.cached.at < this.cacheMs) return this.cached.
|
|
46
|
-
const
|
|
47
|
-
this.cached = { at: t,
|
|
48
|
-
return
|
|
68
|
+
if (this.cached && t - this.cached.at < this.cacheMs) return this.cached.list;
|
|
69
|
+
const list = this.getBudgets().map((b) => this.compute(b, t));
|
|
70
|
+
this.cached = { at: t, list };
|
|
71
|
+
return list;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
globalStatus(): BudgetStatus | null {
|
|
75
|
+
return this.statuses().find((s) => s.scope.type === "global") ?? null;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/** First exhausted provider/model budget matching a route, or null. */
|
|
79
|
+
blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null {
|
|
80
|
+
for (const s of this.statuses()) {
|
|
81
|
+
if (!s.exhausted) continue;
|
|
82
|
+
if (s.scope.type === "provider" && s.scope.id === providerId)
|
|
83
|
+
return { exhausted: true, reset_in_ms: s.reset_in_ms };
|
|
84
|
+
if (s.scope.type === "model" && s.scope.id === model)
|
|
85
|
+
return { exhausted: true, reset_in_ms: s.reset_in_ms };
|
|
86
|
+
}
|
|
87
|
+
return null;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
/** The exhausted key-scoped budget for this fingerprint, or null. */
|
|
91
|
+
blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null {
|
|
92
|
+
for (const s of this.statuses()) {
|
|
93
|
+
if (s.exhausted && s.scope.type === "key" && s.scope.id === fp) {
|
|
94
|
+
return { exhausted: true, reset_in_ms: s.reset_in_ms };
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
return null;
|
|
49
98
|
}
|
|
50
99
|
|
|
51
|
-
private compute(t: number): BudgetStatus
|
|
52
|
-
const spec = this.getSpec();
|
|
53
|
-
if (!spec) return null;
|
|
100
|
+
private compute(spec: Budget, t: number): BudgetStatus {
|
|
54
101
|
const windowStart = currentWindowStart(spec, t);
|
|
55
|
-
const total = this.db.
|
|
102
|
+
const total = this.db.totals(windowStart, scopeFilter(spec.scope));
|
|
56
103
|
const tokens = total.tokens_in + total.tokens_out;
|
|
57
104
|
const cost = total.cost;
|
|
58
|
-
const rate = tokens > 0 ? cost / tokens : undefined;
|
|
105
|
+
const rate = tokens > 0 ? cost / tokens : undefined;
|
|
59
106
|
const spent = spec.unit === "usd" ? cost : tokens;
|
|
60
107
|
const limit = spec.limit;
|
|
61
108
|
const pct = limit > 0 ? Math.min(1, spent / limit) : 0;
|
|
109
|
+
const alertAt = spec.alert_at ?? 0.8;
|
|
62
110
|
const est_converse = rate === undefined ? null : spec.unit === "usd" ? limit / rate : limit * rate;
|
|
63
111
|
return {
|
|
112
|
+
scope: spec.scope,
|
|
113
|
+
key: budgetKey(spec.scope),
|
|
114
|
+
label: scopeLabel(spec.scope, this.keyName),
|
|
115
|
+
note: spec.note,
|
|
64
116
|
unit: spec.unit,
|
|
65
117
|
limit,
|
|
66
118
|
spent,
|
|
67
119
|
pct,
|
|
68
|
-
alert: pct >=
|
|
120
|
+
alert: pct >= alertAt,
|
|
121
|
+
alert_at: alertAt,
|
|
69
122
|
exhausted: spent >= limit,
|
|
70
123
|
est_converse,
|
|
71
124
|
reset_in_ms: Math.max(0, nextResetAt(spec, windowStart, t) - t),
|
package/src/core/handler.ts
CHANGED
|
@@ -51,7 +51,12 @@ export interface HandleDeps {
|
|
|
51
51
|
pool: KeyPool;
|
|
52
52
|
db?: UsageDB;
|
|
53
53
|
quota?: QuotaTracker;
|
|
54
|
-
budget?: {
|
|
54
|
+
budget?: {
|
|
55
|
+
globalStatus(): { exhausted: boolean; reset_in_ms: number } | null;
|
|
56
|
+
blocks(providerId: string, model: string): { exhausted: true; reset_in_ms: number } | null;
|
|
57
|
+
blocksKey(fp: string): { exhausted: true; reset_in_ms: number } | null;
|
|
58
|
+
};
|
|
59
|
+
clientKeyFp?: string;
|
|
55
60
|
log?: (msg: string) => void;
|
|
56
61
|
now?: () => number;
|
|
57
62
|
}
|
|
@@ -85,6 +90,7 @@ function recordUsage(
|
|
|
85
90
|
status,
|
|
86
91
|
latency_ms: latencyMs,
|
|
87
92
|
stream: stream ? 1 : 0,
|
|
93
|
+
client_key: deps.clientKeyFp ?? "",
|
|
88
94
|
});
|
|
89
95
|
}
|
|
90
96
|
|
|
@@ -115,16 +121,27 @@ export async function handle(
|
|
|
115
121
|
const thinkingIntent: ThinkingConfig | null =
|
|
116
122
|
override ?? captureThinking(canonical as Record<string, unknown>);
|
|
117
123
|
|
|
118
|
-
|
|
124
|
+
let routes = config.resolve(canonical.model);
|
|
119
125
|
if (routes.length === 0) {
|
|
120
126
|
throw new GatewayError(404, { error: `unknown model "${canonical.model}"` });
|
|
121
127
|
}
|
|
122
128
|
|
|
123
|
-
//
|
|
124
|
-
//
|
|
125
|
-
|
|
126
|
-
if (
|
|
127
|
-
|
|
129
|
+
// Budget hard-stop. Global overrun fails fast. Provider/model budgets bar the
|
|
130
|
+
// matching routes (like the token-quota skip); if every candidate is barred,
|
|
131
|
+
// there's nothing to serve → 402.
|
|
132
|
+
if (deps.budget) {
|
|
133
|
+
const g = deps.budget.globalStatus();
|
|
134
|
+
if (g?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: g.reset_in_ms });
|
|
135
|
+
if (deps.clientKeyFp) {
|
|
136
|
+
const kb = deps.budget.blocksKey(deps.clientKeyFp);
|
|
137
|
+
if (kb?.exhausted) throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: kb.reset_in_ms });
|
|
138
|
+
}
|
|
139
|
+
const eligible = routes.filter((r) => !deps.budget!.blocks(r.provider.id, r.model));
|
|
140
|
+
if (eligible.length === 0) {
|
|
141
|
+
const b = deps.budget.blocks(routes[0]!.provider.id, routes[0]!.model);
|
|
142
|
+
throw new GatewayError(402, { error: "budget exceeded", reset_in_ms: b?.reset_in_ms ?? 0 });
|
|
143
|
+
}
|
|
144
|
+
routes = eligible;
|
|
128
145
|
}
|
|
129
146
|
|
|
130
147
|
// Pipeline order matters: RTK compresses tool_result in the INPUT first, then
|
package/src/core/state.ts
CHANGED
|
@@ -17,11 +17,20 @@ import {
|
|
|
17
17
|
validateConfig,
|
|
18
18
|
unmaskSecrets,
|
|
19
19
|
writeConfigFile,
|
|
20
|
+
maskKey,
|
|
20
21
|
} from "../config.js";
|
|
22
|
+
import { clientKeyFingerprint } from "../middleware/auth.js";
|
|
21
23
|
import { KeyPool } from "./keypool.js";
|
|
22
24
|
import { QuotaTracker } from "./quota.js";
|
|
23
25
|
import { BudgetTracker } from "./budget.js";
|
|
24
26
|
|
|
27
|
+
function serverKeyLabel(server: { api_keys: string[]; key_names?: Record<string, string> }, fp: string): string {
|
|
28
|
+
for (const k of server.api_keys) {
|
|
29
|
+
if (clientKeyFingerprint(k) === fp) return server.key_names?.[k] ?? maskKey(k);
|
|
30
|
+
}
|
|
31
|
+
return `key …${fp}`;
|
|
32
|
+
}
|
|
33
|
+
|
|
25
34
|
export class GatewayState {
|
|
26
35
|
private _config: GatewayConfig;
|
|
27
36
|
private _pool: KeyPool;
|
|
@@ -32,12 +41,18 @@ export class GatewayState {
|
|
|
32
41
|
private readonly configPath: string,
|
|
33
42
|
initial: GatewayConfig,
|
|
34
43
|
quota?: QuotaTracker,
|
|
35
|
-
budgetDb?: {
|
|
44
|
+
budgetDb?: { totals(since: number, filter?: { provider?: string; model?: string; client_key?: string }): { tokens_in: number; tokens_out: number; cost: number } },
|
|
36
45
|
) {
|
|
37
46
|
this._config = initial;
|
|
38
47
|
this._pool = new KeyPool();
|
|
39
48
|
this._quota = quota ?? new QuotaTracker();
|
|
40
|
-
this._budget = new BudgetTracker(
|
|
49
|
+
this._budget = new BudgetTracker(
|
|
50
|
+
() => this._config.raw.budgets,
|
|
51
|
+
budgetDb ?? { totals: () => ({ tokens_in: 0, tokens_out: 0, cost: 0 }) },
|
|
52
|
+
undefined,
|
|
53
|
+
undefined,
|
|
54
|
+
(fp) => serverKeyLabel(this._config.raw.server, fp),
|
|
55
|
+
);
|
|
41
56
|
}
|
|
42
57
|
|
|
43
58
|
get config(): GatewayConfig {
|
package/src/db.ts
CHANGED
|
@@ -27,6 +27,7 @@ export interface UsageRow {
|
|
|
27
27
|
status: number;
|
|
28
28
|
latency_ms: number;
|
|
29
29
|
stream: number; // 0/1
|
|
30
|
+
client_key: string;
|
|
30
31
|
}
|
|
31
32
|
|
|
32
33
|
export interface LogRow {
|
|
@@ -38,6 +39,12 @@ export interface LogRow {
|
|
|
38
39
|
response_summary: string;
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
export interface UsageTotals {
|
|
43
|
+
tokens_in: number;
|
|
44
|
+
tokens_out: number;
|
|
45
|
+
cost: number;
|
|
46
|
+
}
|
|
47
|
+
|
|
41
48
|
export interface UsageSummary {
|
|
42
49
|
total: { requests: number; tokens_in: number; tokens_out: number; cost: number };
|
|
43
50
|
by_provider: Array<{ provider: string; requests: number; tokens_in: number; tokens_out: number; cost: number }>;
|
|
@@ -79,7 +86,8 @@ export class UsageDB {
|
|
|
79
86
|
cost REAL NOT NULL DEFAULT 0,
|
|
80
87
|
status INTEGER NOT NULL,
|
|
81
88
|
latency_ms INTEGER NOT NULL DEFAULT 0,
|
|
82
|
-
stream INTEGER NOT NULL DEFAULT 0
|
|
89
|
+
stream INTEGER NOT NULL DEFAULT 0,
|
|
90
|
+
client_key TEXT NOT NULL DEFAULT ''
|
|
83
91
|
);
|
|
84
92
|
CREATE INDEX IF NOT EXISTS idx_usage_ts ON usage(ts);
|
|
85
93
|
CREATE TABLE IF NOT EXISTS logs (
|
|
@@ -99,10 +107,15 @@ export class UsageDB {
|
|
|
99
107
|
last_reset INTEGER NOT NULL DEFAULT 0
|
|
100
108
|
);
|
|
101
109
|
`);
|
|
110
|
+
// migrate older DBs created before client_key existed.
|
|
111
|
+
const cols = this.db.prepare(`PRAGMA table_info(usage)`).all() as SqlRow[];
|
|
112
|
+
if (!cols.some((c) => String(c.name) === "client_key")) {
|
|
113
|
+
this.db.exec(`ALTER TABLE usage ADD COLUMN client_key TEXT NOT NULL DEFAULT ''`);
|
|
114
|
+
}
|
|
102
115
|
this.now = now;
|
|
103
116
|
this.insertUsage = this.db.prepare(`
|
|
104
|
-
INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream)
|
|
105
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
117
|
+
INSERT INTO usage (ts, alias, provider, model, tokens_in, tokens_out, cached_tokens, cost, status, latency_ms, stream, client_key)
|
|
118
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
106
119
|
`);
|
|
107
120
|
this.insertLog = this.db.prepare(`
|
|
108
121
|
INSERT INTO logs (ts, direction, provider, status, request_summary, response_summary)
|
|
@@ -117,7 +130,7 @@ export class UsageDB {
|
|
|
117
130
|
`);
|
|
118
131
|
}
|
|
119
132
|
|
|
120
|
-
record(row: Omit<UsageRow, "ts"> & { ts?: number }): void {
|
|
133
|
+
record(row: Omit<UsageRow, "ts" | "client_key"> & { ts?: number; client_key?: string }): void {
|
|
121
134
|
this.insertUsage.run(
|
|
122
135
|
row.ts ?? this.now(),
|
|
123
136
|
row.alias,
|
|
@@ -130,6 +143,7 @@ export class UsageDB {
|
|
|
130
143
|
row.status,
|
|
131
144
|
row.latency_ms,
|
|
132
145
|
row.stream,
|
|
146
|
+
row.client_key ?? "",
|
|
133
147
|
);
|
|
134
148
|
}
|
|
135
149
|
|
|
@@ -195,6 +209,36 @@ export class UsageDB {
|
|
|
195
209
|
};
|
|
196
210
|
}
|
|
197
211
|
|
|
212
|
+
/**
|
|
213
|
+
* Summed token + cost totals over rows with ts >= sinceMs, optionally filtered
|
|
214
|
+
* to one provider and/or one model. Backs the scoped budget tracker — the usage
|
|
215
|
+
* table stays the single source of truth (no parallel counter).
|
|
216
|
+
*/
|
|
217
|
+
totals(sinceMs: number, filter?: { provider?: string; model?: string; client_key?: string }): UsageTotals {
|
|
218
|
+
const clauses = ["ts >= ?"];
|
|
219
|
+
const params: Array<number | string> = [sinceMs];
|
|
220
|
+
if (filter?.provider) {
|
|
221
|
+
clauses.push("provider = ?");
|
|
222
|
+
params.push(filter.provider);
|
|
223
|
+
}
|
|
224
|
+
if (filter?.model) {
|
|
225
|
+
clauses.push("model = ?");
|
|
226
|
+
params.push(filter.model);
|
|
227
|
+
}
|
|
228
|
+
if (filter?.client_key) {
|
|
229
|
+
clauses.push("client_key = ?");
|
|
230
|
+
params.push(filter.client_key);
|
|
231
|
+
}
|
|
232
|
+
const row = this.db
|
|
233
|
+
.prepare(
|
|
234
|
+
`SELECT COALESCE(SUM(tokens_in),0) tokens_in, COALESCE(SUM(tokens_out),0) tokens_out,
|
|
235
|
+
COALESCE(SUM(cost),0) cost
|
|
236
|
+
FROM usage WHERE ${clauses.join(" AND ")}`,
|
|
237
|
+
)
|
|
238
|
+
.get(...params) as SqlRow;
|
|
239
|
+
return { tokens_in: num(row.tokens_in), tokens_out: num(row.tokens_out), cost: num(row.cost) };
|
|
240
|
+
}
|
|
241
|
+
|
|
198
242
|
/**
|
|
199
243
|
* Bucketed time-series for charts: one point per `bucketMs` interval from
|
|
200
244
|
* `sinceMs` to now, aligned to the bucket boundary, with zero-filled gaps.
|
|
@@ -233,7 +277,7 @@ export class UsageDB {
|
|
|
233
277
|
const rows = this.db
|
|
234
278
|
.prepare(
|
|
235
279
|
`SELECT ts, alias, provider, model, tokens_in, tokens_out, cached_tokens,
|
|
236
|
-
cost, status, latency_ms, stream
|
|
280
|
+
cost, status, latency_ms, stream, client_key
|
|
237
281
|
FROM usage ORDER BY id DESC LIMIT ?`,
|
|
238
282
|
)
|
|
239
283
|
.all(Math.max(1, Math.min(limit, 1000))) as SqlRow[];
|
|
@@ -249,6 +293,7 @@ export class UsageDB {
|
|
|
249
293
|
status: num(r.status),
|
|
250
294
|
latency_ms: num(r.latency_ms),
|
|
251
295
|
stream: num(r.stream),
|
|
296
|
+
client_key: String(r.client_key ?? ""),
|
|
252
297
|
}));
|
|
253
298
|
}
|
|
254
299
|
|