@tokagent/tokagentos 2.0.15 → 2.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/templates/fullstack-app/plugins/plugin-tokagent-billing/package.json +1 -1
- package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/index.ts +9 -0
- package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/routes/messages-proxy-routes.ts +288 -0
- package/templates-manifest.json +1 -1
package/package.json
CHANGED
|
@@ -17,6 +17,8 @@ import { getSetupRoutes } from "./routes/setup-routes.js";
|
|
|
17
17
|
import { getSetupPanelRoutes } from "./routes/setup-panel-routes.js";
|
|
18
18
|
// Operator dashboard SPA (migrated from llm-api-gateway)
|
|
19
19
|
import { getDashboardRoutes } from "./routes/dashboard-routes.js";
|
|
20
|
+
// LiteLLM proxy for /v1/messages + /v1/chat/completions (server-mode only)
|
|
21
|
+
import { getMessagesProxyRoutes } from "./routes/messages-proxy-routes.js";
|
|
20
22
|
|
|
21
23
|
/**
|
|
22
24
|
* Detect the BILLING_MODE at module-load time. The Plugin.routes array is
|
|
@@ -118,6 +120,13 @@ export const tokagentBillingPlugin: Plugin = {
|
|
|
118
120
|
]
|
|
119
121
|
: [],
|
|
120
122
|
routes: [
|
|
123
|
+
// MUST be registered BEFORE other routes — these own /v1/messages and
|
|
124
|
+
// /v1/chat/completions in server-mode and run a pure LiteLLM proxy.
|
|
125
|
+
// Without these, elizaOS's chat-routes.ts dispatcher tries to handle
|
|
126
|
+
// /v1/messages as an agent chat (requires worlds DB + AI provider
|
|
127
|
+
// plugin) and fails with "tableName is required" on a billing-only
|
|
128
|
+
// deployment.
|
|
129
|
+
...getMessagesProxyRoutes(BILLING_MODE),
|
|
121
130
|
...getAuthRoutes(BILLING_MODE),
|
|
122
131
|
...getKeysRoutes(BILLING_MODE),
|
|
123
132
|
...getCreditsRoutes(BILLING_MODE),
|
package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/routes/messages-proxy-routes.ts
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure-proxy /v1/messages + /v1/chat/completions handlers.
|
|
3
|
+
*
|
|
4
|
+
* Why this exists: the elizaOS upstream that the billing server is built on
|
|
5
|
+
* routes /v1/messages and /v1/chat/completions through its agent chat
|
|
6
|
+
* handler (handleChatRoutes), which:
|
|
7
|
+
* - requires a fully-seeded worlds/messages DB
|
|
8
|
+
* - requires an AI provider plugin (ANTHROPIC_API_KEY / OPENAI_API_KEY)
|
|
9
|
+
* - wraps the response in the agent's character-prompt envelope
|
|
10
|
+
*
|
|
11
|
+
* None of that is appropriate for a billing GATEWAY whose job is to:
|
|
12
|
+
* 1. auth the caller via sk-ai-* API key
|
|
13
|
+
* 2. reserve credits against the wallet's spendable balance
|
|
14
|
+
* 3. forward the request VERBATIM to BILLING_LITELLM_BASE_URL
|
|
15
|
+
* 4. commit actual usage from the upstream response
|
|
16
|
+
*
|
|
17
|
+
* This file registers plugin routes that own /v1/messages and
|
|
18
|
+
* /v1/chat/completions BEFORE the chat-routes dispatcher in server.ts gets a
|
|
19
|
+
* chance to handle them (see server.ts BILLING_HOOK ordering change made in
|
|
20
|
+
* the same commit). The handler is a thin proxy: identical request body
|
|
21
|
+
* forwarded with the operator's LiteLLM API key, identical response body
|
|
22
|
+
* returned to the caller.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import type { Route, RouteRequest, RouteResponse, IAgentRuntime } from "@elizaos/core";
|
|
26
|
+
import type { IncomingMessage } from "node:http";
|
|
27
|
+
import { getBillingState, isBillingStateInitialized } from "../state.js";
|
|
28
|
+
import { applyBillingGate } from "../middleware/billing-gate.js";
|
|
29
|
+
import { computeActualCostUsd } from "@tokagentos/billing";
|
|
30
|
+
|
|
31
|
+
/**
 * Answer the request with HTTP 503 and a generic JSON error payload.
 *
 * Called by the proxy handler when billing state has not been initialized
 * or when the billing config reports `enabled` as falsy.
 *
 * @param res - Plugin route response to write the error to.
 */
function billingUnavailable(res: RouteResponse): void {
  const payload = { error: "Billing service unavailable." };
  res.status(503).json(payload);
}
|
|
34
|
+
|
|
35
|
+
/**
 * Adapt a plugin RouteRequest to the IncomingMessage shape that
 * applyBillingGate / resolveBillingIdentity are called with.
 *
 * Only two members are populated on the returned object:
 *   - `headers`: the request's own headers object (or `{}` when absent),
 *     which the gate reads for x-api-key / bearer / content-type
 *   - `socket`: a stub whose `remoteAddress` is always `undefined`
 *     (the gate consults it for rate limiting)
 *
 * Nothing else from IncomingMessage exists at runtime, which is why the
 * result has to be forced through `unknown` before it is typed.
 *
 * @param req - Incoming plugin route request.
 * @returns A minimal IncomingMessage look-alike.
 */
function toIncomingMessage(req: RouteRequest): IncomingMessage {
  const headers = req.headers ?? {};
  const socket = { remoteAddress: undefined };
  const shim = { headers, socket };
  return shim as unknown as IncomingMessage;
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Pick the headers we forward upstream. NOT the caller's Authorization /
|
|
54
|
+
* x-api-key — those are OUR auth tokens, not LiteLLM's. We attach the
|
|
55
|
+
* operator's LiteLLM API key downstream.
|
|
56
|
+
*/
|
|
57
|
+
function pickUpstreamHeaders(
|
|
58
|
+
req: RouteRequest,
|
|
59
|
+
litellmApiKey: string | undefined,
|
|
60
|
+
): Record<string, string> {
|
|
61
|
+
const out: Record<string, string> = {
|
|
62
|
+
"Content-Type": "application/json",
|
|
63
|
+
};
|
|
64
|
+
if (litellmApiKey) {
|
|
65
|
+
// LiteLLM accepts both shapes — pick Bearer for OpenAI-style upstreams
|
|
66
|
+
// and x-api-key for Anthropic-style. Setting both is harmless.
|
|
67
|
+
out["Authorization"] = `Bearer ${litellmApiKey}`;
|
|
68
|
+
out["x-api-key"] = litellmApiKey;
|
|
69
|
+
}
|
|
70
|
+
const h = (req.headers ?? {}) as Record<string, string | string[] | undefined>;
|
|
71
|
+
const passthrough = ["anthropic-version", "anthropic-beta", "openai-organization"];
|
|
72
|
+
for (const name of passthrough) {
|
|
73
|
+
const v = h[name.toLowerCase()];
|
|
74
|
+
if (typeof v === "string") out[name] = v;
|
|
75
|
+
else if (Array.isArray(v) && typeof v[0] === "string") out[name] = v[0];
|
|
76
|
+
}
|
|
77
|
+
return out;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
 * Shared proxy handler for /v1/messages and /v1/chat/completions.
 *
 * Flow:
 *   1. Reject early: 503 when billing state is uninitialized or disabled,
 *      400 when the request body is not an object, 501 when
 *      `stream === true` (SSE forwarding is not implemented).
 *   2. applyBillingGate(req, body) — auth + reserve. When the gate denies
 *      the request, its status and body are relayed to the caller.
 *   3. POST the JSON-serialized body to `${litellmBaseUrl}${upstreamPath}`.
 *   4. On a 2xx response, read `usage` from the upstream body,
 *      computeActualCostUsd, gate.commit(actual).
 *   5. Relay the upstream status and parsed body to the caller.
 *
 * Failure modes:
 *   - litellmBaseUrl not configured → gate.release("upstream_error"), 503.
 *   - fetch throws OR the upstream body cannot be read →
 *     gate.release("released_error"), 502. Both are handled in one place so
 *     a connection that drops mid-body cannot leave the reservation held.
 *   - Upstream returned non-2xx → gate.release("released_error") (no
 *     charge), error body passed through with the upstream status.
 *   - Pricing lookup or commit throws → logged via console.error; the
 *     caller still receives the upstream response.
 *
 * NOTE(review): the two release outcomes ("upstream_error" vs
 * "released_error") are kept exactly as the code had them; confirm against
 * the gate's accepted outcome values whether the network-error path should
 * also report "upstream_error".
 *
 * @param req - Plugin route request; `req.body` is expected to be the
 *   already-parsed JSON payload.
 * @param res - Plugin route response.
 * @param upstreamPath - Path appended to the LiteLLM base URL.
 */
async function proxyToLiteLLM(
  req: RouteRequest,
  res: RouteResponse,
  upstreamPath: string,
): Promise<void> {
  if (!isBillingStateInitialized()) return billingUnavailable(res);
  const state = getBillingState();
  const config = state.config;
  if (!config.enabled) return billingUnavailable(res);

  const body = req.body as Record<string, unknown> | undefined;
  if (!body || typeof body !== "object") {
    res.status(400).json({
      error: { type: "invalid_request_error", message: "JSON body required" },
    });
    return;
  }

  // Streaming requires duplex passthrough — out of scope for this proxy
  // until SSE forwarding is wired up. Reject loudly so clients don't hang.
  if (body["stream"] === true) {
    res.status(501).json({
      error: {
        type: "not_implemented",
        message:
          "Streaming responses are not yet supported by this billing proxy. " +
          "Set `stream: false` and retry.",
      },
    });
    return;
  }

  // ---- Auth + reserve ----
  const incoming = toIncomingMessage(req);
  const gate = await applyBillingGate(incoming, body);
  if (!gate.allow) {
    res.status(gate.status).json(gate.body ?? { error: "billing_error" });
    return;
  }

  // ---- Forward upstream ----
  const litellmBaseUrl = (config as { litellmBaseUrl?: string }).litellmBaseUrl;
  const litellmApiKey = (config as { litellmApiKey?: string }).litellmApiKey;
  if (!litellmBaseUrl) {
    await gate.release?.("upstream_error");
    res.status(503).json({
      error: {
        type: "service_unavailable",
        message:
          "BILLING_LITELLM_BASE_URL is not configured — operator must set it.",
      },
    });
    return;
  }

  const upstreamUrl = `${litellmBaseUrl.replace(/\/$/, "")}${upstreamPath}`;
  const upstreamHeaders = pickUpstreamHeaders(req, litellmApiKey);

  let upstreamRes: Response;
  let upstreamText: string;
  try {
    upstreamRes = await fetch(upstreamUrl, {
      method: "POST",
      headers: upstreamHeaders,
      body: JSON.stringify(body),
    });
    // Reading the body can also reject (e.g. connection reset mid-stream);
    // keep it inside the try so the reservation is released either way.
    upstreamText = await upstreamRes.text();
  } catch (err) {
    await gate.release?.("released_error");
    const msg = err instanceof Error ? err.message : "fetch failed";
    res.status(502).json({
      error: {
        type: "upstream_error",
        message: `LiteLLM proxy failed: ${msg}`,
      },
    });
    return;
  }

  // Parse the JSON body once — it is both relayed to the client AND mined
  // for usage. A non-JSON body is wrapped in an error envelope (truncated).
  let upstreamBody: unknown;
  try {
    upstreamBody = upstreamText.length > 0 ? JSON.parse(upstreamText) : {};
  } catch {
    upstreamBody = { error: { type: "upstream_error", message: upstreamText.slice(0, 500) } };
  }

  if (!upstreamRes.ok) {
    // Upstream rejected. Release the reservation — no charge — and pass the
    // error through with the upstream status.
    await gate.release?.("released_error");
    res.status(upstreamRes.status).json(upstreamBody);
    return;
  }

  // ---- Commit actual usage ----
  const usageRaw =
    (upstreamBody as Record<string, unknown> | null)?.["usage"];
  const usage =
    usageRaw && typeof usageRaw === "object"
      ? (usageRaw as Record<string, number>)
      : {};
  const requestedModel = body["model"];
  const model = typeof requestedModel === "string" ? requestedModel : "unknown";

  const describe = (e: unknown): string => (e instanceof Error ? e.message : String(e));

  let actualUsd = 0;
  try {
    actualUsd = computeActualCostUsd({ model, usage });
  } catch (err) {
    // Pricing lookup failed → commit zero, but leave a trace so the
    // operator can actually reconcile from logs.
    console.error(
      `[billing-proxy] cost computation failed for model "${model}"; committing 0: ${describe(err)}`,
    );
    actualUsd = 0;
  }

  // Upstream JSON is untrusted: coerce to a finite, non-negative number so
  // a malformed usage field never reaches the ledger as NaN.
  const toCount = (value: unknown): number => {
    const n = Number(value ?? 0);
    return Number.isFinite(n) && n > 0 ? n : 0;
  };

  try {
    // Both Anthropic-style (input_tokens/output_tokens) and OpenAI-style
    // (prompt_tokens/completion_tokens) — prefer Anthropic shape, fall back.
    const inputTokens = toCount(usage["input_tokens"] ?? usage["prompt_tokens"]);
    const outputTokens = toCount(usage["output_tokens"] ?? usage["completion_tokens"]);
    const cacheRead = toCount(usage["cache_read_input_tokens"]);
    const cacheCreate = toCount(usage["cache_creation_input_tokens"]);
    await gate.commit?.(actualUsd, {
      model,
      inputTokens,
      outputTokens,
      cacheInputTokens: cacheRead || undefined,
      cacheCreationTokens: cacheCreate || undefined,
      status: "ok",
    });
  } catch (err) {
    // Commit failure is non-fatal for the caller — they already have a
    // successful upstream response — but it must be visible to the operator.
    console.error(
      `[billing-proxy] usage commit failed for model "${model}": ${describe(err)}`,
    );
  }

  // ---- Relay response ----
  res.status(upstreamRes.status).json(upstreamBody as object);
}
|
|
242
|
+
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
// Route definitions — registered first in index.ts so they run before
|
|
245
|
+
// elizaOS's chat handler.
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
/**
 * Route handler for POST /v1/messages.
 *
 * Delegates to the shared LiteLLM proxy with the Anthropic-style upstream
 * path. The runtime argument is accepted to satisfy the handler signature
 * and is not used.
 */
async function handleMessages(
  req: RouteRequest,
  res: RouteResponse,
  _runtime: IAgentRuntime,
): Promise<void> {
  const upstreamPath = "/v1/messages";
  await proxyToLiteLLM(req, res, upstreamPath);
}
|
|
255
|
+
|
|
256
|
+
/**
 * Route handler for POST /v1/chat/completions.
 *
 * Delegates to the shared LiteLLM proxy with the OpenAI-style upstream
 * path. The runtime argument is accepted to satisfy the handler signature
 * and is not used.
 */
async function handleChatCompletions(
  req: RouteRequest,
  res: RouteResponse,
  _runtime: IAgentRuntime,
): Promise<void> {
  const upstreamPath = "/v1/chat/completions";
  await proxyToLiteLLM(req, res, upstreamPath);
}
|
|
263
|
+
|
|
264
|
+
// POST /v1/messages → LiteLLM proxy.
// NOTE(review): `rawPath` / `public` semantics come from the Route type in
// @elizaos/core and are not visible here — values are kept as-is.
const billingMessagesRoute: Route = {
  type: "POST",
  path: "/v1/messages",
  rawPath: true,
  public: true,
  name: "billing-messages-proxy",
  handler: handleMessages,
};

// POST /v1/chat/completions → LiteLLM proxy.
const billingChatCompletionsRoute: Route = {
  type: "POST",
  path: "/v1/chat/completions",
  rawPath: true,
  public: true,
  name: "billing-chat-completions-proxy",
  handler: handleChatCompletions,
};

/**
 * Proxy routes owned by the billing plugin, in registration order:
 * /v1/messages first, then /v1/chat/completions.
 */
export const messagesProxyRoutes: Route[] = [
  billingMessagesRoute,
  billingChatCompletionsRoute,
];
|
|
282
|
+
|
|
283
|
+
/**
 * Select the proxy routes to register for the given billing mode.
 *
 * Client-mode forwards everything through TOKAGENT_GATEWAY_URL — the
 * upstream gateway already owns /v1/messages — so nothing is registered
 * there. Server-mode gets the full proxy route table.
 *
 * @param mode - Billing mode the plugin was loaded in.
 * @returns An empty array for "client", otherwise `messagesProxyRoutes`.
 */
export function getMessagesProxyRoutes(mode: "server" | "client"): Route[] {
  return mode === "client" ? [] : messagesProxyRoutes;
}
|