@tokagent/tokagentos 2.0.14 → 2.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tokagent/tokagentos",
3
- "version": "2.0.14",
3
+ "version": "2.0.16",
4
4
  "description": "tokagentOS CLI - Create and upgrade tokagentOS project templates",
5
5
  "type": "module",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tokagent/plugin-tokagent-billing",
3
- "version": "2.0.7",
3
+ "version": "2.0.9",
4
4
  "description": "elizaOS plugin: Web3 credit-billing routes and middleware for the tokagentos LLM gateway.",
5
5
  "type": "module",
6
6
  "publishConfig": { "access": "public" },
@@ -17,6 +17,8 @@ import { getSetupRoutes } from "./routes/setup-routes.js";
17
17
  import { getSetupPanelRoutes } from "./routes/setup-panel-routes.js";
18
18
  // Operator dashboard SPA (migrated from llm-api-gateway)
19
19
  import { getDashboardRoutes } from "./routes/dashboard-routes.js";
20
+ // LiteLLM proxy for /v1/messages + /v1/chat/completions (server-mode only)
21
+ import { getMessagesProxyRoutes } from "./routes/messages-proxy-routes.js";
20
22
 
21
23
  /**
22
24
  * Detect the BILLING_MODE at module-load time. The Plugin.routes array is
@@ -118,6 +120,13 @@ export const tokagentBillingPlugin: Plugin = {
118
120
  ]
119
121
  : [],
120
122
  routes: [
123
+ // MUST be registered BEFORE other routes — these own /v1/messages and
124
+ // /v1/chat/completions in server-mode and run a pure LiteLLM proxy.
125
+ // Without these, elizaOS's chat-routes.ts dispatcher tries to handle
126
+ // /v1/messages as an agent chat (requires worlds DB + AI provider
127
+ // plugin) and fails with "tableName is required" on a billing-only
128
+ // deployment.
129
+ ...getMessagesProxyRoutes(BILLING_MODE),
121
130
  ...getAuthRoutes(BILLING_MODE),
122
131
  ...getKeysRoutes(BILLING_MODE),
123
132
  ...getCreditsRoutes(BILLING_MODE),
@@ -19,7 +19,7 @@ import {
19
19
  } from "../state.js";
20
20
  import { resolveBillingIdentity } from "../middleware/api-key-resolve.js";
21
21
  import { pickForward, forward, ensureClientReady } from "../lib/forward.js";
22
- import { creditState } from "@tokagentos/billing";
22
+ import { creditState, hydrate as hydrateCredits, readCredits } from "@tokagentos/billing";
23
23
  import { eq } from "drizzle-orm";
24
24
 
25
25
  // ---------------------------------------------------------------------------
@@ -63,7 +63,7 @@ async function handleGetCreditsMe(
63
63
  _runtime: IAgentRuntime,
64
64
  ): Promise<void> {
65
65
  if (!isBillingStateInitialized()) return billingUnavailable(res);
66
- const { db, config } = getServerBillingState();
66
+ const { db, config, clients } = getServerBillingState();
67
67
  if (!config.enabled) return billingUnavailable(res);
68
68
 
69
69
  const identity = await resolveBillingIdentity(toIncomingMessage(req));
@@ -75,6 +75,35 @@ async function handleGetCreditsMe(
75
75
  const wallet: Address = identity.wallet;
76
76
  const walletKey = wallet.toLowerCase();
77
77
 
78
+ // Sync on-chain credits → DB ledger BEFORE returning the balance.
79
+ //
80
+ // Why hydrate-on-read instead of a separate deposit watcher service:
81
+ // - depositX402 is now PERMISSIONLESS (any wallet can submit a user's
82
+ // signed EIP-3009 auth) — handleTopupSettle is only ONE possible
83
+ // submitter, so we can't rely on the settle path to credit the DB.
84
+ // - A dedicated event listener could miss events during agent
85
+ // downtime or RPC outages; hydrate-on-read self-heals at request
86
+ // time.
87
+ // - The vault's `credits[user]` mapping is the source of truth.
88
+ // hydrate() reconciles: balance = onChain - (reserved + accrued).
89
+ //
90
+ // Costs: one eth_call per dashboard refresh. Acceptable — this route is
91
+ // not on the hot inference path.
92
+ //
93
+ // Failure handling: if the RPC call throws, fall back to the stale DB
94
+ // row rather than 500ing. The user sees a slightly old balance instead
95
+ // of a broken page.
96
+ try {
97
+ const onChainCredits = await readCredits(
98
+ clients,
99
+ config.vaultAddress,
100
+ wallet,
101
+ );
102
+ await hydrateCredits(db, wallet, onChainCredits);
103
+ } catch (_err) {
104
+ // Swallow — fall back to whatever the DB has. Nothing is logged here, and a readCredits (RPC) failure never reaches hydrate, so hydrate-level logging does not cover it.
105
+ }
106
+
78
107
  // Read the credit state row (may not exist for a new wallet).
79
108
  const rows = await db
80
109
  .select()
@@ -0,0 +1,288 @@
1
+ /**
2
+ * Pure-proxy /v1/messages + /v1/chat/completions handlers.
3
+ *
4
+ * Why this exists: the elizaOS upstream that the billing server is built on
5
+ * routes /v1/messages and /v1/chat/completions through its agent chat
6
+ * handler (handleChatRoutes), which:
7
+ * - requires a fully-seeded worlds/messages DB
8
+ * - requires an AI provider plugin (ANTHROPIC_API_KEY / OPENAI_API_KEY)
9
+ * - wraps the response in the agent's character-prompt envelope
10
+ *
11
+ * None of that is appropriate for a billing GATEWAY whose job is to:
12
+ * 1. auth the caller via sk-ai-* API key
13
+ * 2. reserve credits against the wallet's spendable balance
14
+ * 3. forward the request VERBATIM to BILLING_LITELLM_BASE_URL
15
+ * 4. commit actual usage from the upstream response
16
+ *
17
+ * This file registers plugin routes that own /v1/messages and
18
+ * /v1/chat/completions BEFORE the chat-routes dispatcher in server.ts gets a
19
+ * chance to handle them (see server.ts BILLING_HOOK ordering change made in
20
+ * the same commit). The handler is a thin proxy: identical request body
21
+ * forwarded with the operator's LiteLLM API key, identical response body
22
+ * returned to the caller.
23
+ */
24
+
25
+ import type { Route, RouteRequest, RouteResponse, IAgentRuntime } from "@elizaos/core";
26
+ import type { IncomingMessage } from "node:http";
27
+ import { getBillingState, isBillingStateInitialized } from "../state.js";
28
+ import { applyBillingGate } from "../middleware/billing-gate.js";
29
+ import { computeActualCostUsd } from "@tokagentos/billing";
30
+
31
+ function billingUnavailable(res: RouteResponse): void {
32
+ res.status(503).json({ error: "Billing service unavailable." });
33
+ }
34
+
35
+ /**
36
+ * Convert a plugin RouteRequest into the IncomingMessage shape that
37
+ * applyBillingGate / resolveBillingIdentity expect.
38
+ *
39
+ * applyBillingGate reads:
40
+ * - req.headers (for x-api-key + bearer + content-type)
41
+ * - req.socket?.remoteAddress (rate limiting)
42
+ *
43
+ * Plugin RouteRequest already gives us headers; we provide a stub socket.
44
+ */
45
+ function toIncomingMessage(req: RouteRequest): IncomingMessage {
46
+ return {
47
+ headers: req.headers ?? {},
48
+ socket: { remoteAddress: undefined },
49
+ } as unknown as IncomingMessage;
50
+ }
51
+
52
+ /**
53
+ * Pick the headers we forward upstream. NOT the caller's Authorization /
54
+ * x-api-key — those are OUR auth tokens, not LiteLLM's. We attach the
55
+ * operator's LiteLLM API key to the upstream request instead.
56
+ */
57
+ function pickUpstreamHeaders(
58
+ req: RouteRequest,
59
+ litellmApiKey: string | undefined,
60
+ ): Record<string, string> {
61
+ const out: Record<string, string> = {
62
+ "Content-Type": "application/json",
63
+ };
64
+ if (litellmApiKey) {
65
+ // LiteLLM accepts both shapes — Bearer for OpenAI-style upstreams and
66
+ // x-api-key for Anthropic-style. We always send both; that is harmless.
67
+ out["Authorization"] = `Bearer ${litellmApiKey}`;
68
+ out["x-api-key"] = litellmApiKey;
69
+ }
70
+ const h = (req.headers ?? {}) as Record<string, string | string[] | undefined>;
71
+ const passthrough = ["anthropic-version", "anthropic-beta", "openai-organization"];
72
+ for (const name of passthrough) {
73
+ const v = h[name.toLowerCase()];
74
+ if (typeof v === "string") out[name] = v;
75
+ else if (Array.isArray(v) && typeof v[0] === "string") out[name] = v[0];
76
+ }
77
+ return out;
78
+ }
79
+
80
+ /**
81
+ * Shared proxy handler for /v1/messages and /v1/chat/completions.
82
+ *
83
+ * Flow:
84
+ * 1. applyBillingGate(req, body) — auth + reserve. Returns 401 on bad auth,
85
+ * 402 on insufficient balance, 400 on unsupported model.
86
+ * 2. fetch(`${litellmBaseUrl}${path}`, ...) — forward verbatim.
87
+ * 3. Parse usage from response, computeActualCostUsd, gate.commit(actual).
88
+ * 4. Write the upstream response body back to the caller.
89
+ *
90
+ * Failure modes:
91
+ * - Network error reaching LiteLLM → gate.release("released_error"),
92
+ * return 502.
93
+ * - Upstream returned non-2xx → still call gate.release (no charge), pass
94
+ * the error body through with the upstream status.
95
+ * - Streaming requests (stream: true) → not yet supported; return 501.
96
+ */
97
+ async function proxyToLiteLLM(
98
+ req: RouteRequest,
99
+ res: RouteResponse,
100
+ upstreamPath: string,
101
+ ): Promise<void> {
102
+ if (!isBillingStateInitialized()) return billingUnavailable(res);
103
+ const state = getBillingState();
104
+ const config = state.config;
105
+ if (!config.enabled) return billingUnavailable(res);
106
+
107
+ const body = req.body as Record<string, unknown> | undefined;
108
+ if (!body || typeof body !== "object") {
109
+ res.status(400).json({
110
+ error: { type: "invalid_request_error", message: "JSON body required" },
111
+ });
112
+ return;
113
+ }
114
+
115
+ // Streaming requires duplex passthrough — out of scope for this proxy
116
+ // until we wire up SSE forwarding. Reject loudly so clients don't hang.
117
+ if ((body as Record<string, unknown>).stream === true) {
118
+ res.status(501).json({
119
+ error: {
120
+ type: "not_implemented",
121
+ message:
122
+ "Streaming responses are not yet supported by this billing proxy. " +
123
+ "Set `stream: false` and retry.",
124
+ },
125
+ });
126
+ return;
127
+ }
128
+
129
+ // ---- Auth + reserve ----
130
+ const incoming = toIncomingMessage(req);
131
+ const gate = await applyBillingGate(incoming, body);
132
+ if (!gate.allow) {
133
+ res.status(gate.status).json(gate.body ?? { error: "billing_error" });
134
+ return;
135
+ }
136
+
137
+ // ---- Forward upstream ----
138
+ const litellmBaseUrl = (config as { litellmBaseUrl?: string }).litellmBaseUrl;
139
+ const litellmApiKey = (config as { litellmApiKey?: string }).litellmApiKey;
140
+ if (!litellmBaseUrl) {
141
+ await gate.release?.("upstream_error");
142
+ res.status(503).json({
143
+ error: {
144
+ type: "service_unavailable",
145
+ message:
146
+ "BILLING_LITELLM_BASE_URL is not configured — operator must set it.",
147
+ },
148
+ });
149
+ return;
150
+ }
151
+
152
+ const upstreamUrl = `${litellmBaseUrl.replace(/\/$/, "")}${upstreamPath}`;
153
+ const upstreamHeaders = pickUpstreamHeaders(req, litellmApiKey);
154
+
155
+ let upstreamRes: Response;
156
+ try {
157
+ upstreamRes = await fetch(upstreamUrl, {
158
+ method: "POST",
159
+ headers: upstreamHeaders,
160
+ body: JSON.stringify(body),
161
+ });
162
+ } catch (err) {
163
+ await gate.release?.("released_error");
164
+ const msg = err instanceof Error ? err.message : "fetch failed";
165
+ res.status(502).json({
166
+ error: {
167
+ type: "upstream_error",
168
+ message: `LiteLLM proxy failed: ${msg}`,
169
+ },
170
+ });
171
+ return;
172
+ }
173
+
174
+ // Parse the JSON body once — we both relay it to the client AND extract
175
+ // usage for billing commit.
176
+ const upstreamText = await upstreamRes.text();
177
+ let upstreamBody: unknown;
178
+ try {
179
+ upstreamBody = upstreamText.length > 0 ? JSON.parse(upstreamText) : {};
180
+ } catch {
181
+ upstreamBody = { error: { type: "upstream_error", message: upstreamText.slice(0, 500) } };
182
+ }
183
+
184
+ if (!upstreamRes.ok) {
185
+ // Upstream rejected. Release the reservation — no charge — and pass the
186
+ // error through with the upstream status.
187
+ await gate.release?.("released_error");
188
+ res.status(upstreamRes.status).json(upstreamBody);
189
+ return;
190
+ }
191
+
192
+ // ---- Commit actual usage ----
193
+ // LiteLLM/Anthropic responses include a `usage` block — fields match
194
+ // the `ClaudeUsage` shape (input_tokens, output_tokens, cache_*).
195
+ const usageRaw =
196
+ (upstreamBody as Record<string, unknown> | null)?.["usage"];
197
+ const usage =
198
+ usageRaw && typeof usageRaw === "object"
199
+ ? (usageRaw as Record<string, number>)
200
+ : {};
201
+ const model =
202
+ typeof (body as Record<string, unknown>)["model"] === "string"
203
+ ? ((body as Record<string, unknown>)["model"] as string)
204
+ : "unknown";
205
+
206
+ let actualUsd = 0;
207
+ try {
208
+ actualUsd = computeActualCostUsd({ model, usage });
209
+ } catch {
210
+ // Pricing lookup failed (unknown model) → commit zero and let the
211
+ // operator reconcile from logs. Caller still gets their response.
212
+ actualUsd = 0;
213
+ }
214
+
215
+ try {
216
+ // Accept both Anthropic-style (input_tokens/output_tokens) and OpenAI-style
217
+ // (prompt_tokens/completion_tokens) usage — prefer Anthropic, else OpenAI.
218
+ const inputTokens = Number(
219
+ usage["input_tokens"] ?? usage["prompt_tokens"] ?? 0,
220
+ );
221
+ const outputTokens = Number(
222
+ usage["output_tokens"] ?? usage["completion_tokens"] ?? 0,
223
+ );
224
+ const cacheRead = Number(usage["cache_read_input_tokens"] ?? 0);
225
+ const cacheCreate = Number(usage["cache_creation_input_tokens"] ?? 0);
226
+ await gate.commit?.(actualUsd, {
227
+ model,
228
+ inputTokens,
229
+ outputTokens,
230
+ cacheInputTokens: cacheRead || undefined,
231
+ cacheCreationTokens: cacheCreate || undefined,
232
+ status: "ok",
233
+ });
234
+ } catch {
235
+ // Commit failure is non-fatal for the caller — the user got their
236
+ // response. The operator's audit log will catch the inconsistency.
237
+ }
238
+
239
+ // ---- Relay response ----
240
+ res.status(upstreamRes.status).json(upstreamBody as object);
241
+ }
242
+
243
+ // ---------------------------------------------------------------------------
244
+ // Route definitions — registered first in index.ts so they run before
245
+ // elizaOS's chat handler.
246
+ // ---------------------------------------------------------------------------
247
+
248
+ async function handleMessages(
249
+ req: RouteRequest,
250
+ res: RouteResponse,
251
+ _runtime: IAgentRuntime,
252
+ ): Promise<void> {
253
+ return proxyToLiteLLM(req, res, "/v1/messages");
254
+ }
255
+
256
+ async function handleChatCompletions(
257
+ req: RouteRequest,
258
+ res: RouteResponse,
259
+ _runtime: IAgentRuntime,
260
+ ): Promise<void> {
261
+ return proxyToLiteLLM(req, res, "/v1/chat/completions");
262
+ }
263
+
264
+ export const messagesProxyRoutes: Route[] = [
265
+ {
266
+ type: "POST",
267
+ path: "/v1/messages",
268
+ rawPath: true,
269
+ public: true,
270
+ name: "billing-messages-proxy",
271
+ handler: handleMessages,
272
+ },
273
+ {
274
+ type: "POST",
275
+ path: "/v1/chat/completions",
276
+ rawPath: true,
277
+ public: true,
278
+ name: "billing-chat-completions-proxy",
279
+ handler: handleChatCompletions,
280
+ },
281
+ ];
282
+
283
+ export function getMessagesProxyRoutes(mode: "server" | "client"): Route[] {
284
+ // Client-mode forwards everything through TOKAGENT_GATEWAY_URL — the
285
+ // upstream gateway already owns /v1/messages. Don't register here.
286
+ if (mode === "client") return [];
287
+ return messagesProxyRoutes;
288
+ }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "version": "1.0.0",
3
- "generatedAt": "2026-05-19T14:54:21.961Z",
3
+ "generatedAt": "2026-05-19T18:37:22.113Z",
4
4
  "repoUrl": "https://github.com/elizaos/eliza",
5
5
  "templates": [
6
6
  {