@tokagent/tokagentos 2.0.20 → 2.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scaffold-patches/packages/agent/src/runtime/core-plugins.ts +17 -1
- package/templates/fullstack-app/plugins/plugin-tokagent-billing/package.json +1 -1
- package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/routes/messages-proxy-routes.ts +65 -0
- package/templates-manifest.json +1 -1
package/package.json
CHANGED
|
@@ -115,8 +115,24 @@ function configureBillingChatMirror(): void {
|
|
|
115
115
|
.replace(/\/v1$/, "");
|
|
116
116
|
process.env.OPENAI_API_KEY = process.env.BILLING_CHAT_KEY!.trim();
|
|
117
117
|
process.env.OPENAI_BASE_URL = `${base}/v1`;
|
|
118
|
+
|
|
119
|
+
// Default model: glm-4.7 is on the Tokamak LiteLLM allowlist. Without
|
|
120
|
+
// this, plugin-openai falls back to its OpenAI default (gpt-4o-mini /
|
|
121
|
+
// gpt-4o), which the billing gateway rejects with
|
|
122
|
+
// "unsupported_model" — and the agent's retry loop turns a fast 400
|
|
123
|
+
// into a multi-minute "Something went wrong" UX. User can override
|
|
124
|
+
// with OPENAI_SMALL_MODEL / OPENAI_LARGE_MODEL in .env.
|
|
125
|
+
const TOKAMAK_DEFAULT_MODEL = "glm-4.7";
|
|
126
|
+
if (!process.env.OPENAI_SMALL_MODEL?.trim()) {
|
|
127
|
+
process.env.OPENAI_SMALL_MODEL = TOKAMAK_DEFAULT_MODEL;
|
|
128
|
+
}
|
|
129
|
+
if (!process.env.OPENAI_LARGE_MODEL?.trim()) {
|
|
130
|
+
process.env.OPENAI_LARGE_MODEL = TOKAMAK_DEFAULT_MODEL;
|
|
131
|
+
}
|
|
118
132
|
console.info(
|
|
119
|
-
"[tokagent] BILLING_CHAT_KEY + TOKAGENT_GATEWAY_URL detected — wired as OpenAI-compatible provider for chat
|
|
133
|
+
"[tokagent] BILLING_CHAT_KEY + TOKAGENT_GATEWAY_URL detected — wired as OpenAI-compatible provider for chat (model=" +
|
|
134
|
+
process.env.OPENAI_SMALL_MODEL +
|
|
135
|
+
").",
|
|
120
136
|
);
|
|
121
137
|
}
|
|
122
138
|
configureBillingChatMirror();
|
package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/routes/messages-proxy-routes.ts
CHANGED
|
@@ -261,6 +261,63 @@ async function handleChatCompletions(
|
|
|
261
261
|
return proxyToLiteLLM(req, res, "/v1/chat/completions");
|
|
262
262
|
}
|
|
263
263
|
|
|
264
|
+
/**
|
|
265
|
+
* OpenAI-compatible model catalog. plugin-openai (and many OpenAI SDKs)
|
|
266
|
+
* call GET /v1/models on startup to validate the API key — if this returns
|
|
267
|
+
* 401/404, the plugin marks the provider unhealthy and the agent's chat
|
|
268
|
+
* composer never gets an active backend.
|
|
269
|
+
*
|
|
270
|
+
* We return a static list of the models the gateway actually supports
|
|
271
|
+
* (currently glm-4.7 on Tokamak's LiteLLM). Two reasons static beats
|
|
272
|
+
* proxying upstream:
|
|
273
|
+
* 1. Tokamak's LiteLLM /v1/models requires the operator's key, not the
|
|
274
|
+
* user's sk-ai-* — proxying would either expose the operator key or
|
|
275
|
+
* require a separate auth path. Static avoids the leak.
|
|
276
|
+
* 2. The billing layer's allowlist is the source of truth for "what
|
|
277
|
+
* models a billing client can use"; the upstream catalog is the
|
|
278
|
+
* operator's concern. Decoupling them lets us add/remove allowlisted
|
|
279
|
+
* models without redeploying the upstream.
|
|
280
|
+
*
|
|
281
|
+
* Auth: still gated via resolveBillingIdentity so only authenticated clients see
|
|
282
|
+
* the list. Returns the same 401 envelope as the chat routes on bad auth.
|
|
283
|
+
*/
|
|
284
|
+
async function handleModels(
|
|
285
|
+
req: RouteRequest,
|
|
286
|
+
res: RouteResponse,
|
|
287
|
+
_runtime: IAgentRuntime,
|
|
288
|
+
): Promise<void> {
|
|
289
|
+
if (!isBillingStateInitialized()) return billingUnavailable(res);
|
|
290
|
+
const state = getBillingState();
|
|
291
|
+
if (!state.config.enabled) return billingUnavailable(res);
|
|
292
|
+
|
|
293
|
+
// Auth check — applyBillingGate is overkill here (no model/body to gate
|
|
294
|
+
// on), so we resolve the identity directly and emit the auth-error envelope by hand.
|
|
295
|
+
const incoming = toIncomingMessage(req);
|
|
296
|
+
const { resolveBillingIdentity } = await import(
|
|
297
|
+
"../middleware/api-key-resolve.js"
|
|
298
|
+
);
|
|
299
|
+
const identity = await resolveBillingIdentity(incoming);
|
|
300
|
+
if (!identity) {
|
|
301
|
+
res.status(401).json({
|
|
302
|
+
error: { type: "invalid_auth", message: "Authentication required." },
|
|
303
|
+
});
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
const now = Math.floor(Date.now() / 1000);
|
|
308
|
+
res.status(200).json({
|
|
309
|
+
object: "list",
|
|
310
|
+
data: [
|
|
311
|
+
{
|
|
312
|
+
id: "glm-4.7",
|
|
313
|
+
object: "model",
|
|
314
|
+
created: now,
|
|
315
|
+
owned_by: "tokamak",
|
|
316
|
+
},
|
|
317
|
+
],
|
|
318
|
+
});
|
|
319
|
+
}
|
|
320
|
+
|
|
264
321
|
export const messagesProxyRoutes: Route[] = [
|
|
265
322
|
{
|
|
266
323
|
type: "POST",
|
|
@@ -278,6 +335,14 @@ export const messagesProxyRoutes: Route[] = [
|
|
|
278
335
|
name: "billing-chat-completions-proxy",
|
|
279
336
|
handler: handleChatCompletions,
|
|
280
337
|
},
|
|
338
|
+
{
|
|
339
|
+
type: "GET",
|
|
340
|
+
path: "/v1/models",
|
|
341
|
+
rawPath: true,
|
|
342
|
+
public: true,
|
|
343
|
+
name: "billing-models-catalog",
|
|
344
|
+
handler: handleModels,
|
|
345
|
+
},
|
|
281
346
|
];
|
|
282
347
|
|
|
283
348
|
export function getMessagesProxyRoutes(mode: "server" | "client"): Route[] {
|