npm - @tokagent/tokagentos - Versions diffs - 2.0.24 → 2.0.30 - Mend

@tokagent/tokagentos 2.0.24 → 2.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/routes/keys-routes.ts CHANGED Viewed

@@ -13,6 +13,9 @@
 import type { Route, RouteRequest, RouteResponse, IAgentRuntime } from "@elizaos/core";
 import type { IncomingMessage } from "node:http";
+import fs from "node:fs/promises";
+import path from "node:path";
+import process from "node:process";
 import {
   mintApiKey,
   listApiKeys,
@@ -184,6 +187,149 @@ async function handleRevokeKey(
   }
 }
+// ---------------------------------------------------------------------------
+// POST /v1/keys/install — write BILLING_CHAT_KEY to project .env
+// ---------------------------------------------------------------------------
+//
+// LOCAL ONLY. This endpoint runs on the user's local agent (whether it's
+// configured as billing client or billing server) and:
+//   1. Validates the request body has a syntactically valid `sk-ai-*` key
+//   2. Atomically upserts `BILLING_CHAT_KEY=<key>` into `<cwd>/.env`
+//      (preserving all other entries; existing commented `# BILLING_CHAT_KEY`
+//      lines are replaced in place rather than duplicated)
+//   3. Mirrors the new value into process.env immediately so in-flight
+//      chat calls pick it up without waiting for the restart
+//
+// The restart itself is DELEGATED to the existing `POST /api/restart`
+// endpoint — the dashboard calls this install endpoint first, and then
+// the restart endpoint second. Splitting them avoids duplicating restart
+// strategy logic across runners (dev-ui in-process bounce, prod CLI
+// supervisor catching exit 75, etc.) and keeps this route a pure
+// "write the file" operation.
+//
+// AUTH: in server-mode, requires the same authenticated identity as the rest
+// of /v1/keys/* (SIWE session OR existing API key); in client-mode the
+// request is accepted without an identity. Format-validates `sk-ai-...` but
+// does not verify the key was minted by this user — anyone with shell access
+// to the user's machine could already edit .env directly, so the auth check
+// is meant to guard against trivial CSRF, not a malicious LAN attacker.
+const SK_AI_KEY_RE = /^sk-ai-[A-Za-z0-9_-]{16,128}$/; // Whole string must be "sk-ai-" followed by 16–128 characters from [A-Za-z0-9_-].
+async function readIfExists(filePath: string): Promise<string | null> { // Resolves to the file's UTF-8 text, or to null when the path does not exist.
+  try {
+    return await fs.readFile(filePath, "utf8");
+  } catch (err) {
+    if ((err as NodeJS.ErrnoException).code === "ENOENT") return null; // A missing file is an expected outcome here, not a failure.
+    throw err; // Every other error is rethrown unchanged.
+  }
+}
+/**
+ * Atomically upsert `KEY=VALUE` in a project-root .env file.
+ *
+ * - If the key already appears on one or more lines (commented or not), the
+ *   LAST such line is replaced with the new uncommented `KEY=VALUE`; earlier
+ *   matching lines are left untouched.
+ * - Otherwise `KEY=VALUE` is appended to the end (with one preceding blank
+ *   line if the file ends with non-empty content).
+ * - The file's line-ending style is kept: a file containing CRLF is written
+ *   back with CRLF, anything else (including a new file) with LF.
+ *
+ * Atomicity: the new contents are written and fsynced to `<filePath>.tmp`,
+ * then renamed over `filePath`. The rename is atomic on POSIX. If the write
+ * or the rename fails, the temp file is removed (best effort) so a failed
+ * attempt does not leave a stray copy of the secret next to .env. We do NOT
+ * keep a `.bak` for the project .env because users version-control their
+ * .env templates separately and the .env itself is gitignored — a `.bak`
+ * would just be visual noise.
+ *
+ * Values are written verbatim (no quoting), so callers MUST validate
+ * `value` before invoking this function.
+ *
+ * @param filePath Path of the .env file; parent directories are created.
+ * @param key Variable name; regex metacharacters in it are escaped.
+ * @param value Value written after `=` exactly as given.
+ * @throws Any fs error from reading, writing, or renaming. The original
+ *   error is rethrown after the temp-file cleanup.
+ */
+async function upsertDotenvLine(
+  filePath: string,
+  key: string,
+  value: string,
+): Promise<void> {
+  const existing = (await readIfExists(filePath)) ?? "";
+  // Remember the line-ending style so a CRLF file is not converted to LF.
+  const eol = existing.includes("\r\n") ? "\r\n" : "\n";
+  const lines = existing.length === 0 ? [] : existing.split(/\r?\n/);
+  // Splitting a file that ends in a newline leaves a trailing empty element.
+  // Strip it so the trailing newline is managed explicitly below.
+  if (lines.length > 0 && lines[lines.length - 1] === "") {
+    lines.pop();
+  }
+  // Matches `KEY=`, `# KEY=`, `#KEY =` etc. at the start of a line.
+  const re = new RegExp(`^\\s*#?\\s*${key.replace(/[-/\\^$*+?.()|[\]{}]/g, "\\$&")}\\s*=`);
+  let updatedAt = -1;
+  // Scan from the end so the last occurrence is the one that gets replaced.
+  for (let i = lines.length - 1; i >= 0; i -= 1) {
+    if (re.test(lines[i] ?? "")) {
+      lines[i] = `${key}=${value}`;
+      updatedAt = i;
+      break;
+    }
+  }
+  if (updatedAt < 0) {
+    if (lines.length > 0 && (lines[lines.length - 1] ?? "").trim() !== "") {
+      lines.push("");
+    }
+    lines.push(`${key}=${value}`);
+  }
+  const nextContents = `${lines.join(eol)}${eol}`;
+  const tmp = `${filePath}.tmp`;
+  await fs.mkdir(path.dirname(filePath), { recursive: true });
+  try {
+    const handle = await fs.open(tmp, "w", 0o600);
+    try {
+      await handle.writeFile(nextContents, "utf8");
+      await handle.sync();
+    } finally {
+      await handle.close();
+    }
+    await fs.rename(tmp, filePath);
+  } catch (err) {
+    // Best-effort cleanup; a cleanup failure must not mask the real error.
+    await fs.rm(tmp, { force: true }).catch(() => undefined);
+    throw err;
+  }
+}
+async function handleInstallKey( // POST /v1/keys/install: validates the submitted key, writes it to <cwd>/.env, mirrors it into process.env.
+  req: RouteRequest,
+  res: RouteResponse,
+  _runtime: IAgentRuntime,
+): Promise<void> {
+  // Auth check. Server-mode requires a resolved billing identity (401 otherwise).
+  // Client-mode skips the identity requirement entirely: any request whose
+  // body carries a well-formed key is accepted.
+  // NOTE(review): that is only safe if the listener is reachable from localhost
+  // alone, which nothing in this handler enforces — confirm the bind address.
+  const identity = await resolveBillingIdentity(toIncomingMessage(req));
+  const billingState = getBillingState();
+  const isClientMode = billingState.config.billingMode === "client";
+  if (!identity && !isClientMode) {
+    res.status(401).json({ error: "Authentication required." });
+    return;
+  }
+  const body = req.body as Record<string, unknown> | undefined;
+  const key = typeof body?.["key"] === "string" ? body["key"].trim() : ""; // A missing or non-string "key" becomes "", which fails the format check below.
+  if (!SK_AI_KEY_RE.test(key)) {
+    res.status(400).json({
+      error: "Invalid key format — expected sk-ai-... (16+ url-safe chars).",
+    });
+    return;
+  }
+  const envPath = path.join(process.cwd(), ".env"); // Resolved against the process's current working directory.
+  try {
+    await upsertDotenvLine(envPath, "BILLING_CHAT_KEY", key);
+  } catch (err) {
+    const message = err instanceof Error ? err.message : String(err);
+    res.status(500).json({ error: `Failed to write .env: ${message}` });
+    return;
+  }
+  // Mirror the key into the live process so code reading process.env sees it
+  // before the restart. OPENAI_API_KEY is overwritten unconditionally, whatever
+  // it held before. Per the original note, the OpenAI plugin may cache its key
+  // at init, so the caller is still expected to POST /api/restart afterwards.
+  process.env["BILLING_CHAT_KEY"] = key;
+  process.env["OPENAI_API_KEY"] = key;
+  res.status(200).json({
+    ok: true,
+    envPath,
+    message: "Key saved to .env. Call POST /api/restart to apply.",
+  });
+}
 // ---------------------------------------------------------------------------
 // Route definitions
 // ---------------------------------------------------------------------------
@@ -197,6 +343,18 @@ export const keysRoutes: Route[] = [
     name: "billing-keys-mint",
     handler: handleMintKey,
   },
+  // MUST be registered BEFORE the /v1/keys/:id DELETE route in the array
+  // (routes are matched in registration order on rawPath: true with
+  // params). `install` is a string literal that could otherwise match the
+  // `:id` param and route the install POST through revoke handling.
+  {
+    type: "POST",
+    path: "/v1/keys/install",
+    rawPath: true,
+    public: true,
+    name: "billing-keys-install",
+    handler: handleInstallKey,
+  },
   {
     type: "GET",
     path: "/v1/keys",
@@ -235,6 +393,18 @@ function clientKeysRoutes(): Route[] {
         );
       },
     },
+    // /v1/keys/install is LOCAL on both modes (writes the local agent's
+    // own .env), so it uses the same direct handler as server-mode.
+    // Must precede /v1/keys/:id in this array for the same param-matching
+    // reason explained on the server-mode array.
+    {
+      type: "POST",
+      path: "/v1/keys/install",
+      rawPath: true,
+      public: true,
+      name: "billing-keys-install",
+      handler: handleInstallKey,
+    },
     {
       type: "GET",
       path: "/v1/keys",

package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/routes/messages-proxy-routes.ts CHANGED Viewed

@@ -26,12 +26,115 @@ import type { Route, RouteRequest, RouteResponse, IAgentRuntime } from "@elizaos
 import type { IncomingMessage } from "node:http";
 import { getBillingState, isBillingStateInitialized } from "../state.js";
 import { applyBillingGate } from "../middleware/billing-gate.js";
-import { computeActualCostUsd } from "@tokagentos/billing";
+import {
+  computeActualCostUsd,
+  estimateInputTokens,
+} from "@tokagentos/billing";
 function billingUnavailable(res: RouteResponse): void { // Replies 503 with a fixed JSON error body.
   res.status(503).json({ error: "Billing service unavailable." });
 }
+// ---------------------------------------------------------------------------
+// Anthropic prompt-cache auto-injection
+// ---------------------------------------------------------------------------
+/**
+ * Anthropic's minimum cacheable prefix is 1024 tokens for Sonnet/Opus and
+ * 2048 for Haiku. Below that the cache_control marker is a no-op. Use the
+ * stricter bound so the optimization always pays off when we add it.
+ *
+ * NOTE(review): these minimums are specific to a model generation, and
+ * Anthropic's prompt-caching docs appear to list a higher floor for some
+ * newer models — confirm 2048 is still the strictest bound for every model
+ * routed through this proxy.
+ */
+const MIN_CACHEABLE_PREFIX_TOKENS = 2048;
+/**
+ * Returns true if any node anywhere in `value` has a `cache_control` key.
+ *
+ * Arrays are walked element by element and objects property by property
+ * (own enumerable keys), recursively. Only the presence of the key matters;
+ * its value is not inspected. Primitives, null and undefined yield false.
+ */
+function hasCacheControlDeep(value: unknown): boolean {
+  if (!value || typeof value !== "object") return false;
+  if (Array.isArray(value)) return value.some(hasCacheControlDeep);
+  const obj = value as Record<string, unknown>;
+  if ("cache_control" in obj) return true; // `in` is true even when the key's value is null or undefined.
+  for (const k of Object.keys(obj)) {
+    if (hasCacheControlDeep(obj[k])) return true;
+  }
+  return false;
+}
+/**
+ * Auto-inject Anthropic prompt-cache markers on stable parts of the request.
+ *
+ * The billing engine already supports cache pricing end-to-end (see
+ * pricing/rates.ts cacheRead/cacheWrite columns and computeActualCostUsd),
+ * but most anthropic-sdk callers never set cache_control themselves. Without
+ * markers, Anthropic re-reads the full system + tools prefix on every turn
+ * at base input rate. With markers, the prefix is served from cache at ~10×
+ * cheaper after the first call within 5 minutes.
+ *
+ * What we touch:
+ *   - `system`: a non-empty string is wrapped into a single text block that
+ *     carries the marker; a non-empty array is shallow-cloned and the marker
+ *     goes on the LAST block whose `type` is "text" (no marker is added if
+ *     the array has no text block)
+ *   - `tools`: a non-empty array is shallow-cloned and the marker goes on
+ *     the LAST tool definition, provided that entry is an object
+ *
+ * NOTE(review): Anthropic documents the cache prefix order as
+ * tools → system → messages. If so, the tools marker covers the tool
+ * definitions only, and it is the system marker that covers tools + system,
+ * not the other way round. Confirm against the prompt-caching docs.
+ *
+ * What we DON'T touch:
+ *   - Requests whose `model` is not a string starting with `claude-` — other
+ *     providers ignore or reject the field; their caching is implicit.
+ *   - Bodies that already have ANY cache_control set, anywhere in the body —
+ *     respect client intent.
+ *   - Bodies whose estimated tools + system prefix is below
+ *     MIN_CACHEABLE_PREFIX_TOKENS.
+ *
+ * Returns `body` itself on any of the early exits above; otherwise a shallow
+ * copy of it with `system` and/or `tools` replaced by marked clones.
+ * Never mutates the input.
+ */
+function maybeInjectAnthropicCache(
+  body: Record<string, unknown>,
+): Record<string, unknown> {
+  const model = body.model;
+  if (typeof model !== "string" || !model.startsWith("claude-")) return body;
+  if (hasCacheControlDeep(body)) return body;
+  const tools = Array.isArray(body.tools) ? body.tools : undefined;
+  const sys = body.system;
+  const prefixTokens = estimateInputTokens([], tools, sys); // Empty first argument: presumably the message list, so only tools + system are counted — confirm against the helper.
+  if (prefixTokens < MIN_CACHEABLE_PREFIX_TOKENS) return body;
+  const next: Record<string, unknown> = { ...body }; // Shallow copy; only `system` and `tools` are replaced below.
+  if (typeof sys === "string" && sys.length > 0) {
+    next.system = [
+      { type: "text", text: sys, cache_control: { type: "ephemeral" } },
+    ];
+  } else if (Array.isArray(sys) && sys.length > 0) {
+    const cloned = sys.map((b) =>
+      b && typeof b === "object" ? { ...(b as Record<string, unknown>) } : b,
+    );
+    for (let i = cloned.length - 1; i >= 0; i--) { // Search backwards for the last text block.
+      const blk = cloned[i];
+      if (
+        blk &&
+        typeof blk === "object" &&
+        (blk as Record<string, unknown>).type === "text"
+      ) {
+        (cloned[i] as Record<string, unknown>).cache_control = {
+          type: "ephemeral",
+        };
+        break;
+      }
+    }
+    next.system = cloned;
+  }
+  if (tools && tools.length > 0) {
+    const clonedTools = tools.map((t) =>
+      t && typeof t === "object" ? { ...(t as Record<string, unknown>) } : t,
+    );
+    const last = clonedTools[clonedTools.length - 1];
+    if (last && typeof last === "object") { // A non-object last entry is left unmarked.
+      (last as Record<string, unknown>).cache_control = { type: "ephemeral" };
+    }
+    next.tools = clonedTools;
+  }
+  return next;
+}
 /**
  * Convert a plugin RouteRequest into the IncomingMessage shape that
  * applyBillingGate / resolveBillingIdentity expect.
@@ -104,14 +207,22 @@ async function proxyToLiteLLM(
   const config = state.config;
   if (!config.enabled) return billingUnavailable(res);
-  const body = req.body as Record<string, unknown> | undefined;
-  if (!body || typeof body !== "object") {
+  const rawBody = req.body as Record<string, unknown> | undefined;
+  if (!rawBody || typeof rawBody !== "object") {
     res.status(400).json({
       error: { type: "invalid_request_error", message: "JSON body required" },
     });
     return;
   }
+  // Auto-inject Anthropic prompt-cache markers on stable parts of the
+  // request. Done BEFORE the billing gate so the reservation sees the
+  // markers (gate.detectCacheControl reads them to size at cacheWrite rate
+  // — slightly higher first-call reservation, dramatically lower steady
+  // state). No-op for non-Claude models or bodies where the caller already
+  // set cache_control. See maybeInjectAnthropicCache for the full policy.
+  const body = maybeInjectAnthropicCache(rawBody);
   // Detect streaming. plugin-openai (Vercel AI SDK) defaults to
   // stream:true and there's no way to disable from the agent's chat flow,
   // so we MUST support it. For non-stream we buffer the JSON response;

package/templates/fullstack-app/plugins/plugin-web-fetch/build.ts ADDED Viewed

@@ -0,0 +1,35 @@
+#!/usr/bin/env bun
+/**
+ * Build script for @tokagent/plugin-web-fetch. Produces ESM in dist/.
+ *
+ * Usage: `bun run build.ts` for a one-off build, or
+ * `bun run build.ts --watch` to rebuild whenever something under ./src changes.
+ */
+import { existsSync, rmSync } from "node:fs";
+import { watch as watchDir } from "node:fs/promises";
+const watch = process.argv.includes("--watch");
+/**
+ * Wipes dist/ and bundles ./src/index.ts into it.
+ *
+ * @throws Error when the bundler reports a failed build.
+ */
+async function build(): Promise<void> {
+  if (existsSync("dist")) rmSync("dist", { recursive: true });
+  const result = await Bun.build({
+    entrypoints: ["./src/index.ts"],
+    outdir: "./dist",
+    target: "node",
+    format: "esm",
+    external: ["@elizaos/core"],
+    sourcemap: "external",
+  });
+  // Bun.build can resolve with `success: false` instead of rejecting; without
+  // this check a broken build would still print the success line below.
+  if (!result.success) {
+    for (const log of result.logs) console.error(log);
+    throw new Error("build failed");
+  }
+  console.log("✓ build complete");
+}
+if (watch) {
+  // A failed build must not end the watch session: log it and keep waiting.
+  const rebuild = async (): Promise<void> => {
+    try {
+      await build();
+    } catch (e) {
+      console.error(e);
+    }
+  };
+  await rebuild();
+  // Bun has no `Bun.watch`; node:fs/promises `watch` returns the async
+  // iterator of change events that this loop needs.
+  for await (const _ of watchDir("./src", { recursive: true })) {
+    console.log("[watch] rebuilding...");
+    await rebuild();
+  }
+} else {
+  await build();
+}

package/templates/fullstack-app/plugins/plugin-web-fetch/package.json ADDED Viewed

@@ -0,0 +1,37 @@
+{
+  "name": "@tokagent/plugin-web-fetch",
+  "version": "0.1.0",
+  "description": "Web fetch + Tavily search actions for the agent. FETCH_URL (Node built-in fetch, no key) + WEB_SEARCH (Tavily-backed, requires TAVILY_API_KEY).",
+  "type": "module",
+  "private": true,
+  "main": "dist/index.js",
+  "module": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "import": "./dist/index.js",
+      "types": "./dist/index.d.ts"
+    },
+    "./package.json": "./package.json"
+  },
+  "files": ["dist", "README.md"],
+  "scripts": {
+    "build": "bun run build.ts",
+    "dev": "bun run build.ts --watch",
+    "typecheck": "tsc --noEmit"
+  },
+  "peerDependencies": {
+    "@elizaos/core": "workspace:*"
+  },
+  "agentConfig": {
+    "pluginType": "elizaos:plugin:1.0.0",
+    "pluginParameters": {
+      "TAVILY_API_KEY": {
+        "type": "string",
+        "description": "Tavily search API key — required for the WEB_SEARCH action. Get a free key (1,000 searches/month, no credit card) at https://app.tavily.com/sign-in. Without it the agent cannot fulfill 'search the web' requests; it will reply with a clear error pointing here. Saving this key persists to config.env and triggers a runtime restart so the action picks it up.",
+        "required": false,
+        "sensitive": true
+      }
+    }
+  }
+}