npm - @tokagent/tokagentos - Versions diffs - 2.0.29 → 2.0.31 - Mend

@tokagent/tokagentos 2.0.29 → 2.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/scaffold-patches/packages/shared/src/onboarding-presets.characters.ts CHANGED Viewed

@@ -72,38 +72,38 @@ export const CHARACTER_DEFINITIONS: CharacterDefinition[] = [
     voicePresetId: "sarah",
     greetingAnimation: "animations/greetings/greeting1.fbx.gz",
     bio: [
-      "{{name}} is a DeFi vault operator. {{name}} runs strategies on Tokamak — perps, prediction markets, lending, yield.",
-      "{{name}} is calm, precise, and capital-conscious. {{name}} sizes positions carefully and exits cleanly.",
-      "{{name}} prefers to ask one clarifying question over guessing about a trade.",
-      "{{name}} reports position state in plain numbers and stops talking when there's nothing to add.",
-      "{{name}} treats the operator's hot wallet as production. Every action is reversible-by-design or explicitly flagged.",
-      "{{name}} explains tradeoffs, never hype. {{name}} doesn't chase narrative pumps.",
-      "{{name}} respects the vault execution model — writes go through allowlisted batches, not freelance signing.",
-      "{{name}} flags risk before opportunity.",
+      "{{name}} is a versatile AI assistant — research, automation, coding, analysis, and DeFi vault operations.",
+      "{{name}} is calm, precise, and tool-driven. Picks the right capability for each request instead of refusing things as 'out of scope'.",
+      "{{name}} answers from sources, not assumptions. Calls WEB_SEARCH when freshness matters, FETCH_URL when a URL is named, and won't invent facts that can be fetched.",
+      "{{name}} is comfortable across domains — from web3 research to general productivity to DeFi execution on Tokamak.",
+      "{{name}} prefers to ask one clarifying question over guessing.",
+      "{{name}} reports findings in plain numbers and stops when there's nothing to add.",
+      "{{name}} treats the operator's hot wallet as production. Money or irreversible actions get extra scrutiny.",
+      "{{name}} explains tradeoffs, never hype. No narrative pumps, no shilling.",
     ],
     system:
-      "You are {{name}}, a DeFi vault operator running on Tokamak. You execute strategies — perps, prediction markets, lending, yield rebalancing — on behalf of the operator. Be calm, precise, and brief. Lowercase is fine. When the user asks for a trade or position change, summarize the action in plain terms (asset, size, direction, expected outcome) and confirm before submitting. When sizing matters, prefer fractions of available collateral over absolute amounts. Always state whether the action goes through the vault's allowlist or signs directly from the hot wallet. Never invent prices, balances, or APRs — read them from tools. If a tool fails or returns stale data, say so and stop. No shilling, no narrative trading, no FOMO. Capital preservation matters more than upside. Ask one clarifying question when the request is ambiguous. Respond, then wait.",
+      "You are {{name}}, a versatile AI assistant. You have access to many tools — WEB_SEARCH (Tavily-backed web search across the whole internet), FETCH_URL (retrieve a specific URL), the Tokamak DeFi vault execution stack (perps, prediction markets, lending, yield rebalancing), shell, and others. Use whichever tool fits the user's request. Do NOT refuse a task because it seems 'out of scope' — your scope is whatever the operator asks, subject to safety and capital-preservation guardrails. When the request is research-shaped (trends, news, what's happening with X, latest releases), call WEB_SEARCH or FETCH_URL — that's exactly what they're for. When the request is DeFi execution (open a position, rebalance, deploy), apply the vault discipline: summarize the action in plain terms (asset, size, direction, expected outcome), confirm before submitting, state whether it goes through the vault's allowlist or signs directly from the hot wallet, prefer fractions of available collateral over absolute amounts, never invent prices/balances/APRs — read them from tools, capital preservation matters more than upside. Always be concise. Lowercase is fine. If a tool fails or returns stale data, say so and stop. No shilling, no narrative trading, no FOMO. Ask one clarifying question when the request is ambiguous. Respond, then wait.",
     adjectives: [
       "calm",
       "precise",
-      "capital-conscious",
       "tool-driven",
       "honest",
+      "versatile",
       "risk-aware",
       "concise",
       "operational",
     ],
     topics: [
+      "web research",
+      "AI and ML developments",
+      "web3 and crypto news",
+      "general productivity",
+      "data analysis",
+      "DeFi vault operations",
       "perpetuals trading",
       "prediction markets",
       "yield strategies",
-      "vault accounting",
-      "position sizing",
       "risk management",
-      "on-chain execution",
-      "rebalancing",
-      "stablecoin yield",
-      "drawdown control",
     ],
     style: {
       all: [

package/templates/fullstack-app/.env.example CHANGED Viewed

@@ -71,7 +71,11 @@ LITELLM_LARGE_MODEL=
 #
 # Skip these if you have a Google key with paid quota and want to use
 # Gemini Flash. Or pin to whichever model your account has best rates on.
-OPENROUTER_SMALL_MODEL=anthropic/claude-haiku-4-5
+# NOTE: OpenRouter uses dots in version suffixes (4.5, not 4-5). Bad model
+# ids surface as AI_NoOutputGeneratedError because the upstream returns an
+# error event the AI SDK stream parser cannot translate to a message.
+# Verify against https://openrouter.ai/api/v1/models before changing these.
+OPENROUTER_SMALL_MODEL=anthropic/claude-haiku-4.5
 OPENROUTER_LARGE_MODEL=anthropic/claude-sonnet-4.6
 # Local LLM endpoints (optional).

package/templates/fullstack-app/package.json CHANGED Viewed

@@ -24,7 +24,8 @@
     "build:desktop": "node tokagent/packages/app-core/scripts/desktop-build.mjs build --variant=base",
     "build:ios": "bun run --cwd apps/app build:ios",
     "build:android": "bun run --cwd apps/app build:android",
-    "dev": "node scripts/ensure-plugin-builds.mjs && node tokagent/packages/app-core/scripts/rt.mjs tokagent/packages/app-core/scripts/dev-ui.mjs --name=__PROJECT_SLUG__",
+    "postinstall": "node scripts/verify-llm-plugins.mjs",
+    "dev": "node scripts/verify-llm-plugins.mjs && node scripts/ensure-plugin-builds.mjs && node tokagent/packages/app-core/scripts/rt.mjs tokagent/packages/app-core/scripts/dev-ui.mjs --name=__PROJECT_SLUG__",
     "dev:ui": "node tokagent/packages/app-core/scripts/rt.mjs tokagent/packages/app-core/scripts/dev-ui.mjs --name=__PROJECT_SLUG__ --ui-only",
     "plugins:build": "node scripts/ensure-plugin-builds.mjs",
     "dev:desktop": "bun tokagent/packages/app-core/scripts/dev-platform.mjs",
@@ -47,17 +48,20 @@
     "@elizaos/plugin-local-embedding": "2.0.0-alpha.537",
     "@elizaos/plugin-ollama": "2.0.0-alpha.537",
     "@elizaos/plugin-openai": "2.0.0-alpha.537",
-    "@elizaos/plugin-openrouter": "2.0.0-alpha.13",
+    "@elizaos/plugin-openrouter": "2.0.0-alpha.10",
     "@elizaos/plugin-shell": "2.0.0-alpha.537",
-    "@elizaos/plugin-sql": "2.0.0-alpha.20"
+    "@elizaos/plugin-sql": "2.0.0-alpha.20",
+    "@tokagent/plugin-web-fetch": "workspace:*"
   },
   "devDependencies": {
     "typescript": "~5.9.3"
   },
   "resolutions": {
-    "typescript": "~5.9.3"
+    "typescript": "~5.9.3",
+    "@elizaos/plugin-openrouter": "2.0.0-alpha.10"
   },
   "overrides": {
-    "typescript": "~5.9.3"
+    "typescript": "~5.9.3",
+    "@elizaos/plugin-openrouter": "2.0.0-alpha.10"
   }
 }

package/templates/fullstack-app/plugins/plugin-tokagent-billing/src/routes/messages-proxy-routes.ts CHANGED Viewed

@@ -26,12 +26,115 @@ import type { Route, RouteRequest, RouteResponse, IAgentRuntime } from "@elizaos
 import type { IncomingMessage } from "node:http";
 import { getBillingState, isBillingStateInitialized } from "../state.js";
 import { applyBillingGate } from "../middleware/billing-gate.js";
-import { computeActualCostUsd } from "@tokagentos/billing";
+import {
+  computeActualCostUsd,
+  estimateInputTokens,
+} from "@tokagentos/billing";
 function billingUnavailable(res: RouteResponse): void {
   res.status(503).json({ error: "Billing service unavailable." });
 }
+// ---------------------------------------------------------------------------
+// Anthropic prompt-cache auto-injection
+// ---------------------------------------------------------------------------
+/**
+ * Anthropic's minimum cacheable prefix is 1024 tokens for Sonnet/Opus and
+ * 2048 for Haiku. Below that the cache_control marker is a no-op. Use the
+ * stricter bound so the optimization always pays off when we add it.
+ *
+ * Compared against the estimateInputTokens() result for `tools` + `system`
+ * in maybeInjectAnthropicCache; request bodies whose estimate falls below
+ * this value are passed through untouched.
+ *
+ * NOTE(review): the per-model minimums quoted above cannot be verified from
+ * this file, and newer Claude models may require a larger prefix than 2048
+ * tokens — confirm against Anthropic's prompt-caching documentation before
+ * relying on this being the strictest bound.
+ */
+const MIN_CACHEABLE_PREFIX_TOKENS = 2048;
+/**
+ * Returns true if any node anywhere in `value` has a `cache_control` key.
+ *
+ * Walks arrays element by element and objects key by key, recursing into
+ * every nested value. Falsy values and non-objects (null, undefined,
+ * strings, numbers, booleans) yield false. Only the presence of the key is
+ * tested; whatever is stored under it is never inspected.
+ *
+ * @param value - Arbitrary value to scan.
+ * @returns true as soon as a `cache_control` key is found, false otherwise.
+ */
+function hasCacheControlDeep(value: unknown): boolean {
+  if (!value || typeof value !== "object") return false;
+  if (Array.isArray(value)) return value.some(hasCacheControlDeep);
+  const obj = value as Record<string, unknown>;
+  if ("cache_control" in obj) return true; // presence check only; `in` also sees inherited keys
+  for (const k of Object.keys(obj)) {
+    if (hasCacheControlDeep(obj[k])) return true;
+  }
+  return false;
+}
+/**
+ * Auto-inject Anthropic prompt-cache markers on stable parts of the request.
+ *
+ * The billing engine already supports cache pricing end-to-end (see
+ * pricing/rates.ts cacheRead/cacheWrite columns and computeActualCostUsd),
+ * but most anthropic-sdk callers never set cache_control themselves. Without
+ * markers, Anthropic re-reads the full system + tools prefix on every turn
+ * at base input rate. With markers, the prefix is served from cache at ~10×
+ * cheaper after the first call within 5 minutes.
+ *
+ * What we touch:
+ *   - `system`: a non-empty string is normalised to a one-element array of
+ *     text blocks carrying the marker; for a non-empty array the marker goes
+ *     on the LAST block whose `type` is "text" (no marker is added when the
+ *     array contains no such block)
+ *   - `tools`: marker on the LAST tool definition, provided that entry is an
+ *     object
+ *
+ * NOTE(review): the original comment stated that the tools marker "also
+ * covers `system`". Anthropic documents the cache prefix order as
+ * tools → system → messages, which would mean the tools marker covers the
+ * tools only and the system marker covers tools + system — confirm against
+ * the prompt-caching documentation.
+ *
+ * What we DON'T touch:
+ *   - Non-Claude models — other providers ignore or reject the field; their
+ *     caching is implicit. "Claude" here means `body.model` is a string that
+ *     starts with "claude-".
+ *     NOTE(review): provider-prefixed ids such as "anthropic/claude-…" do
+ *     not match this test — confirm that such ids never reach this route.
+ *   - Bodies that already have ANY cache_control set — respect client intent.
+ *     The check is deep and covers the whole body, messages included.
+ *   - Bodies whose stable prefix is below Anthropic's minimum cacheable size
+ *     (estimate < MIN_CACHEABLE_PREFIX_TOKENS).
+ *
+ * Never mutates the input: the body, the `system` array, the `tools` array
+ * and each of their object elements are shallow-copied before a marker is
+ * written.
+ *
+ * @param body - Parsed JSON request body.
+ * @returns The same `body` reference when one of the three skip conditions
+ *   above applies. Otherwise a shallow copy — note that a copy is returned
+ *   once the size check passes even if no marker could be placed (e.g. a
+ *   `system` array without text blocks and no tools).
+ */
+function maybeInjectAnthropicCache(
+  body: Record<string, unknown>,
+): Record<string, unknown> {
+  const model = body.model;
+  if (typeof model !== "string" || !model.startsWith("claude-")) return body;
+  if (hasCacheControlDeep(body)) return body;
+  const tools = Array.isArray(body.tools) ? body.tools : undefined;
+  const sys = body.system;
+  const prefixTokens = estimateInputTokens([], tools, sys); // presumably ([] = messages, tools, system) — confirm against @tokagentos/billing
+  if (prefixTokens < MIN_CACHEABLE_PREFIX_TOKENS) return body;
+  const next: Record<string, unknown> = { ...body };
+  if (typeof sys === "string" && sys.length > 0) {
+    next.system = [
+      { type: "text", text: sys, cache_control: { type: "ephemeral" } },
+    ];
+  } else if (Array.isArray(sys) && sys.length > 0) {
+    const cloned = sys.map((b) =>
+      b && typeof b === "object" ? { ...(b as Record<string, unknown>) } : b,
+    );
+    for (let i = cloned.length - 1; i >= 0; i--) { // scan backwards for the last text block
+      const blk = cloned[i];
+      if (
+        blk &&
+        typeof blk === "object" &&
+        (blk as Record<string, unknown>).type === "text"
+      ) {
+        (cloned[i] as Record<string, unknown>).cache_control = {
+          type: "ephemeral",
+        };
+        break;
+      }
+    }
+    next.system = cloned;
+  }
+  if (tools && tools.length > 0) {
+    const clonedTools = tools.map((t) =>
+      t && typeof t === "object" ? { ...(t as Record<string, unknown>) } : t,
+    );
+    const last = clonedTools[clonedTools.length - 1];
+    if (last && typeof last === "object") {
+      (last as Record<string, unknown>).cache_control = { type: "ephemeral" };
+    }
+    next.tools = clonedTools;
+  }
+  return next;
+}
 /**
  * Convert a plugin RouteRequest into the IncomingMessage shape that
  * applyBillingGate / resolveBillingIdentity expect.
@@ -104,14 +207,22 @@ async function proxyToLiteLLM(
   const config = state.config;
   if (!config.enabled) return billingUnavailable(res);
-  const body = req.body as Record<string, unknown> | undefined;
-  if (!body || typeof body !== "object") {
+  const rawBody = req.body as Record<string, unknown> | undefined;
+  if (!rawBody || typeof rawBody !== "object") {
     res.status(400).json({
       error: { type: "invalid_request_error", message: "JSON body required" },
     });
     return;
   }
+  // Auto-inject Anthropic prompt-cache markers on stable parts of the
+  // request. Done BEFORE the billing gate so the reservation sees the
+  // markers (gate.detectCacheControl reads them to size at cacheWrite rate
+  // — slightly higher first-call reservation, dramatically lower steady
+  // state). No-op for non-Claude models or bodies where the caller already
+  // set cache_control. See maybeInjectAnthropicCache for the full policy.
+  const body = maybeInjectAnthropicCache(rawBody);
   // Detect streaming. plugin-openai (Vercel AI SDK) defaults to
   // stream:true and there's no way to disable from the agent's chat flow,
   // so we MUST support it. For non-stream we buffer the JSON response;

package/templates/fullstack-app/plugins/plugin-web-fetch/build.ts ADDED Viewed

@@ -0,0 +1,35 @@
+#!/usr/bin/env bun
+/**
+ * Build script for @tokagent/plugin-web-fetch. Produces ESM in dist/.
+ *
+ * Usage: `bun run build.ts` for a one-shot build, or add `--watch` to
+ * rebuild whenever something under ./src changes.
+ */
+import { existsSync, rmSync } from "node:fs";
+import { watch as watchDir } from "node:fs/promises";
+const watch = process.argv.includes("--watch");
+/**
+ * Removes any existing dist/ directory and bundles ./src/index.ts into it.
+ *
+ * Throws when the bundler reports failure, so a broken build is never
+ * announced as complete and a one-shot run exits non-zero.
+ */
+async function build() {
+  if (existsSync("dist")) rmSync("dist", { recursive: true });
+  const result = await Bun.build({
+    entrypoints: ["./src/index.ts"],
+    outdir: "./dist",
+    target: "node",
+    format: "esm",
+    external: ["@elizaos/core"],
+    sourcemap: "external",
+  });
+  // Bun versions that resolve instead of throwing signal failure through
+  // `success: false`; surface the bundler logs and fail explicitly.
+  if (!result.success) {
+    for (const log of result.logs) console.error(log);
+    throw new Error("build failed");
+  }
+  console.log("✓ build complete");
+}
+if (watch) {
+  await build();
+  // Bun has no `Bun.watch` API; directory watching goes through the Node
+  // compatible fs watcher, whose promise form is an async iterable.
+  const watcher = watchDir("./src", { recursive: true });
+  for await (const _ of watcher) {
+    console.log("[watch] rebuilding...");
+    try {
+      await build();
+    } catch (e) {
+      // Keep watching after a failed rebuild; the next change retries.
+      console.error(e);
+    }
+  }
+} else {
+  await build();
+}

package/templates/fullstack-app/plugins/plugin-web-fetch/package.json ADDED Viewed

@@ -0,0 +1,37 @@
+{
+  "name": "@tokagent/plugin-web-fetch",
+  "version": "0.1.0",
+  "description": "Web fetch + Tavily search actions for the agent. FETCH_URL (Node built-in fetch, no key) + WEB_SEARCH (Tavily-backed, requires TAVILY_API_KEY).",
+  "type": "module",
+  "private": true,
+  "main": "dist/index.js",
+  "module": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "import": "./dist/index.js",
+      "types": "./dist/index.d.ts"
+    },
+    "./package.json": "./package.json"
+  },
+  "files": ["dist", "README.md"],
+  "scripts": {
+    "build": "bun run build.ts",
+    "dev": "bun run build.ts --watch",
+    "typecheck": "tsc --noEmit"
+  },
+  "peerDependencies": {
+    "@elizaos/core": "workspace:*"
+  },
+  "agentConfig": {
+    "pluginType": "elizaos:plugin:1.0.0",
+    "pluginParameters": {
+      "TAVILY_API_KEY": {
+        "type": "string",
+        "description": "Tavily search API key — required for the WEB_SEARCH action. Get a free key (1,000 searches/month, no credit card) at https://app.tavily.com/sign-in. Without it the agent cannot fulfill 'search the web' requests; it will reply with a clear error pointing here. Saving this key persists to config.env and triggers a runtime restart so the action picks it up.",
+        "required": false,
+        "sensitive": true
+      }
+    }
+  }
+}