npm - talon-agent - Versions diffs - 1.4.0 → 1.6.0 - Mend

talon-agent 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/package.json +2 -2
package/prompts/heartbeat.md +18 -6
package/src/__tests__/chat-settings.test.ts +20 -7
package/src/__tests__/fuzz.test.ts +3 -0
package/src/__tests__/heartbeat.test.ts +21 -0
package/src/__tests__/reload-plugins.test.ts +205 -0
package/src/__tests__/sessions.test.ts +155 -121
package/src/backend/claude-sdk/constants.ts +63 -0
package/src/backend/claude-sdk/handler.ts +236 -0
package/src/backend/claude-sdk/index.ts +10 -423
package/src/backend/claude-sdk/models.ts +216 -0
package/src/backend/claude-sdk/options.ts +129 -0
package/src/backend/claude-sdk/state.ts +59 -0
package/src/backend/claude-sdk/stream.ts +221 -0
package/src/backend/claude-sdk/warm.ts +89 -0
package/src/bootstrap.ts +22 -108
package/src/cli.ts +30 -15
package/src/core/dream.ts +5 -17
package/src/core/gateway-actions.ts +34 -2
package/src/core/gateway.ts +5 -2
package/src/core/heartbeat.ts +12 -22
package/src/core/models.ts +149 -0
package/src/core/plugin.ts +147 -0
package/src/core/tools/admin.ts +22 -0
package/src/core/tools/index.ts +2 -0
package/src/core/tools/types.ts +2 -1
package/src/core/types.ts +4 -0
package/src/frontend/teams/index.ts +7 -10
package/src/frontend/telegram/callbacks.ts +15 -27
package/src/frontend/telegram/commands.ts +32 -36
package/src/frontend/telegram/helpers.ts +13 -15
package/src/frontend/telegram/index.ts +1 -1
package/src/frontend/terminal/commands.ts +7 -4
package/src/index.ts +2 -1
package/src/storage/chat-settings.ts +5 -19
package/src/storage/sessions.ts +34 -40

package/src/__tests__/sessions.test.ts CHANGED Viewed

@@ -72,7 +72,6 @@ describe("sessions", () => {
       expect(session.usage.totalCacheRead).toBe(0);
       expect(session.usage.totalCacheWrite).toBe(0);
       expect(session.usage.lastPromptTokens).toBe(0);
-      expect(session.usage.estimatedCostUsd).toBe(0);
       expect(session.usage.totalResponseMs).toBe(0);
       expect(session.usage.lastResponseMs).toBe(0);
       expect(session.usage.fastestResponseMs).toBe(Infinity);
@@ -151,20 +150,6 @@ describe("sessions", () => {
       expect(getSession(chatId).usage.lastPromptTokens).toBe(250);
     });
-    it("calculates estimated cost", () => {
-      const chatId = "test-cost";
-      getSession(chatId);
-      recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      });
-      // Cost for 1M input tokens at $3/M = $3
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
-    });
     it("tracks response time duration", () => {
       const chatId = "test-duration";
       getSession(chatId);
@@ -251,98 +236,129 @@ describe("sessions", () => {
     });
   });
-  describe("recordUsage with model pricing", () => {
-    it("applies haiku pricing for haiku model", () => {
-      const chatId = "test-haiku-pricing";
+  describe("recordUsage — model tracking", () => {
+    it("tracks lastModel", () => {
+      const chatId = "test-last-model";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-haiku-4-5",
+        model: "claude-opus-4-6",
       });
-      // Haiku input: $0.8/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.8, 1);
+      expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
     });
-    it("applies opus pricing for opus model", () => {
-      const chatId = "test-opus-pricing";
+    it("updates fastestResponseMs correctly across turns", () => {
+      const chatId = "test-fastest-response";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-opus-4-6",
+        durationMs: 2000,
       });
-      // Opus input: $15/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
-    });
-    it("applies sonnet pricing by default (no model)", () => {
-      const chatId = "test-sonnet-pricing-default";
-      getSession(chatId);
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
+        cacheWrite: 0,
+        durationMs: 500,
+      });
       recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
+        durationMs: 1000,
+      });
+      const usage = getSession(chatId).usage;
+      expect(usage.fastestResponseMs).toBe(500);
+      expect(usage.lastResponseMs).toBe(1000);
+      expect(usage.totalResponseMs).toBe(3500);
+    });
+  });
+  describe("recordUsage — context tracking fields", () => {
+    it("stores contextTokens from SDK iteration data", () => {
+      const chatId = "test-ctx-tokens";
+      getSession(chatId);
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 10,
+        cacheWrite: 5,
+        contextTokens: 85000,
       });
-      // Sonnet input: $3/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
+      expect(getSession(chatId).usage.contextTokens).toBe(85000);
     });
-    it("calculates output cost correctly", () => {
-      const chatId = "test-output-cost";
+    it("stores contextWindow from SDK modelUsage", () => {
+      const chatId = "test-ctx-window";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 1_000_000,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-sonnet-4-6",
+        contextWindow: 1_000_000,
       });
-      // Sonnet output: $15/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
     });
-    it("calculates cache read cost correctly", () => {
-      const chatId = "test-cache-read-cost";
+    it("stores numApiCalls from SDK num_turns", () => {
+      const chatId = "test-num-api-calls";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 0,
-        cacheRead: 1_000_000,
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-sonnet-4-6",
+        numApiCalls: 3,
       });
-      // Sonnet cacheRead: $0.3/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.3, 2);
+      expect(getSession(chatId).usage.numApiCalls).toBe(3);
     });
-    it("calculates cache write cost correctly", () => {
-      const chatId = "test-cache-write-cost";
+    it("resets contextTokens to 0 when not provided", () => {
+      const chatId = "test-ctx-tokens-reset";
       getSession(chatId);
+      // First turn with context data
       recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
-        cacheWrite: 1_000_000,
-        model: "claude-sonnet-4-6",
+        cacheWrite: 0,
+        contextTokens: 50000,
       });
-      // Sonnet cacheWrite: $3.75/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3.75, 2);
+      expect(getSession(chatId).usage.contextTokens).toBe(50000);
+      // Second turn without context data — resets to 0
+      recordUsage(chatId, {
+        inputTokens: 200,
+        outputTokens: 100,
+        cacheRead: 0,
+        cacheWrite: 0,
+      });
+      expect(getSession(chatId).usage.contextTokens).toBe(0);
     });
-    it("tracks lastModel", () => {
-      const chatId = "test-last-model";
+    it("preserves contextWindow across turns when not reported", () => {
+      const chatId = "test-ctx-window-preserve";
       getSession(chatId);
       recordUsage(chatId, {
@@ -350,44 +366,75 @@ describe("sessions", () => {
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-opus-4-6",
+        contextWindow: 1_000_000,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
-      expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
+      // Turn without contextWindow — preserves previous value
+      recordUsage(chatId, {
+        inputTokens: 200,
+        outputTokens: 100,
+        cacheRead: 0,
+        cacheWrite: 0,
+      });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
     });
-    it("updates fastestResponseMs correctly across turns", () => {
-      const chatId = "test-fastest-response";
+    it("rejects non-finite contextWindow values and keeps previous", () => {
+      const chatId = "test-ctx-window-nan";
       getSession(chatId);
+      // Set a valid contextWindow first
       recordUsage(chatId, {
         inputTokens: 100,
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        durationMs: 2000,
+        contextWindow: 1_000_000,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
+      // NaN should not overwrite
       recordUsage(chatId, {
         inputTokens: 100,
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        durationMs: 500,
+        contextWindow: NaN,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
+      // Infinity should not overwrite
       recordUsage(chatId, {
         inputTokens: 100,
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        durationMs: 1000,
+        contextWindow: Infinity,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
+    });
-      const usage = getSession(chatId).usage;
-      expect(usage.fastestResponseMs).toBe(500);
-      expect(usage.lastResponseMs).toBe(1000);
-      expect(usage.totalResponseMs).toBe(3500);
+    it("rejects negative contextWindow values and keeps previous", () => {
+      const chatId = "test-ctx-window-neg";
+      getSession(chatId);
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
+        cacheWrite: 0,
+        contextWindow: 200_000,
+      });
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
+        cacheWrite: 0,
+        contextWindow: -100,
+      });
+      expect(getSession(chatId).usage.contextWindow).toBe(200_000);
     });
   });
@@ -484,52 +531,6 @@ describe("sessions", () => {
     });
   });
-  describe("cost calculation math", () => {
-    it("calculates multi-component cost correctly (input + output + cache)", () => {
-      const chatId = "test-cost-math";
-      getSession(chatId);
-      // Use exact token counts to verify the formula:
-      // cost = (input * pricing.input + cacheWrite * pricing.cacheWrite +
-      //         cacheRead * pricing.cacheRead + output * pricing.output) / 1_000_000
-      // Sonnet: input=$3/M, output=$15/M, cacheRead=$0.3/M, cacheWrite=$3.75/M
-      recordUsage(chatId, {
-        inputTokens: 500_000, // 500k * 3 / 1M = $1.50
-        outputTokens: 100_000, // 100k * 15 / 1M = $1.50
-        cacheRead: 200_000, // 200k * 0.3 / 1M = $0.06
-        cacheWrite: 100_000, // 100k * 3.75 / 1M = $0.375
-        model: "claude-sonnet-4-6",
-      });
-      const usage = getSession(chatId).usage;
-      // Total: 1.50 + 1.50 + 0.06 + 0.375 = $3.435
-      expect(usage.estimatedCostUsd).toBeCloseTo(3.435, 3);
-    });
-    it("accumulates cost across multiple recordUsage calls", () => {
-      const chatId = "test-cost-accum";
-      getSession(chatId);
-      recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      });
-      // Sonnet input: $3
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 2);
-      recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 1_000_000,
-        cacheRead: 0,
-        cacheWrite: 0,
-      });
-      // + Sonnet output: $15. Total: $18
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(18, 2);
-    });
-  });
   describe("cache hit rate tracking", () => {
     it("tracks cache read tokens across multiple turns", () => {
       const chatId = "test-cache-track-read";
@@ -571,7 +572,6 @@ describe("sessions", () => {
       const fresh = getSession(chatId);
       expect(fresh.sessionId).toBeUndefined();
       expect(fresh.turns).toBe(0);
-      expect(fresh.usage.estimatedCostUsd).toBe(0);
       expect(fresh.usage.totalInputTokens).toBe(0);
     });
   });
@@ -642,6 +642,40 @@ describe("sessions — migration of legacy field formats", () => {
     expect(session.createdAt).toBe(9999999);
   });
+  it("backfills missing context tracking fields on legacy sessions", () => {
+    vi.mocked(existsSync).mockReturnValueOnce(true);
+    vi.mocked(readFileSync).mockReturnValueOnce(
+      JSON.stringify({
+        "migrate-chat-ctx": {
+          sessionId: undefined,
+          turns: 4,
+          lastActive: 2000,
+          createdAt: 2000,
+          usage: {
+            totalInputTokens: 100,
+            totalOutputTokens: 50,
+            totalCacheRead: 10,
+            totalCacheWrite: 5,
+            lastPromptTokens: 115,
+            estimatedCostUsd: 0.5,
+            totalResponseMs: 1000,
+            lastResponseMs: 500,
+            fastestResponseMs: 500,
+            // contextTokens, contextWindow, numApiCalls deliberately omitted
+          },
+        },
+      }),
+    );
+    loadSessions();
+    const session = getSession("migrate-chat-ctx");
+    expect(session.usage.contextTokens).toBe(0);
+    expect(session.usage.contextWindow).toBe(0);
+    expect(session.usage.numApiCalls).toBe(0);
+    // Existing fields should be preserved
+    expect(session.usage.totalInputTokens).toBe(100);
+    expect(session.usage.lastPromptTokens).toBe(115);
+  });
   it("fixes fastestResponseMs of 0 to Infinity", () => {
     vi.mocked(existsSync).mockReturnValueOnce(true);
     vi.mocked(readFileSync).mockReturnValueOnce(

package/src/backend/claude-sdk/constants.ts ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * Shared constants for Claude SDK backend and background agents.
+ *
+ * Single source of truth for disallowed tool lists, thinking effort
+ * configuration, and streaming parameters.
+ *
+ * Exports: DISALLOWED_TOOLS_CORE, DISALLOWED_TOOLS_CHAT,
+ * DISALLOWED_TOOLS_BACKGROUND, EFFORT_MAP, STREAM_INTERVAL.
+ */
+// ── Disallowed tool lists ──────────────────────────────────────────────────
+/**
+ * Core tools disallowed in all SDK query contexts (chat, heartbeat, dream).
+ * These are interactive or planning-only tools that make no sense in a
+ * headless agent context.
+ *
+ * Declared `as const`, so the type is a readonly tuple of string literals.
+ * The two derived lists below spread this array, so a name added here is
+ * picked up by both of them.
+ */
+export const DISALLOWED_TOOLS_CORE = [
+  "EnterPlanMode",
+  "ExitPlanMode",
+  "EnterWorktree",
+  "ExitWorktree",
+  "TodoWrite",
+  "TodoRead",
+  "TaskCreate",
+  "TaskUpdate",
+  "TaskGet",
+  "TaskList",
+  "TaskOutput",
+  "TaskStop",
+  "AskUserQuestion",
+] as const;
+/**
+ * Disallowed tools for the main chat handler (core + web tools replaced by Brave MCP).
+ * Built by spreading DISALLOWED_TOOLS_CORE and appending "WebSearch" and "WebFetch".
+ */
+export const DISALLOWED_TOOLS_CHAT = [
+  ...DISALLOWED_TOOLS_CORE,
+  "WebSearch",
+  "WebFetch",
+] as const;
+/**
+ * Disallowed tools for background agents — heartbeat and dream (core + Agent).
+ * Built by spreading DISALLOWED_TOOLS_CORE and appending "Agent". Unlike the
+ * chat list, "WebSearch" and "WebFetch" are not part of this list.
+ */
+export const DISALLOWED_TOOLS_BACKGROUND = [
+  ...DISALLOWED_TOOLS_CORE,
+  "Agent",
+] as const;
+// ── Thinking / effort configuration ────────────────────────────────────────
+export const EFFORT_MAP: Record< // effort-level name → thinking mode plus optional effort value
+  string, // NOTE(review): key type is plain string, so a lookup with an unlisted name may type-check yet be undefined at runtime — confirm callers guard
+  {
+    thinking: { type: "adaptive" | "disabled" };
+    effort?: "low" | "medium" | "high" | "max"; // optional: the "off" entry below leaves it unset
+  }
+> = {
+  off: { thinking: { type: "disabled" } }, // thinking disabled; the only entry without an effort value
+  low: { thinking: { type: "adaptive" }, effort: "low" }, // remaining entries: adaptive thinking, effort equal to the key
+  medium: { thinking: { type: "adaptive" }, effort: "medium" },
+  high: { thinking: { type: "adaptive" }, effort: "high" },
+  max: { thinking: { type: "adaptive" }, effort: "max" },
+};
+// ── Streaming ──────────────────────────────────────────────────────────────
+/**
+ * Minimum interval (ms) between streaming delta callbacks to avoid flooding frontends.
+ * The value is in milliseconds (1000 = one second). The throttling itself is
+ * not implemented in this file; this is only the shared setting.
+ */
+export const STREAM_INTERVAL = 1000;

package/src/backend/claude-sdk/handler.ts ADDED Viewed

@@ -0,0 +1,236 @@
+/**
+ * Main message handler — executes a user query through the Claude Agent SDK.
+ *
+ * Orchestrates the full lifecycle: prompt formatting, SDK query, stream
+ * processing, error recovery (session expired / context overflow / model
+ * fallback), token accounting, and session persistence.
+ */
+import { query } from "@anthropic-ai/claude-agent-sdk";
+import {
+  getSession,
+  incrementTurns,
+  recordUsage,
+  resetSession,
+  setSessionId,
+  setSessionName,
+} from "../../storage/sessions.js";
+import { getChatSettings, setChatModel } from "../../storage/chat-settings.js";
+import { classify } from "../../core/errors.js";
+import { getFallbackModel } from "../../core/models.js";
+import { rebuildSystemPrompt } from "../../util/config.js";
+import { getPluginPromptAdditions } from "../../core/plugin.js";
+import { log, logError, logWarn } from "../../util/log.js";
+import { traceMessage } from "../../util/trace.js";
+import { formatFullDatetime } from "../../util/time.js";
+import type { QueryParams, QueryResult } from "../../core/types.js";
+import { getConfig } from "./state.js";
+import { buildSdkOptions } from "./options.js";
+import {
+  createStreamState,
+  isSystemInit,
+  isStreamEvent,
+  isAssistant,
+  isResult,
+  processStreamDelta,
+  processAssistantMessage,
+  processResultMessage,
+} from "./stream.js";
+// ── Main handler: runs one user query through the SDK, with at most one recovery retry ──
+export async function handleMessage(
+  params: QueryParams,
+  _retried = false, // internal flag: true on the recursive retry, so each recovery path below runs at most once
+): Promise<QueryResult> {
+  const config = getConfig();
+  const {
+    chatId,
+    text,
+    senderName,
+    isGroup,
+    onTextBlock,
+    onStreamDelta,
+    onToolUse,
+  } = params;
+  const session = getSession(chatId); // fetched once here; read again for the session-name check near the end
+  const t0 = Date.now(); // start of the duration measurement reported as durationMs
+  // Rebuild system prompt on first turn of a new/reset session so identity,
+  // memory, and workspace listing are fresh
+  if (session.turns === 0) {
+    rebuildSystemPrompt(config, getPluginPromptAdditions());
+  }
+  const { options, activeModel } = buildSdkOptions(chatId);
+  const msgIdHint = params.messageId ? ` [msg_id:${params.messageId}]` : ""; // empty when no messageId is supplied
+  const nowTag = `[${formatFullDatetime()}]`;
+  const prompt = isGroup
+    ? `${nowTag} [${senderName}]${msgIdHint}: ${text}` // group chats: prefix with the sender name
+    : `${nowTag}${msgIdHint} ${text}`;
+  log("agent", `[${chatId}] <- (${text.length} chars)`); // logs only the length, not the message text
+  traceMessage(chatId, "in", text, { senderName, isGroup });
+  const qi = query({ prompt, options });
+  const state = createStreamState(); // accumulator filled in by the process* helpers below
+  try {
+    for await (const message of qi) {
+      // Session ID capture
+      if (isSystemInit(message)) {
+        state.newSessionId = message.session_id;
+        continue;
+      }
+      // Stream text deltas and thinking deltas
+      if (isStreamEvent(message)) {
+        processStreamDelta(message, state, onStreamDelta);
+        continue;
+      }
+      // Complete assistant message — extract text blocks and tool calls
+      if (isAssistant(message)) {
+        const result = processAssistantMessage(message, state);
+        // Notify tool usage
+        for (const tool of result.tools) {
+          if (onToolUse) {
+            try {
+              onToolUse(tool.name, tool.input);
+            } catch {
+              /* non-fatal — a throwing onToolUse callback must not abort the stream loop */
+            }
+          }
+        }
+        // Send progress text segments (text before each tool call) in order
+        if (onTextBlock) {
+          for (const text of result.progressTexts) { // note: shadows the outer `text` inside this loop only
+            try {
+              await onTextBlock(text); // awaited one at a time, which keeps the segments in order
+            } catch {
+              /* non-fatal — don't abort the stream loop */
+            }
+          }
+        }
+        continue;
+      }
+      // Final result — read token counts and context info
+      if (isResult(message)) {
+        processResultMessage(message, state);
+      } // messages matching none of the four guards are ignored
+    }
+  } catch (err) {
+    const classified = classify(err);
+    // Session expired — reset and retry once
+    if (classified.reason === "session_expired" && !_retried) {
+      logWarn(
+        "agent",
+        `[${chatId}] Stale session, retrying with fresh session`,
+      );
+      resetSession(chatId);
+      return handleMessage(params, true); // the nested call re-reads the session via getSession
+    }
+    // Context length exceeded — safety net for edge cases where SDK
+    // auto-compaction doesn't prevent overflow
+    if (classified.reason === "context_length" && !_retried) {
+      logWarn(
+        "agent",
+        `[${chatId}] Context length exceeded, resetting session and retrying`,
+      );
+      resetSession(chatId);
+      return handleMessage(params, true);
+    }
+    // Model fallback: if overloaded/timeout, retry with the next-tier model
+    if (!_retried && classified.retryable) {
+      const fallback = getFallbackModel(activeModel);
+      if (fallback) { // no fallback available: fall through to the log-and-throw below
+        logWarn(
+          "agent",
+          `[${chatId}] ${classified.reason}, falling back to ${fallback.replace("claude-", "")}`,
+        );
+        resetSession(chatId);
+        const originalModel = getChatSettings(chatId).model;
+        setChatModel(chatId, fallback); // temporary override; presumably picked up by buildSdkOptions in the retry — TODO confirm
+        try {
+          return await handleMessage(params, true);
+        } finally {
+          setChatModel(chatId, originalModel); // restore the chat's model whether the retry succeeded or threw
+        }
+      }
+    }
+    logError("agent", `[${chatId}] SDK error: ${classified.message}`);
+    throw classified; // callers receive the classified error, not the raw `err`
+  }
+  // ── Persist session and usage ─────────────────────────────────────────────
+  const durationMs = Date.now() - t0;
+  if (state.newSessionId) setSessionId(chatId, state.newSessionId); // only when an init message supplied a session id
+  incrementTurns(chatId);
+  recordUsage(chatId, {
+    inputTokens: state.sdkInputTokens,
+    outputTokens: state.sdkOutputTokens,
+    cacheRead: state.sdkCacheRead,
+    cacheWrite: state.sdkCacheWrite,
+    durationMs,
+    model: activeModel,
+    contextTokens: state.contextTokens,
+    contextWindow: state.contextWindow,
+    numApiCalls: state.numApiCalls,
+  });
+  // Set a descriptive session name from the first message
+  if (session.turns === 0 && text) { // NOTE(review): `session` was read before incrementTurns above; if getSession returns the live object this is never true here — confirm
+    const cleanText = text
+      .replace(/^\[.*?\]\s*/g, "") // drop one leading [bracketed] tag
+      .replace(/\[msg_id:\d+\]\s*/g, "") // drop any [msg_id:<digits>] markers
+      .trim();
+    if (cleanText) {
+      const name =
+        cleanText.length > 30 ? cleanText.slice(0, 30) + "..." : cleanText; // cap at 30 chars plus an ellipsis
+      setSessionName(chatId, name);
+    }
+  }
+  // ── Build result ──────────────────────────────────────────────────────────
+  state.allResponseText += state.currentBlockText; // append whatever is still held in currentBlockText
+  const totalPrompt =
+    state.sdkInputTokens + state.sdkCacheRead + state.sdkCacheWrite;
+  const cacheHitPct =
+    totalPrompt > 0 ? Math.round((state.sdkCacheRead / totalPrompt) * 100) : 0; // guard against division by zero
+  log(
+    "agent",
+    `[${chatId}] -> (${durationMs}ms, in=${state.sdkInputTokens} out=${state.sdkOutputTokens} cache=${cacheHitPct}%` +
+      `${state.toolCalls > 0 ? ` tools=${state.toolCalls}` : ""})`,
+  );
+  traceMessage(chatId, "out", state.allResponseText, {
+    durationMs,
+    inputTokens: state.sdkInputTokens,
+    outputTokens: state.sdkOutputTokens,
+    cacheRead: state.sdkCacheRead,
+    cacheWrite: state.sdkCacheWrite,
+    toolCalls: state.toolCalls,
+    model: activeModel,
+  });
+  return { // text is trimmed; context fields and the model are recorded above but not returned
+    text: state.allResponseText.trim(),
+    durationMs,
+    inputTokens: state.sdkInputTokens,
+    outputTokens: state.sdkOutputTokens,
+    cacheRead: state.sdkCacheRead,
+    cacheWrite: state.sdkCacheWrite,
+  };
+}