npm - talon-agent - Versions diffs - 1.4.0 → 1.5.0 - Mend

talon-agent 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/package.json +2 -2
package/prompts/heartbeat.md +18 -6
package/src/__tests__/heartbeat.test.ts +21 -0
package/src/__tests__/reload-plugins.test.ts +199 -0
package/src/__tests__/sessions.test.ts +155 -121
package/src/backend/claude-sdk/index.ts +198 -62
package/src/bootstrap.ts +3 -103
package/src/core/gateway-actions.ts +42 -1
package/src/core/heartbeat.ts +8 -5
package/src/core/plugin.ts +147 -0
package/src/core/tools/admin.ts +22 -0
package/src/core/tools/index.ts +2 -0
package/src/core/tools/types.ts +2 -1
package/src/frontend/teams/index.ts +9 -10
package/src/frontend/telegram/commands.ts +11 -10
package/src/storage/sessions.ts +34 -40

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "talon-agent",
-  "version": "1.4.0",
+  "version": "1.5.0",
   "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
   "author": "Dylan Neve",
   "license": "MIT",
@@ -51,7 +51,7 @@
     "format:check": "prettier --check src/ prompts/"
   },
   "dependencies": {
-    "@anthropic-ai/claude-agent-sdk": "^0.2.97",
+    "@anthropic-ai/claude-agent-sdk": "^0.2.104",
     "@brave/brave-search-mcp-server": "^2.0.75",
     "@clack/prompts": "^1.2.0",
     "@grammyjs/auto-retry": "^2.0.2",

package/prompts/heartbeat.md CHANGED

@@ -1,6 +1,14 @@
 You are Talon's background heartbeat agent. You run periodically (every {{intervalMinutes}} minutes) to perform maintenance tasks defined by the user.
-You have access ONLY to filesystem tools (Read, Write, Edit, Bash, Glob, Grep). Do NOT attempt to use any Telegram, MCP, or messaging tools.
+You have access to filesystem tools (Read, Write, Edit, Bash, Glob, Grep) and all loaded MCP plugins. Do NOT use Telegram messaging tools — you cannot send messages to users.
+## Available MCP Tools
+You have access to all registered MCP plugin tools (excluding Telegram messaging tools). The exact set depends on what plugins are enabled in the current configuration, but may include email, memory/knowledge graph, web search, Wikipedia, GitHub, media processing, browser automation, and more.
+Only use tools that are actually available in your current session. Do not assume any specific tool is present — check what's exposed to you at runtime.
+Use available tools when they help accomplish the user-defined tasks (e.g. checking email, querying the knowledge graph, searching the web for updates).
 ## Context
@@ -20,11 +28,15 @@ If the instructions file does not exist or is empty, perform these default tasks
 1. **Review recent logs** — Check `{{logsDir}}/` for log files dated after `{{lastRunIso}}`. If `{{lastRunIso}}` is `never`, treat it as the beginning of time and review all available logs. Extract any new facts, preferences, or notable events.
 2. **Update memory** — Merge any new information into `{{memoryFile}}`, keeping entries concise and factual.
 3. **Update daily notes** — Write today's learnings, observations, corrections, and follow-ups to `{{dailyMemoryFile}}`. Keep entries concise — the bot reads this file on demand for context.
-4. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
+4. **Check email** — If email tools are available, check the inbox for new messages and note anything important.
+5. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
 ## Rules
-- Be surgical and precise. Do not rewrite files unnecessarily.
-- Do not modify files outside the workspace unless the instructions explicitly allow it.
-- Keep your work focused and efficient — you have a 10-minute time limit.
-- When done, stop. The system handles all state tracking.
+- Do NOT use Telegram messaging tools — they are not available in heartbeat mode.
+- Be concise in log entries and memory updates.
+- If a task fails, log the error and move on to the next task.
+- Do NOT modify the instructions file — only read it.
+- Be surgical: only make the minimal file changes needed to complete the current task.
+- Do NOT create, modify, move, or delete files outside `{{workspace}}` unless the user-defined instructions explicitly require it.
+- Complete all tasks within the time budget. If running low, prioritize memory updates.

package/src/__tests__/heartbeat.test.ts CHANGED

@@ -48,6 +48,10 @@ vi.mock("@anthropic-ai/claude-agent-sdk", () => ({
   query: queryMock,
 }));
+vi.mock("../core/plugin.js", () => ({
+  getPluginMcpServers: vi.fn(() => ({})),
+}));
 vi.mock("../util/paths.js", () => ({
   files: {
     heartbeatState: "/fake/.talon/workspace/memory/heartbeat_state.json",
@@ -184,6 +188,23 @@ describe("forceHeartbeat", () => {
     expect(finalState.status).toBe("idle");
   });
+  it("passes plugin MCP servers to the agent via getPluginMcpServers", async () => {
+    const { getPluginMcpServers } = await import("../core/plugin.js");
+    const mockServers = {
+      "email-tools": { command: "node", args: ["email.js"], env: {} },
+    };
+    vi.mocked(getPluginMcpServers).mockReturnValue(mockServers);
+    await forceHeartbeat();
+    expect(getPluginMcpServers).toHaveBeenCalledWith("", "heartbeat");
+    // Verify mcpServers was passed through to query()
+    const queryCall = queryMock.mock.calls[0] as unknown as [
+      { options: { mcpServers: Record<string, unknown> } },
+    ];
+    expect(queryCall[0].options.mcpServers).toEqual(mockServers);
+  });
   it("preserves previous last_run on failure", async () => {
     const previousLastRun = Date.now() - 3600_000;
     existsSyncMock.mockReturnValue(true);

package/src/__tests__/reload-plugins.test.ts ADDED

@@ -0,0 +1,199 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+// ── Module mocks ──────────────────────────────────────────────────────────
+vi.mock("../util/log.js", () => ({
+  log: vi.fn(),
+  logError: vi.fn(),
+  logWarn: vi.fn(),
+  logDebug: vi.fn(),
+}));
+vi.mock("write-file-atomic", () => ({
+  default: { sync: vi.fn() },
+}));
+// Mock cheerio (required by gateway-actions via extractText)
+vi.mock("cheerio", () => ({
+  load: vi.fn(() => {
+    const $ = (sel: string) => ({
+      remove: vi.fn(),
+      text: () => "",
+    });
+    ($ as any).root = vi.fn();
+    return $;
+  }),
+}));
+// Mock storage modules required by gateway-actions
+vi.mock("../storage/history.js", () => ({
+  getRecentFormatted: vi.fn(() => ""),
+  searchHistory: vi.fn(() => ""),
+  getMessagesByUser: vi.fn(() => ""),
+  getKnownUsers: vi.fn(() => ""),
+}));
+vi.mock("../storage/media-index.js", () => ({
+  formatMediaIndex: vi.fn(() => ""),
+}));
+vi.mock("../storage/cron-store.js", () => ({
+  addCronJob: vi.fn(),
+  getCronJob: vi.fn(),
+  getCronJobsForChat: vi.fn(() => []),
+  updateCronJob: vi.fn(),
+  deleteCronJob: vi.fn(),
+  validateCronExpression: vi.fn(() => ({ valid: true })),
+  generateCronId: vi.fn(() => "test-id"),
+}));
+// ── Plugin mocking ──────────────────────────────────────────────────────
+const DEFAULT_CONFIG = {
+  model: "claude-opus-4-6",
+  frontend: "telegram",
+  plugins: [],
+  systemPrompt: "test prompt",
+};
+const mockReloadPlugins = vi.fn(async () => ({
+  names: ["extras", "brave-search"],
+  config: { ...DEFAULT_CONFIG },
+}));
+const mockGetPluginPromptAdditions = vi.fn(() => "prompt additions");
+const mockRebuildSystemPrompt = vi.fn();
+const mockUpdateSystemPrompt = vi.fn();
+vi.mock("../core/plugin.js", () => ({
+  reloadPlugins: (...args: unknown[]) =>
+    mockReloadPlugins(...(args as Parameters<typeof mockReloadPlugins>)),
+  getPluginPromptAdditions: () => mockGetPluginPromptAdditions(),
+}));
+vi.mock("../util/config.js", () => ({
+  rebuildSystemPrompt: (...args: unknown[]) =>
+    mockRebuildSystemPrompt(
+      ...(args as Parameters<typeof mockRebuildSystemPrompt>),
+    ),
+}));
+vi.mock("../backend/claude-sdk/index.js", () => ({
+  updateSystemPrompt: (...args: unknown[]) =>
+    mockUpdateSystemPrompt(
+      ...(args as Parameters<typeof mockUpdateSystemPrompt>),
+    ),
+}));
+// ── Import after mocks ────────────────────────────────────────────────────
+import { handleSharedAction } from "../core/gateway-actions.js";
+// ── Tests ─────────────────────────────────────────────────────────────────
+describe("reload_plugins gateway action", () => {
+  beforeEach(() => {
+    vi.resetAllMocks();
+    // Re-establish default implementations after reset
+    mockReloadPlugins.mockImplementation(async () => ({
+      names: ["extras", "brave-search"],
+      config: { ...DEFAULT_CONFIG },
+    }));
+    mockGetPluginPromptAdditions.mockReturnValue("prompt additions");
+    mockRebuildSystemPrompt.mockImplementation(() => {});
+    mockUpdateSystemPrompt.mockImplementation(() => {});
+  });
+  it("returns loaded plugin names on success", async () => {
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+    );
+    expect(result).not.toBeNull();
+    expect(result!.ok).toBe(true);
+    expect(result!.text).toContain("Plugins reloaded successfully");
+    expect(result!.text).toContain("extras");
+    expect(result!.text).toContain("brave-search");
+    expect(result!.text).toContain("(2)");
+  });
+  it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
+    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    // Gateway no longer passes frontends — reloadPlugins derives them from config
+    expect(mockReloadPlugins).toHaveBeenCalledWith();
+  });
+  it("rebuilds system prompt after reloading", async () => {
+    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
+    expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
+  });
+  it("updates backend system prompt after rebuild", async () => {
+    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
+  });
+  it("returns error when reloadPlugins throws", async () => {
+    mockReloadPlugins.mockRejectedValueOnce(
+      new Error("Config validation failed"),
+    );
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+    );
+    expect(result).not.toBeNull();
+    expect(result!.ok).toBe(false);
+    expect(result!.error).toContain("Config validation failed");
+  });
+  it("returns error when config is malformed", async () => {
+    mockReloadPlugins.mockRejectedValueOnce(
+      new Error("Invalid JSON in config"),
+    );
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+    );
+    expect(result!.ok).toBe(false);
+    expect(result!.error).toContain("Invalid JSON in config");
+  });
+  it("reports zero plugins when none configured", async () => {
+    mockReloadPlugins.mockImplementation(async () => ({
+      names: [],
+      config: { ...DEFAULT_CONFIG },
+    }));
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+    );
+    expect(result!.ok).toBe(true);
+    expect(result!.text).toContain("(0)");
+    expect(result!.text).toContain("(none)");
+  });
+});
+// ── Admin tool description tests ──────────────────────────────────────────
+describe("admin tool description", () => {
+  it("does not mention session reset or MCP subprocesses", async () => {
+    const { adminTools } = await import("../core/tools/admin.js");
+    const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
+    expect(reloadTool).toBeDefined();
+    expect(reloadTool!.description).not.toContain("resets sessions");
+    expect(reloadTool!.description).not.toContain("sessions reset");
+    expect(reloadTool!.description).not.toContain("MCP subprocesses");
+    expect(reloadTool!.description).toContain("without restarting");
+    expect(reloadTool!.description).toContain("without downtime");
+  });
+  it("mentions env var cleanup", async () => {
+    const { adminTools } = await import("../core/tools/admin.js");
+    const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
+    expect(reloadTool!.description).toContain("env vars");
+  });
+  it("has admin tag", async () => {
+    const { adminTools } = await import("../core/tools/admin.js");
+    const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
+    expect(reloadTool!.tag).toBe("admin");
+  });
+});

package/src/__tests__/sessions.test.ts CHANGED

@@ -72,7 +72,6 @@ describe("sessions", () => {
       expect(session.usage.totalCacheRead).toBe(0);
       expect(session.usage.totalCacheWrite).toBe(0);
       expect(session.usage.lastPromptTokens).toBe(0);
-      expect(session.usage.estimatedCostUsd).toBe(0);
       expect(session.usage.totalResponseMs).toBe(0);
       expect(session.usage.lastResponseMs).toBe(0);
       expect(session.usage.fastestResponseMs).toBe(Infinity);
@@ -151,20 +150,6 @@ describe("sessions", () => {
       expect(getSession(chatId).usage.lastPromptTokens).toBe(250);
     });
-    it("calculates estimated cost", () => {
-      const chatId = "test-cost";
-      getSession(chatId);
-      recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      });
-      // Cost for 1M input tokens at $3/M = $3
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
-    });
     it("tracks response time duration", () => {
       const chatId = "test-duration";
       getSession(chatId);
@@ -251,98 +236,129 @@ describe("sessions", () => {
     });
   });
-  describe("recordUsage with model pricing", () => {
-    it("applies haiku pricing for haiku model", () => {
-      const chatId = "test-haiku-pricing";
+  describe("recordUsage — model tracking", () => {
+    it("tracks lastModel", () => {
+      const chatId = "test-last-model";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-haiku-4-5",
+        model: "claude-opus-4-6",
       });
-      // Haiku input: $0.8/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.8, 1);
+      expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
     });
-    it("applies opus pricing for opus model", () => {
-      const chatId = "test-opus-pricing";
+    it("updates fastestResponseMs correctly across turns", () => {
+      const chatId = "test-fastest-response";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-opus-4-6",
+        durationMs: 2000,
       });
-      // Opus input: $15/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
-    });
-    it("applies sonnet pricing by default (no model)", () => {
-      const chatId = "test-sonnet-pricing-default";
-      getSession(chatId);
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
+        cacheWrite: 0,
+        durationMs: 500,
+      });
       recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
+        durationMs: 1000,
+      });
+      const usage = getSession(chatId).usage;
+      expect(usage.fastestResponseMs).toBe(500);
+      expect(usage.lastResponseMs).toBe(1000);
+      expect(usage.totalResponseMs).toBe(3500);
+    });
+  });
+  describe("recordUsage — context tracking fields", () => {
+    it("stores contextTokens from SDK iteration data", () => {
+      const chatId = "test-ctx-tokens";
+      getSession(chatId);
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 10,
+        cacheWrite: 5,
+        contextTokens: 85000,
       });
-      // Sonnet input: $3/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
+      expect(getSession(chatId).usage.contextTokens).toBe(85000);
     });
-    it("calculates output cost correctly", () => {
-      const chatId = "test-output-cost";
+    it("stores contextWindow from SDK modelUsage", () => {
+      const chatId = "test-ctx-window";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 1_000_000,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-sonnet-4-6",
+        contextWindow: 1_000_000,
       });
-      // Sonnet output: $15/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
     });
-    it("calculates cache read cost correctly", () => {
-      const chatId = "test-cache-read-cost";
+    it("stores numApiCalls from SDK num_turns", () => {
+      const chatId = "test-num-api-calls";
       getSession(chatId);
       recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 0,
-        cacheRead: 1_000_000,
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-sonnet-4-6",
+        numApiCalls: 3,
       });
-      // Sonnet cacheRead: $0.3/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.3, 2);
+      expect(getSession(chatId).usage.numApiCalls).toBe(3);
     });
-    it("calculates cache write cost correctly", () => {
-      const chatId = "test-cache-write-cost";
+    it("resets contextTokens to 0 when not provided", () => {
+      const chatId = "test-ctx-tokens-reset";
       getSession(chatId);
+      // First turn with context data
       recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 0,
+        inputTokens: 100,
+        outputTokens: 50,
         cacheRead: 0,
-        cacheWrite: 1_000_000,
-        model: "claude-sonnet-4-6",
+        cacheWrite: 0,
+        contextTokens: 50000,
       });
-      // Sonnet cacheWrite: $3.75/M
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3.75, 2);
+      expect(getSession(chatId).usage.contextTokens).toBe(50000);
+      // Second turn without context data — resets to 0
+      recordUsage(chatId, {
+        inputTokens: 200,
+        outputTokens: 100,
+        cacheRead: 0,
+        cacheWrite: 0,
+      });
+      expect(getSession(chatId).usage.contextTokens).toBe(0);
     });
-    it("tracks lastModel", () => {
-      const chatId = "test-last-model";
+    it("preserves contextWindow across turns when not reported", () => {
+      const chatId = "test-ctx-window-preserve";
       getSession(chatId);
       recordUsage(chatId, {
@@ -350,44 +366,75 @@ describe("sessions", () => {
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        model: "claude-opus-4-6",
+        contextWindow: 1_000_000,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
-      expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
+      // Turn without contextWindow — preserves previous value
+      recordUsage(chatId, {
+        inputTokens: 200,
+        outputTokens: 100,
+        cacheRead: 0,
+        cacheWrite: 0,
+      });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
     });
-    it("updates fastestResponseMs correctly across turns", () => {
-      const chatId = "test-fastest-response";
+    it("rejects non-finite contextWindow values and keeps previous", () => {
+      const chatId = "test-ctx-window-nan";
       getSession(chatId);
+      // Set a valid contextWindow first
       recordUsage(chatId, {
         inputTokens: 100,
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        durationMs: 2000,
+        contextWindow: 1_000_000,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
+      // NaN should not overwrite
       recordUsage(chatId, {
         inputTokens: 100,
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        durationMs: 500,
+        contextWindow: NaN,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
+      // Infinity should not overwrite
       recordUsage(chatId, {
         inputTokens: 100,
         outputTokens: 50,
         cacheRead: 0,
         cacheWrite: 0,
-        durationMs: 1000,
+        contextWindow: Infinity,
       });
+      expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
+    });
-      const usage = getSession(chatId).usage;
-      expect(usage.fastestResponseMs).toBe(500);
-      expect(usage.lastResponseMs).toBe(1000);
-      expect(usage.totalResponseMs).toBe(3500);
+    it("rejects negative contextWindow values and keeps previous", () => {
+      const chatId = "test-ctx-window-neg";
+      getSession(chatId);
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
+        cacheWrite: 0,
+        contextWindow: 200_000,
+      });
+      recordUsage(chatId, {
+        inputTokens: 100,
+        outputTokens: 50,
+        cacheRead: 0,
+        cacheWrite: 0,
+        contextWindow: -100,
+      });
+      expect(getSession(chatId).usage.contextWindow).toBe(200_000);
     });
   });
@@ -484,52 +531,6 @@ describe("sessions", () => {
     });
   });
-  describe("cost calculation math", () => {
-    it("calculates multi-component cost correctly (input + output + cache)", () => {
-      const chatId = "test-cost-math";
-      getSession(chatId);
-      // Use exact token counts to verify the formula:
-      // cost = (input * pricing.input + cacheWrite * pricing.cacheWrite +
-      //         cacheRead * pricing.cacheRead + output * pricing.output) / 1_000_000
-      // Sonnet: input=$3/M, output=$15/M, cacheRead=$0.3/M, cacheWrite=$3.75/M
-      recordUsage(chatId, {
-        inputTokens: 500_000, // 500k * 3 / 1M = $1.50
-        outputTokens: 100_000, // 100k * 15 / 1M = $1.50
-        cacheRead: 200_000, // 200k * 0.3 / 1M = $0.06
-        cacheWrite: 100_000, // 100k * 3.75 / 1M = $0.375
-        model: "claude-sonnet-4-6",
-      });
-      const usage = getSession(chatId).usage;
-      // Total: 1.50 + 1.50 + 0.06 + 0.375 = $3.435
-      expect(usage.estimatedCostUsd).toBeCloseTo(3.435, 3);
-    });
-    it("accumulates cost across multiple recordUsage calls", () => {
-      const chatId = "test-cost-accum";
-      getSession(chatId);
-      recordUsage(chatId, {
-        inputTokens: 1_000_000,
-        outputTokens: 0,
-        cacheRead: 0,
-        cacheWrite: 0,
-      });
-      // Sonnet input: $3
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 2);
-      recordUsage(chatId, {
-        inputTokens: 0,
-        outputTokens: 1_000_000,
-        cacheRead: 0,
-        cacheWrite: 0,
-      });
-      // + Sonnet output: $15. Total: $18
-      expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(18, 2);
-    });
-  });
   describe("cache hit rate tracking", () => {
     it("tracks cache read tokens across multiple turns", () => {
       const chatId = "test-cache-track-read";
@@ -571,7 +572,6 @@ describe("sessions", () => {
       const fresh = getSession(chatId);
       expect(fresh.sessionId).toBeUndefined();
       expect(fresh.turns).toBe(0);
-      expect(fresh.usage.estimatedCostUsd).toBe(0);
       expect(fresh.usage.totalInputTokens).toBe(0);
     });
   });
@@ -642,6 +642,40 @@ describe("sessions — migration of legacy field formats", () => {
     expect(session.createdAt).toBe(9999999);
   });
+  it("backfills missing context tracking fields on legacy sessions", () => {
+    vi.mocked(existsSync).mockReturnValueOnce(true);
+    vi.mocked(readFileSync).mockReturnValueOnce(
+      JSON.stringify({
+        "migrate-chat-ctx": {
+          sessionId: undefined,
+          turns: 4,
+          lastActive: 2000,
+          createdAt: 2000,
+          usage: {
+            totalInputTokens: 100,
+            totalOutputTokens: 50,
+            totalCacheRead: 10,
+            totalCacheWrite: 5,
+            lastPromptTokens: 115,
+            estimatedCostUsd: 0.5,
+            totalResponseMs: 1000,
+            lastResponseMs: 500,
+            fastestResponseMs: 500,
+            // contextTokens, contextWindow, numApiCalls deliberately omitted
+          },
+        },
+      }),
+    );
+    loadSessions();
+    const session = getSession("migrate-chat-ctx");
+    expect(session.usage.contextTokens).toBe(0);
+    expect(session.usage.contextWindow).toBe(0);
+    expect(session.usage.numApiCalls).toBe(0);
+    // Existing fields should be preserved
+    expect(session.usage.totalInputTokens).toBe(100);
+    expect(session.usage.lastPromptTokens).toBe(115);
+  });
   it("fixes fastestResponseMs of 0 to Infinity", () => {
     vi.mocked(existsSync).mockReturnValueOnce(true);
     vi.mocked(readFileSync).mockReturnValueOnce(