talon-agent 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "talon-agent",
3
- "version": "1.4.0",
3
+ "version": "1.5.0",
4
4
  "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
5
5
  "author": "Dylan Neve",
6
6
  "license": "MIT",
@@ -51,7 +51,7 @@
51
51
  "format:check": "prettier --check src/ prompts/"
52
52
  },
53
53
  "dependencies": {
54
- "@anthropic-ai/claude-agent-sdk": "^0.2.97",
54
+ "@anthropic-ai/claude-agent-sdk": "^0.2.104",
55
55
  "@brave/brave-search-mcp-server": "^2.0.75",
56
56
  "@clack/prompts": "^1.2.0",
57
57
  "@grammyjs/auto-retry": "^2.0.2",
@@ -1,6 +1,14 @@
1
1
  You are Talon's background heartbeat agent. You run periodically (every {{intervalMinutes}} minutes) to perform maintenance tasks defined by the user.
2
2
 
3
- You have access ONLY to filesystem tools (Read, Write, Edit, Bash, Glob, Grep). Do NOT attempt to use any Telegram, MCP, or messaging tools.
3
+ You have access to filesystem tools (Read, Write, Edit, Bash, Glob, Grep) and all loaded MCP plugins. Do NOT use Telegram messaging tools — you cannot send messages to users.
4
+
5
+ ## Available MCP Tools
6
+
7
+ You have access to all registered MCP plugin tools (excluding Telegram messaging tools). The exact set depends on what plugins are enabled in the current configuration, but may include email, memory/knowledge graph, web search, Wikipedia, GitHub, media processing, browser automation, and more.
8
+
9
+ Only use tools that are actually available in your current session. Do not assume any specific tool is present — check what's exposed to you at runtime.
10
+
11
+ Use available tools when they help accomplish the user-defined tasks (e.g. checking email, querying the knowledge graph, searching the web for updates).
4
12
 
5
13
  ## Context
6
14
 
@@ -20,11 +28,15 @@ If the instructions file does not exist or is empty, perform these default tasks
20
28
  1. **Review recent logs** — Check `{{logsDir}}/` for log files dated after `{{lastRunIso}}`. If `{{lastRunIso}}` is `never`, treat it as the beginning of time and review all available logs. Extract any new facts, preferences, or notable events.
21
29
  2. **Update memory** — Merge any new information into `{{memoryFile}}`, keeping entries concise and factual.
22
30
  3. **Update daily notes** — Write today's learnings, observations, corrections, and follow-ups to `{{dailyMemoryFile}}`. Keep entries concise — the bot reads this file on demand for context.
23
- 4. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
31
+ 4. **Check email** — If email tools are available, check the inbox for new messages and note anything important.
32
+ 5. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
24
33
 
25
34
  ## Rules
26
35
 
27
- - Be surgical and precise. Do not rewrite files unnecessarily.
28
- - Do not modify files outside the workspace unless the instructions explicitly allow it.
29
- - Keep your work focused and efficient — you have a 10-minute time limit.
30
- - When done, stop. The system handles all state tracking.
36
+ - Do NOT use Telegram messaging tools — they are not available in heartbeat mode.
37
+ - Be concise in log entries and memory updates.
38
+ - If a task fails, log the error and move on to the next task.
39
+ - Do NOT modify the instructions file — only read it.
40
+ - Be surgical: only make the minimal file changes needed to complete the current task.
41
+ - Do NOT create, modify, move, or delete files outside `{{workspace}}` unless the user-defined instructions explicitly require it.
42
+ - Complete all tasks within the time budget. If running low, prioritize memory updates.
@@ -48,6 +48,10 @@ vi.mock("@anthropic-ai/claude-agent-sdk", () => ({
48
48
  query: queryMock,
49
49
  }));
50
50
 
51
+ vi.mock("../core/plugin.js", () => ({
52
+ getPluginMcpServers: vi.fn(() => ({})),
53
+ }));
54
+
51
55
  vi.mock("../util/paths.js", () => ({
52
56
  files: {
53
57
  heartbeatState: "/fake/.talon/workspace/memory/heartbeat_state.json",
@@ -184,6 +188,23 @@ describe("forceHeartbeat", () => {
184
188
  expect(finalState.status).toBe("idle");
185
189
  });
186
190
 
191
+ it("passes plugin MCP servers to the agent via getPluginMcpServers", async () => {
192
+ const { getPluginMcpServers } = await import("../core/plugin.js");
193
+ const mockServers = {
194
+ "email-tools": { command: "node", args: ["email.js"], env: {} },
195
+ };
196
+ vi.mocked(getPluginMcpServers).mockReturnValue(mockServers);
197
+
198
+ await forceHeartbeat();
199
+
200
+ expect(getPluginMcpServers).toHaveBeenCalledWith("", "heartbeat");
201
+ // Verify mcpServers was passed through to query()
202
+ const queryCall = queryMock.mock.calls[0] as unknown as [
203
+ { options: { mcpServers: Record<string, unknown> } },
204
+ ];
205
+ expect(queryCall[0].options.mcpServers).toEqual(mockServers);
206
+ });
207
+
187
208
  it("preserves previous last_run on failure", async () => {
188
209
  const previousLastRun = Date.now() - 3600_000;
189
210
  existsSyncMock.mockReturnValue(true);
@@ -0,0 +1,199 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+
3
+ // ── Module mocks ──────────────────────────────────────────────────────────
4
+
5
+ vi.mock("../util/log.js", () => ({
6
+ log: vi.fn(),
7
+ logError: vi.fn(),
8
+ logWarn: vi.fn(),
9
+ logDebug: vi.fn(),
10
+ }));
11
+
12
+ vi.mock("write-file-atomic", () => ({
13
+ default: { sync: vi.fn() },
14
+ }));
15
+
16
+ // Mock cheerio (required by gateway-actions via extractText)
17
+ vi.mock("cheerio", () => ({
18
+ load: vi.fn(() => {
19
+ const $ = (sel: string) => ({
20
+ remove: vi.fn(),
21
+ text: () => "",
22
+ });
23
+ ($ as any).root = vi.fn();
24
+ return $;
25
+ }),
26
+ }));
27
+
28
+ // Mock storage modules required by gateway-actions
29
+ vi.mock("../storage/history.js", () => ({
30
+ getRecentFormatted: vi.fn(() => ""),
31
+ searchHistory: vi.fn(() => ""),
32
+ getMessagesByUser: vi.fn(() => ""),
33
+ getKnownUsers: vi.fn(() => ""),
34
+ }));
35
+ vi.mock("../storage/media-index.js", () => ({
36
+ formatMediaIndex: vi.fn(() => ""),
37
+ }));
38
+ vi.mock("../storage/cron-store.js", () => ({
39
+ addCronJob: vi.fn(),
40
+ getCronJob: vi.fn(),
41
+ getCronJobsForChat: vi.fn(() => []),
42
+ updateCronJob: vi.fn(),
43
+ deleteCronJob: vi.fn(),
44
+ validateCronExpression: vi.fn(() => ({ valid: true })),
45
+ generateCronId: vi.fn(() => "test-id"),
46
+ }));
47
+
48
+ // ── Plugin mocking ──────────────────────────────────────────────────────
49
+
50
+ const DEFAULT_CONFIG = {
51
+ model: "claude-opus-4-6",
52
+ frontend: "telegram",
53
+ plugins: [],
54
+ systemPrompt: "test prompt",
55
+ };
56
+
57
+ const mockReloadPlugins = vi.fn(async () => ({
58
+ names: ["extras", "brave-search"],
59
+ config: { ...DEFAULT_CONFIG },
60
+ }));
61
+ const mockGetPluginPromptAdditions = vi.fn(() => "prompt additions");
62
+ const mockRebuildSystemPrompt = vi.fn();
63
+ const mockUpdateSystemPrompt = vi.fn();
64
+
65
+ vi.mock("../core/plugin.js", () => ({
66
+ reloadPlugins: (...args: unknown[]) =>
67
+ mockReloadPlugins(...(args as Parameters<typeof mockReloadPlugins>)),
68
+ getPluginPromptAdditions: () => mockGetPluginPromptAdditions(),
69
+ }));
70
+
71
+ vi.mock("../util/config.js", () => ({
72
+ rebuildSystemPrompt: (...args: unknown[]) =>
73
+ mockRebuildSystemPrompt(
74
+ ...(args as Parameters<typeof mockRebuildSystemPrompt>),
75
+ ),
76
+ }));
77
+
78
+ vi.mock("../backend/claude-sdk/index.js", () => ({
79
+ updateSystemPrompt: (...args: unknown[]) =>
80
+ mockUpdateSystemPrompt(
81
+ ...(args as Parameters<typeof mockUpdateSystemPrompt>),
82
+ ),
83
+ }));
84
+
85
+ // ── Import after mocks ────────────────────────────────────────────────────
86
+
87
+ import { handleSharedAction } from "../core/gateway-actions.js";
88
+
89
+ // ── Tests ─────────────────────────────────────────────────────────────────
90
+
91
+ describe("reload_plugins gateway action", () => {
92
+ beforeEach(() => {
93
+ vi.resetAllMocks();
94
+ // Re-establish default implementations after reset
95
+ mockReloadPlugins.mockImplementation(async () => ({
96
+ names: ["extras", "brave-search"],
97
+ config: { ...DEFAULT_CONFIG },
98
+ }));
99
+ mockGetPluginPromptAdditions.mockReturnValue("prompt additions");
100
+ mockRebuildSystemPrompt.mockImplementation(() => {});
101
+ mockUpdateSystemPrompt.mockImplementation(() => {});
102
+ });
103
+
104
+ it("returns loaded plugin names on success", async () => {
105
+ const result = await handleSharedAction(
106
+ { action: "reload_plugins" },
107
+ 12345,
108
+ );
109
+ expect(result).not.toBeNull();
110
+ expect(result!.ok).toBe(true);
111
+ expect(result!.text).toContain("Plugins reloaded successfully");
112
+ expect(result!.text).toContain("extras");
113
+ expect(result!.text).toContain("brave-search");
114
+ expect(result!.text).toContain("(2)");
115
+ });
116
+
117
+ it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
118
+ await handleSharedAction({ action: "reload_plugins" }, 12345);
119
+ // Gateway no longer passes frontends — reloadPlugins derives them from config
120
+ expect(mockReloadPlugins).toHaveBeenCalledWith();
121
+ });
122
+
123
+ it("rebuilds system prompt after reloading", async () => {
124
+ await handleSharedAction({ action: "reload_plugins" }, 12345);
125
+ expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
126
+ expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
127
+ });
128
+
129
+ it("updates backend system prompt after rebuild", async () => {
130
+ await handleSharedAction({ action: "reload_plugins" }, 12345);
131
+ expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
132
+ });
133
+
134
+ it("returns error when reloadPlugins throws", async () => {
135
+ mockReloadPlugins.mockRejectedValueOnce(
136
+ new Error("Config validation failed"),
137
+ );
138
+ const result = await handleSharedAction(
139
+ { action: "reload_plugins" },
140
+ 12345,
141
+ );
142
+ expect(result).not.toBeNull();
143
+ expect(result!.ok).toBe(false);
144
+ expect(result!.error).toContain("Config validation failed");
145
+ });
146
+
147
+ it("returns error when config is malformed", async () => {
148
+ mockReloadPlugins.mockRejectedValueOnce(
149
+ new Error("Invalid JSON in config"),
150
+ );
151
+ const result = await handleSharedAction(
152
+ { action: "reload_plugins" },
153
+ 12345,
154
+ );
155
+ expect(result!.ok).toBe(false);
156
+ expect(result!.error).toContain("Invalid JSON in config");
157
+ });
158
+
159
+ it("reports zero plugins when none configured", async () => {
160
+ mockReloadPlugins.mockImplementation(async () => ({
161
+ names: [],
162
+ config: { ...DEFAULT_CONFIG },
163
+ }));
164
+ const result = await handleSharedAction(
165
+ { action: "reload_plugins" },
166
+ 12345,
167
+ );
168
+ expect(result!.ok).toBe(true);
169
+ expect(result!.text).toContain("(0)");
170
+ expect(result!.text).toContain("(none)");
171
+ });
172
+ });
173
+
174
+ // ── Admin tool description tests ──────────────────────────────────────────
175
+
176
+ describe("admin tool description", () => {
177
+ it("does not mention session reset or MCP subprocesses", async () => {
178
+ const { adminTools } = await import("../core/tools/admin.js");
179
+ const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
180
+ expect(reloadTool).toBeDefined();
181
+ expect(reloadTool!.description).not.toContain("resets sessions");
182
+ expect(reloadTool!.description).not.toContain("sessions reset");
183
+ expect(reloadTool!.description).not.toContain("MCP subprocesses");
184
+ expect(reloadTool!.description).toContain("without restarting");
185
+ expect(reloadTool!.description).toContain("without downtime");
186
+ });
187
+
188
+ it("mentions env var cleanup", async () => {
189
+ const { adminTools } = await import("../core/tools/admin.js");
190
+ const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
191
+ expect(reloadTool!.description).toContain("env vars");
192
+ });
193
+
194
+ it("has admin tag", async () => {
195
+ const { adminTools } = await import("../core/tools/admin.js");
196
+ const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
197
+ expect(reloadTool!.tag).toBe("admin");
198
+ });
199
+ });
@@ -72,7 +72,6 @@ describe("sessions", () => {
72
72
  expect(session.usage.totalCacheRead).toBe(0);
73
73
  expect(session.usage.totalCacheWrite).toBe(0);
74
74
  expect(session.usage.lastPromptTokens).toBe(0);
75
- expect(session.usage.estimatedCostUsd).toBe(0);
76
75
  expect(session.usage.totalResponseMs).toBe(0);
77
76
  expect(session.usage.lastResponseMs).toBe(0);
78
77
  expect(session.usage.fastestResponseMs).toBe(Infinity);
@@ -151,20 +150,6 @@ describe("sessions", () => {
151
150
  expect(getSession(chatId).usage.lastPromptTokens).toBe(250);
152
151
  });
153
152
 
154
- it("calculates estimated cost", () => {
155
- const chatId = "test-cost";
156
- getSession(chatId);
157
-
158
- recordUsage(chatId, {
159
- inputTokens: 1_000_000,
160
- outputTokens: 0,
161
- cacheRead: 0,
162
- cacheWrite: 0,
163
- });
164
- // Cost for 1M input tokens at $3/M = $3
165
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
166
- });
167
-
168
153
  it("tracks response time duration", () => {
169
154
  const chatId = "test-duration";
170
155
  getSession(chatId);
@@ -251,98 +236,129 @@ describe("sessions", () => {
251
236
  });
252
237
  });
253
238
 
254
- describe("recordUsage with model pricing", () => {
255
- it("applies haiku pricing for haiku model", () => {
256
- const chatId = "test-haiku-pricing";
239
+ describe("recordUsage model tracking", () => {
240
+ it("tracks lastModel", () => {
241
+ const chatId = "test-last-model";
257
242
  getSession(chatId);
258
243
 
259
244
  recordUsage(chatId, {
260
- inputTokens: 1_000_000,
261
- outputTokens: 0,
245
+ inputTokens: 100,
246
+ outputTokens: 50,
262
247
  cacheRead: 0,
263
248
  cacheWrite: 0,
264
- model: "claude-haiku-4-5",
249
+ model: "claude-opus-4-6",
265
250
  });
266
- // Haiku input: $0.8/M
267
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.8, 1);
251
+
252
+ expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
268
253
  });
269
254
 
270
- it("applies opus pricing for opus model", () => {
271
- const chatId = "test-opus-pricing";
255
+ it("updates fastestResponseMs correctly across turns", () => {
256
+ const chatId = "test-fastest-response";
272
257
  getSession(chatId);
273
258
 
274
259
  recordUsage(chatId, {
275
- inputTokens: 1_000_000,
276
- outputTokens: 0,
260
+ inputTokens: 100,
261
+ outputTokens: 50,
277
262
  cacheRead: 0,
278
263
  cacheWrite: 0,
279
- model: "claude-opus-4-6",
264
+ durationMs: 2000,
280
265
  });
281
- // Opus input: $15/M
282
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
283
- });
284
266
 
285
- it("applies sonnet pricing by default (no model)", () => {
286
- const chatId = "test-sonnet-pricing-default";
287
- getSession(chatId);
267
+ recordUsage(chatId, {
268
+ inputTokens: 100,
269
+ outputTokens: 50,
270
+ cacheRead: 0,
271
+ cacheWrite: 0,
272
+ durationMs: 500,
273
+ });
288
274
 
289
275
  recordUsage(chatId, {
290
- inputTokens: 1_000_000,
291
- outputTokens: 0,
276
+ inputTokens: 100,
277
+ outputTokens: 50,
292
278
  cacheRead: 0,
293
279
  cacheWrite: 0,
280
+ durationMs: 1000,
281
+ });
282
+
283
+ const usage = getSession(chatId).usage;
284
+ expect(usage.fastestResponseMs).toBe(500);
285
+ expect(usage.lastResponseMs).toBe(1000);
286
+ expect(usage.totalResponseMs).toBe(3500);
287
+ });
288
+ });
289
+
290
+ describe("recordUsage — context tracking fields", () => {
291
+ it("stores contextTokens from SDK iteration data", () => {
292
+ const chatId = "test-ctx-tokens";
293
+ getSession(chatId);
294
+
295
+ recordUsage(chatId, {
296
+ inputTokens: 100,
297
+ outputTokens: 50,
298
+ cacheRead: 10,
299
+ cacheWrite: 5,
300
+ contextTokens: 85000,
294
301
  });
295
- // Sonnet input: $3/M
296
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
302
+
303
+ expect(getSession(chatId).usage.contextTokens).toBe(85000);
297
304
  });
298
305
 
299
- it("calculates output cost correctly", () => {
300
- const chatId = "test-output-cost";
306
+ it("stores contextWindow from SDK modelUsage", () => {
307
+ const chatId = "test-ctx-window";
301
308
  getSession(chatId);
302
309
 
303
310
  recordUsage(chatId, {
304
- inputTokens: 0,
305
- outputTokens: 1_000_000,
311
+ inputTokens: 100,
312
+ outputTokens: 50,
306
313
  cacheRead: 0,
307
314
  cacheWrite: 0,
308
- model: "claude-sonnet-4-6",
315
+ contextWindow: 1_000_000,
309
316
  });
310
- // Sonnet output: $15/M
311
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
317
+
318
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
312
319
  });
313
320
 
314
- it("calculates cache read cost correctly", () => {
315
- const chatId = "test-cache-read-cost";
321
+ it("stores numApiCalls from SDK num_turns", () => {
322
+ const chatId = "test-num-api-calls";
316
323
  getSession(chatId);
317
324
 
318
325
  recordUsage(chatId, {
319
- inputTokens: 0,
320
- outputTokens: 0,
321
- cacheRead: 1_000_000,
326
+ inputTokens: 100,
327
+ outputTokens: 50,
328
+ cacheRead: 0,
322
329
  cacheWrite: 0,
323
- model: "claude-sonnet-4-6",
330
+ numApiCalls: 3,
324
331
  });
325
- // Sonnet cacheRead: $0.3/M
326
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.3, 2);
332
+
333
+ expect(getSession(chatId).usage.numApiCalls).toBe(3);
327
334
  });
328
335
 
329
- it("calculates cache write cost correctly", () => {
330
- const chatId = "test-cache-write-cost";
336
+ it("resets contextTokens to 0 when not provided", () => {
337
+ const chatId = "test-ctx-tokens-reset";
331
338
  getSession(chatId);
332
339
 
340
+ // First turn with context data
333
341
  recordUsage(chatId, {
334
- inputTokens: 0,
335
- outputTokens: 0,
342
+ inputTokens: 100,
343
+ outputTokens: 50,
336
344
  cacheRead: 0,
337
- cacheWrite: 1_000_000,
338
- model: "claude-sonnet-4-6",
345
+ cacheWrite: 0,
346
+ contextTokens: 50000,
339
347
  });
340
- // Sonnet cacheWrite: $3.75/M
341
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3.75, 2);
348
+ expect(getSession(chatId).usage.contextTokens).toBe(50000);
349
+
350
+ // Second turn without context data — resets to 0
351
+ recordUsage(chatId, {
352
+ inputTokens: 200,
353
+ outputTokens: 100,
354
+ cacheRead: 0,
355
+ cacheWrite: 0,
356
+ });
357
+ expect(getSession(chatId).usage.contextTokens).toBe(0);
342
358
  });
343
359
 
344
- it("tracks lastModel", () => {
345
- const chatId = "test-last-model";
360
+ it("preserves contextWindow across turns when not reported", () => {
361
+ const chatId = "test-ctx-window-preserve";
346
362
  getSession(chatId);
347
363
 
348
364
  recordUsage(chatId, {
@@ -350,44 +366,75 @@ describe("sessions", () => {
350
366
  outputTokens: 50,
351
367
  cacheRead: 0,
352
368
  cacheWrite: 0,
353
- model: "claude-opus-4-6",
369
+ contextWindow: 1_000_000,
354
370
  });
371
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
355
372
 
356
- expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
373
+ // Turn without contextWindow — preserves previous value
374
+ recordUsage(chatId, {
375
+ inputTokens: 200,
376
+ outputTokens: 100,
377
+ cacheRead: 0,
378
+ cacheWrite: 0,
379
+ });
380
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
357
381
  });
358
382
 
359
- it("updates fastestResponseMs correctly across turns", () => {
360
- const chatId = "test-fastest-response";
383
+ it("rejects non-finite contextWindow values and keeps previous", () => {
384
+ const chatId = "test-ctx-window-nan";
361
385
  getSession(chatId);
362
386
 
387
+ // Set a valid contextWindow first
363
388
  recordUsage(chatId, {
364
389
  inputTokens: 100,
365
390
  outputTokens: 50,
366
391
  cacheRead: 0,
367
392
  cacheWrite: 0,
368
- durationMs: 2000,
393
+ contextWindow: 1_000_000,
369
394
  });
395
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
370
396
 
397
+ // NaN should not overwrite
371
398
  recordUsage(chatId, {
372
399
  inputTokens: 100,
373
400
  outputTokens: 50,
374
401
  cacheRead: 0,
375
402
  cacheWrite: 0,
376
- durationMs: 500,
403
+ contextWindow: NaN,
377
404
  });
405
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
378
406
 
407
+ // Infinity should not overwrite
379
408
  recordUsage(chatId, {
380
409
  inputTokens: 100,
381
410
  outputTokens: 50,
382
411
  cacheRead: 0,
383
412
  cacheWrite: 0,
384
- durationMs: 1000,
413
+ contextWindow: Infinity,
385
414
  });
415
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
416
+ });
386
417
 
387
- const usage = getSession(chatId).usage;
388
- expect(usage.fastestResponseMs).toBe(500);
389
- expect(usage.lastResponseMs).toBe(1000);
390
- expect(usage.totalResponseMs).toBe(3500);
418
+ it("rejects negative contextWindow values and keeps previous", () => {
419
+ const chatId = "test-ctx-window-neg";
420
+ getSession(chatId);
421
+
422
+ recordUsage(chatId, {
423
+ inputTokens: 100,
424
+ outputTokens: 50,
425
+ cacheRead: 0,
426
+ cacheWrite: 0,
427
+ contextWindow: 200_000,
428
+ });
429
+
430
+ recordUsage(chatId, {
431
+ inputTokens: 100,
432
+ outputTokens: 50,
433
+ cacheRead: 0,
434
+ cacheWrite: 0,
435
+ contextWindow: -100,
436
+ });
437
+ expect(getSession(chatId).usage.contextWindow).toBe(200_000);
391
438
  });
392
439
  });
393
440
 
@@ -484,52 +531,6 @@ describe("sessions", () => {
484
531
  });
485
532
  });
486
533
 
487
- describe("cost calculation math", () => {
488
- it("calculates multi-component cost correctly (input + output + cache)", () => {
489
- const chatId = "test-cost-math";
490
- getSession(chatId);
491
-
492
- // Use exact token counts to verify the formula:
493
- // cost = (input * pricing.input + cacheWrite * pricing.cacheWrite +
494
- // cacheRead * pricing.cacheRead + output * pricing.output) / 1_000_000
495
- // Sonnet: input=$3/M, output=$15/M, cacheRead=$0.3/M, cacheWrite=$3.75/M
496
- recordUsage(chatId, {
497
- inputTokens: 500_000, // 500k * 3 / 1M = $1.50
498
- outputTokens: 100_000, // 100k * 15 / 1M = $1.50
499
- cacheRead: 200_000, // 200k * 0.3 / 1M = $0.06
500
- cacheWrite: 100_000, // 100k * 3.75 / 1M = $0.375
501
- model: "claude-sonnet-4-6",
502
- });
503
-
504
- const usage = getSession(chatId).usage;
505
- // Total: 1.50 + 1.50 + 0.06 + 0.375 = $3.435
506
- expect(usage.estimatedCostUsd).toBeCloseTo(3.435, 3);
507
- });
508
-
509
- it("accumulates cost across multiple recordUsage calls", () => {
510
- const chatId = "test-cost-accum";
511
- getSession(chatId);
512
-
513
- recordUsage(chatId, {
514
- inputTokens: 1_000_000,
515
- outputTokens: 0,
516
- cacheRead: 0,
517
- cacheWrite: 0,
518
- });
519
- // Sonnet input: $3
520
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 2);
521
-
522
- recordUsage(chatId, {
523
- inputTokens: 0,
524
- outputTokens: 1_000_000,
525
- cacheRead: 0,
526
- cacheWrite: 0,
527
- });
528
- // + Sonnet output: $15. Total: $18
529
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(18, 2);
530
- });
531
- });
532
-
533
534
  describe("cache hit rate tracking", () => {
534
535
  it("tracks cache read tokens across multiple turns", () => {
535
536
  const chatId = "test-cache-track-read";
@@ -571,7 +572,6 @@ describe("sessions", () => {
571
572
  const fresh = getSession(chatId);
572
573
  expect(fresh.sessionId).toBeUndefined();
573
574
  expect(fresh.turns).toBe(0);
574
- expect(fresh.usage.estimatedCostUsd).toBe(0);
575
575
  expect(fresh.usage.totalInputTokens).toBe(0);
576
576
  });
577
577
  });
@@ -642,6 +642,40 @@ describe("sessions — migration of legacy field formats", () => {
642
642
  expect(session.createdAt).toBe(9999999);
643
643
  });
644
644
 
645
+ it("backfills missing context tracking fields on legacy sessions", () => {
646
+ vi.mocked(existsSync).mockReturnValueOnce(true);
647
+ vi.mocked(readFileSync).mockReturnValueOnce(
648
+ JSON.stringify({
649
+ "migrate-chat-ctx": {
650
+ sessionId: undefined,
651
+ turns: 4,
652
+ lastActive: 2000,
653
+ createdAt: 2000,
654
+ usage: {
655
+ totalInputTokens: 100,
656
+ totalOutputTokens: 50,
657
+ totalCacheRead: 10,
658
+ totalCacheWrite: 5,
659
+ lastPromptTokens: 115,
660
+ estimatedCostUsd: 0.5,
661
+ totalResponseMs: 1000,
662
+ lastResponseMs: 500,
663
+ fastestResponseMs: 500,
664
+ // contextTokens, contextWindow, numApiCalls deliberately omitted
665
+ },
666
+ },
667
+ }),
668
+ );
669
+ loadSessions();
670
+ const session = getSession("migrate-chat-ctx");
671
+ expect(session.usage.contextTokens).toBe(0);
672
+ expect(session.usage.contextWindow).toBe(0);
673
+ expect(session.usage.numApiCalls).toBe(0);
674
+ // Existing fields should be preserved
675
+ expect(session.usage.totalInputTokens).toBe(100);
676
+ expect(session.usage.lastPromptTokens).toBe(115);
677
+ });
678
+
645
679
  it("fixes fastestResponseMs of 0 to Infinity", () => {
646
680
  vi.mocked(existsSync).mockReturnValueOnce(true);
647
681
  vi.mocked(readFileSync).mockReturnValueOnce(