talon-agent 1.4.0 → 1.6.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (36)
  1. package/package.json +2 -2
  2. package/prompts/heartbeat.md +18 -6
  3. package/src/__tests__/chat-settings.test.ts +20 -7
  4. package/src/__tests__/fuzz.test.ts +3 -0
  5. package/src/__tests__/heartbeat.test.ts +21 -0
  6. package/src/__tests__/reload-plugins.test.ts +205 -0
  7. package/src/__tests__/sessions.test.ts +155 -121
  8. package/src/backend/claude-sdk/constants.ts +63 -0
  9. package/src/backend/claude-sdk/handler.ts +236 -0
  10. package/src/backend/claude-sdk/index.ts +10 -423
  11. package/src/backend/claude-sdk/models.ts +216 -0
  12. package/src/backend/claude-sdk/options.ts +129 -0
  13. package/src/backend/claude-sdk/state.ts +59 -0
  14. package/src/backend/claude-sdk/stream.ts +221 -0
  15. package/src/backend/claude-sdk/warm.ts +89 -0
  16. package/src/bootstrap.ts +22 -108
  17. package/src/cli.ts +30 -15
  18. package/src/core/dream.ts +5 -17
  19. package/src/core/gateway-actions.ts +34 -2
  20. package/src/core/gateway.ts +5 -2
  21. package/src/core/heartbeat.ts +12 -22
  22. package/src/core/models.ts +149 -0
  23. package/src/core/plugin.ts +147 -0
  24. package/src/core/tools/admin.ts +22 -0
  25. package/src/core/tools/index.ts +2 -0
  26. package/src/core/tools/types.ts +2 -1
  27. package/src/core/types.ts +4 -0
  28. package/src/frontend/teams/index.ts +7 -10
  29. package/src/frontend/telegram/callbacks.ts +15 -27
  30. package/src/frontend/telegram/commands.ts +32 -36
  31. package/src/frontend/telegram/helpers.ts +13 -15
  32. package/src/frontend/telegram/index.ts +1 -1
  33. package/src/frontend/terminal/commands.ts +7 -4
  34. package/src/index.ts +2 -1
  35. package/src/storage/chat-settings.ts +5 -19
  36. package/src/storage/sessions.ts +34 -40
@@ -72,7 +72,6 @@ describe("sessions", () => {
72
72
  expect(session.usage.totalCacheRead).toBe(0);
73
73
  expect(session.usage.totalCacheWrite).toBe(0);
74
74
  expect(session.usage.lastPromptTokens).toBe(0);
75
- expect(session.usage.estimatedCostUsd).toBe(0);
76
75
  expect(session.usage.totalResponseMs).toBe(0);
77
76
  expect(session.usage.lastResponseMs).toBe(0);
78
77
  expect(session.usage.fastestResponseMs).toBe(Infinity);
@@ -151,20 +150,6 @@ describe("sessions", () => {
151
150
  expect(getSession(chatId).usage.lastPromptTokens).toBe(250);
152
151
  });
153
152
 
154
- it("calculates estimated cost", () => {
155
- const chatId = "test-cost";
156
- getSession(chatId);
157
-
158
- recordUsage(chatId, {
159
- inputTokens: 1_000_000,
160
- outputTokens: 0,
161
- cacheRead: 0,
162
- cacheWrite: 0,
163
- });
164
- // Cost for 1M input tokens at $3/M = $3
165
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
166
- });
167
-
168
153
  it("tracks response time duration", () => {
169
154
  const chatId = "test-duration";
170
155
  getSession(chatId);
@@ -251,98 +236,129 @@ describe("sessions", () => {
251
236
  });
252
237
  });
253
238
 
254
- describe("recordUsage with model pricing", () => {
255
- it("applies haiku pricing for haiku model", () => {
256
- const chatId = "test-haiku-pricing";
239
+ describe("recordUsage model tracking", () => {
240
+ it("tracks lastModel", () => {
241
+ const chatId = "test-last-model";
257
242
  getSession(chatId);
258
243
 
259
244
  recordUsage(chatId, {
260
- inputTokens: 1_000_000,
261
- outputTokens: 0,
245
+ inputTokens: 100,
246
+ outputTokens: 50,
262
247
  cacheRead: 0,
263
248
  cacheWrite: 0,
264
- model: "claude-haiku-4-5",
249
+ model: "claude-opus-4-6",
265
250
  });
266
- // Haiku input: $0.8/M
267
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.8, 1);
251
+
252
+ expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
268
253
  });
269
254
 
270
- it("applies opus pricing for opus model", () => {
271
- const chatId = "test-opus-pricing";
255
+ it("updates fastestResponseMs correctly across turns", () => {
256
+ const chatId = "test-fastest-response";
272
257
  getSession(chatId);
273
258
 
274
259
  recordUsage(chatId, {
275
- inputTokens: 1_000_000,
276
- outputTokens: 0,
260
+ inputTokens: 100,
261
+ outputTokens: 50,
277
262
  cacheRead: 0,
278
263
  cacheWrite: 0,
279
- model: "claude-opus-4-6",
264
+ durationMs: 2000,
280
265
  });
281
- // Opus input: $15/M
282
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
283
- });
284
266
 
285
- it("applies sonnet pricing by default (no model)", () => {
286
- const chatId = "test-sonnet-pricing-default";
287
- getSession(chatId);
267
+ recordUsage(chatId, {
268
+ inputTokens: 100,
269
+ outputTokens: 50,
270
+ cacheRead: 0,
271
+ cacheWrite: 0,
272
+ durationMs: 500,
273
+ });
288
274
 
289
275
  recordUsage(chatId, {
290
- inputTokens: 1_000_000,
291
- outputTokens: 0,
276
+ inputTokens: 100,
277
+ outputTokens: 50,
292
278
  cacheRead: 0,
293
279
  cacheWrite: 0,
280
+ durationMs: 1000,
281
+ });
282
+
283
+ const usage = getSession(chatId).usage;
284
+ expect(usage.fastestResponseMs).toBe(500);
285
+ expect(usage.lastResponseMs).toBe(1000);
286
+ expect(usage.totalResponseMs).toBe(3500);
287
+ });
288
+ });
289
+
290
+ describe("recordUsage — context tracking fields", () => {
291
+ it("stores contextTokens from SDK iteration data", () => {
292
+ const chatId = "test-ctx-tokens";
293
+ getSession(chatId);
294
+
295
+ recordUsage(chatId, {
296
+ inputTokens: 100,
297
+ outputTokens: 50,
298
+ cacheRead: 10,
299
+ cacheWrite: 5,
300
+ contextTokens: 85000,
294
301
  });
295
- // Sonnet input: $3/M
296
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 1);
302
+
303
+ expect(getSession(chatId).usage.contextTokens).toBe(85000);
297
304
  });
298
305
 
299
- it("calculates output cost correctly", () => {
300
- const chatId = "test-output-cost";
306
+ it("stores contextWindow from SDK modelUsage", () => {
307
+ const chatId = "test-ctx-window";
301
308
  getSession(chatId);
302
309
 
303
310
  recordUsage(chatId, {
304
- inputTokens: 0,
305
- outputTokens: 1_000_000,
311
+ inputTokens: 100,
312
+ outputTokens: 50,
306
313
  cacheRead: 0,
307
314
  cacheWrite: 0,
308
- model: "claude-sonnet-4-6",
315
+ contextWindow: 1_000_000,
309
316
  });
310
- // Sonnet output: $15/M
311
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(15, 1);
317
+
318
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
312
319
  });
313
320
 
314
- it("calculates cache read cost correctly", () => {
315
- const chatId = "test-cache-read-cost";
321
+ it("stores numApiCalls from SDK num_turns", () => {
322
+ const chatId = "test-num-api-calls";
316
323
  getSession(chatId);
317
324
 
318
325
  recordUsage(chatId, {
319
- inputTokens: 0,
320
- outputTokens: 0,
321
- cacheRead: 1_000_000,
326
+ inputTokens: 100,
327
+ outputTokens: 50,
328
+ cacheRead: 0,
322
329
  cacheWrite: 0,
323
- model: "claude-sonnet-4-6",
330
+ numApiCalls: 3,
324
331
  });
325
- // Sonnet cacheRead: $0.3/M
326
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(0.3, 2);
332
+
333
+ expect(getSession(chatId).usage.numApiCalls).toBe(3);
327
334
  });
328
335
 
329
- it("calculates cache write cost correctly", () => {
330
- const chatId = "test-cache-write-cost";
336
+ it("resets contextTokens to 0 when not provided", () => {
337
+ const chatId = "test-ctx-tokens-reset";
331
338
  getSession(chatId);
332
339
 
340
+ // First turn with context data
333
341
  recordUsage(chatId, {
334
- inputTokens: 0,
335
- outputTokens: 0,
342
+ inputTokens: 100,
343
+ outputTokens: 50,
336
344
  cacheRead: 0,
337
- cacheWrite: 1_000_000,
338
- model: "claude-sonnet-4-6",
345
+ cacheWrite: 0,
346
+ contextTokens: 50000,
339
347
  });
340
- // Sonnet cacheWrite: $3.75/M
341
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3.75, 2);
348
+ expect(getSession(chatId).usage.contextTokens).toBe(50000);
349
+
350
+ // Second turn without context data — resets to 0
351
+ recordUsage(chatId, {
352
+ inputTokens: 200,
353
+ outputTokens: 100,
354
+ cacheRead: 0,
355
+ cacheWrite: 0,
356
+ });
357
+ expect(getSession(chatId).usage.contextTokens).toBe(0);
342
358
  });
343
359
 
344
- it("tracks lastModel", () => {
345
- const chatId = "test-last-model";
360
+ it("preserves contextWindow across turns when not reported", () => {
361
+ const chatId = "test-ctx-window-preserve";
346
362
  getSession(chatId);
347
363
 
348
364
  recordUsage(chatId, {
@@ -350,44 +366,75 @@ describe("sessions", () => {
350
366
  outputTokens: 50,
351
367
  cacheRead: 0,
352
368
  cacheWrite: 0,
353
- model: "claude-opus-4-6",
369
+ contextWindow: 1_000_000,
354
370
  });
371
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
355
372
 
356
- expect(getSession(chatId).lastModel).toBe("claude-opus-4-6");
373
+ // Turn without contextWindow — preserves previous value
374
+ recordUsage(chatId, {
375
+ inputTokens: 200,
376
+ outputTokens: 100,
377
+ cacheRead: 0,
378
+ cacheWrite: 0,
379
+ });
380
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
357
381
  });
358
382
 
359
- it("updates fastestResponseMs correctly across turns", () => {
360
- const chatId = "test-fastest-response";
383
+ it("rejects non-finite contextWindow values and keeps previous", () => {
384
+ const chatId = "test-ctx-window-nan";
361
385
  getSession(chatId);
362
386
 
387
+ // Set a valid contextWindow first
363
388
  recordUsage(chatId, {
364
389
  inputTokens: 100,
365
390
  outputTokens: 50,
366
391
  cacheRead: 0,
367
392
  cacheWrite: 0,
368
- durationMs: 2000,
393
+ contextWindow: 1_000_000,
369
394
  });
395
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
370
396
 
397
+ // NaN should not overwrite
371
398
  recordUsage(chatId, {
372
399
  inputTokens: 100,
373
400
  outputTokens: 50,
374
401
  cacheRead: 0,
375
402
  cacheWrite: 0,
376
- durationMs: 500,
403
+ contextWindow: NaN,
377
404
  });
405
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
378
406
 
407
+ // Infinity should not overwrite
379
408
  recordUsage(chatId, {
380
409
  inputTokens: 100,
381
410
  outputTokens: 50,
382
411
  cacheRead: 0,
383
412
  cacheWrite: 0,
384
- durationMs: 1000,
413
+ contextWindow: Infinity,
385
414
  });
415
+ expect(getSession(chatId).usage.contextWindow).toBe(1_000_000);
416
+ });
386
417
 
387
- const usage = getSession(chatId).usage;
388
- expect(usage.fastestResponseMs).toBe(500);
389
- expect(usage.lastResponseMs).toBe(1000);
390
- expect(usage.totalResponseMs).toBe(3500);
418
+ it("rejects negative contextWindow values and keeps previous", () => {
419
+ const chatId = "test-ctx-window-neg";
420
+ getSession(chatId);
421
+
422
+ recordUsage(chatId, {
423
+ inputTokens: 100,
424
+ outputTokens: 50,
425
+ cacheRead: 0,
426
+ cacheWrite: 0,
427
+ contextWindow: 200_000,
428
+ });
429
+
430
+ recordUsage(chatId, {
431
+ inputTokens: 100,
432
+ outputTokens: 50,
433
+ cacheRead: 0,
434
+ cacheWrite: 0,
435
+ contextWindow: -100,
436
+ });
437
+ expect(getSession(chatId).usage.contextWindow).toBe(200_000);
391
438
  });
392
439
  });
393
440
 
@@ -484,52 +531,6 @@ describe("sessions", () => {
484
531
  });
485
532
  });
486
533
 
487
- describe("cost calculation math", () => {
488
- it("calculates multi-component cost correctly (input + output + cache)", () => {
489
- const chatId = "test-cost-math";
490
- getSession(chatId);
491
-
492
- // Use exact token counts to verify the formula:
493
- // cost = (input * pricing.input + cacheWrite * pricing.cacheWrite +
494
- // cacheRead * pricing.cacheRead + output * pricing.output) / 1_000_000
495
- // Sonnet: input=$3/M, output=$15/M, cacheRead=$0.3/M, cacheWrite=$3.75/M
496
- recordUsage(chatId, {
497
- inputTokens: 500_000, // 500k * 3 / 1M = $1.50
498
- outputTokens: 100_000, // 100k * 15 / 1M = $1.50
499
- cacheRead: 200_000, // 200k * 0.3 / 1M = $0.06
500
- cacheWrite: 100_000, // 100k * 3.75 / 1M = $0.375
501
- model: "claude-sonnet-4-6",
502
- });
503
-
504
- const usage = getSession(chatId).usage;
505
- // Total: 1.50 + 1.50 + 0.06 + 0.375 = $3.435
506
- expect(usage.estimatedCostUsd).toBeCloseTo(3.435, 3);
507
- });
508
-
509
- it("accumulates cost across multiple recordUsage calls", () => {
510
- const chatId = "test-cost-accum";
511
- getSession(chatId);
512
-
513
- recordUsage(chatId, {
514
- inputTokens: 1_000_000,
515
- outputTokens: 0,
516
- cacheRead: 0,
517
- cacheWrite: 0,
518
- });
519
- // Sonnet input: $3
520
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(3, 2);
521
-
522
- recordUsage(chatId, {
523
- inputTokens: 0,
524
- outputTokens: 1_000_000,
525
- cacheRead: 0,
526
- cacheWrite: 0,
527
- });
528
- // + Sonnet output: $15. Total: $18
529
- expect(getSession(chatId).usage.estimatedCostUsd).toBeCloseTo(18, 2);
530
- });
531
- });
532
-
533
534
  describe("cache hit rate tracking", () => {
534
535
  it("tracks cache read tokens across multiple turns", () => {
535
536
  const chatId = "test-cache-track-read";
@@ -571,7 +572,6 @@ describe("sessions", () => {
571
572
  const fresh = getSession(chatId);
572
573
  expect(fresh.sessionId).toBeUndefined();
573
574
  expect(fresh.turns).toBe(0);
574
- expect(fresh.usage.estimatedCostUsd).toBe(0);
575
575
  expect(fresh.usage.totalInputTokens).toBe(0);
576
576
  });
577
577
  });
@@ -642,6 +642,40 @@ describe("sessions — migration of legacy field formats", () => {
642
642
  expect(session.createdAt).toBe(9999999);
643
643
  });
644
644
 
645
+ it("backfills missing context tracking fields on legacy sessions", () => {
646
+ vi.mocked(existsSync).mockReturnValueOnce(true);
647
+ vi.mocked(readFileSync).mockReturnValueOnce(
648
+ JSON.stringify({
649
+ "migrate-chat-ctx": {
650
+ sessionId: undefined,
651
+ turns: 4,
652
+ lastActive: 2000,
653
+ createdAt: 2000,
654
+ usage: {
655
+ totalInputTokens: 100,
656
+ totalOutputTokens: 50,
657
+ totalCacheRead: 10,
658
+ totalCacheWrite: 5,
659
+ lastPromptTokens: 115,
660
+ estimatedCostUsd: 0.5,
661
+ totalResponseMs: 1000,
662
+ lastResponseMs: 500,
663
+ fastestResponseMs: 500,
664
+ // contextTokens, contextWindow, numApiCalls deliberately omitted
665
+ },
666
+ },
667
+ }),
668
+ );
669
+ loadSessions();
670
+ const session = getSession("migrate-chat-ctx");
671
+ expect(session.usage.contextTokens).toBe(0);
672
+ expect(session.usage.contextWindow).toBe(0);
673
+ expect(session.usage.numApiCalls).toBe(0);
674
+ // Existing fields should be preserved
675
+ expect(session.usage.totalInputTokens).toBe(100);
676
+ expect(session.usage.lastPromptTokens).toBe(115);
677
+ });
678
+
645
679
  it("fixes fastestResponseMs of 0 to Infinity", () => {
646
680
  vi.mocked(existsSync).mockReturnValueOnce(true);
647
681
  vi.mocked(readFileSync).mockReturnValueOnce(
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Shared constants for Claude SDK backend and background agents.
3
+ *
4
+ * Single source of truth for disallowed tool lists, thinking effort
5
+ * configuration, and streaming parameters.
6
+ */
7
+
8
+ // ── Disallowed tool lists ──────────────────────────────────────────────────
9
+
10
+ /**
11
+ * Core tools disallowed in all SDK query contexts (chat, heartbeat, dream).
12
+ * These are interactive or planning-only tools that make no sense in a
13
+ * headless agent context.
14
+ */
15
+ export const DISALLOWED_TOOLS_CORE = [
16
+ "EnterPlanMode",
17
+ "ExitPlanMode",
18
+ "EnterWorktree",
19
+ "ExitWorktree",
20
+ "TodoWrite",
21
+ "TodoRead",
22
+ "TaskCreate",
23
+ "TaskUpdate",
24
+ "TaskGet",
25
+ "TaskList",
26
+ "TaskOutput",
27
+ "TaskStop",
28
+ "AskUserQuestion",
29
+ ] as const;
30
+
31
+ /** Disallowed tools for the main chat handler (core + web tools replaced by Brave MCP). */
32
+ export const DISALLOWED_TOOLS_CHAT = [
33
+ ...DISALLOWED_TOOLS_CORE,
34
+ "WebSearch",
35
+ "WebFetch",
36
+ ] as const;
37
+
38
+ /** Disallowed tools for background agents — heartbeat and dream (core + Agent). */
39
+ export const DISALLOWED_TOOLS_BACKGROUND = [
40
+ ...DISALLOWED_TOOLS_CORE,
41
+ "Agent",
42
+ ] as const;
43
+
44
+ // ── Thinking / effort configuration ────────────────────────────────────────
45
+
46
+ export const EFFORT_MAP: Record<
47
+ string,
48
+ {
49
+ thinking: { type: "adaptive" | "disabled" };
50
+ effort?: "low" | "medium" | "high" | "max";
51
+ }
52
+ > = {
53
+ off: { thinking: { type: "disabled" } },
54
+ low: { thinking: { type: "adaptive" }, effort: "low" },
55
+ medium: { thinking: { type: "adaptive" }, effort: "medium" },
56
+ high: { thinking: { type: "adaptive" }, effort: "high" },
57
+ max: { thinking: { type: "adaptive" }, effort: "max" },
58
+ };
59
+
60
+ // ── Streaming ──────────────────────────────────────────────────────────────
61
+
62
+ /** Minimum interval (ms) between streaming delta callbacks to avoid flooding frontends. */
63
+ export const STREAM_INTERVAL = 1000;
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Main message handler — executes a user query through the Claude Agent SDK.
3
+ *
4
+ * Orchestrates the full lifecycle: prompt formatting, SDK query, stream
5
+ * processing, error recovery (session expired / context overflow / model
6
+ * fallback), token accounting, and session persistence.
7
+ */
8
+
9
+ import { query } from "@anthropic-ai/claude-agent-sdk";
10
+ import {
11
+ getSession,
12
+ incrementTurns,
13
+ recordUsage,
14
+ resetSession,
15
+ setSessionId,
16
+ setSessionName,
17
+ } from "../../storage/sessions.js";
18
+ import { getChatSettings, setChatModel } from "../../storage/chat-settings.js";
19
+ import { classify } from "../../core/errors.js";
20
+ import { getFallbackModel } from "../../core/models.js";
21
+ import { rebuildSystemPrompt } from "../../util/config.js";
22
+ import { getPluginPromptAdditions } from "../../core/plugin.js";
23
+ import { log, logError, logWarn } from "../../util/log.js";
24
+ import { traceMessage } from "../../util/trace.js";
25
+ import { formatFullDatetime } from "../../util/time.js";
26
+
27
+ import type { QueryParams, QueryResult } from "../../core/types.js";
28
+ import { getConfig } from "./state.js";
29
+ import { buildSdkOptions } from "./options.js";
30
+ import {
31
+ createStreamState,
32
+ isSystemInit,
33
+ isStreamEvent,
34
+ isAssistant,
35
+ isResult,
36
+ processStreamDelta,
37
+ processAssistantMessage,
38
+ processResultMessage,
39
+ } from "./stream.js";
40
+
41
+ // ── Main handler ─────────────────────────────────────────────────────────────
42
+
43
+ export async function handleMessage(
44
+ params: QueryParams,
45
+ _retried = false,
46
+ ): Promise<QueryResult> {
47
+ const config = getConfig();
48
+
49
+ const {
50
+ chatId,
51
+ text,
52
+ senderName,
53
+ isGroup,
54
+ onTextBlock,
55
+ onStreamDelta,
56
+ onToolUse,
57
+ } = params;
58
+ const session = getSession(chatId);
59
+ const t0 = Date.now();
60
+
61
+ // Rebuild system prompt on first turn of a new/reset session so identity,
62
+ // memory, and workspace listing are fresh
63
+ if (session.turns === 0) {
64
+ rebuildSystemPrompt(config, getPluginPromptAdditions());
65
+ }
66
+
67
+ const { options, activeModel } = buildSdkOptions(chatId);
68
+
69
+ const msgIdHint = params.messageId ? ` [msg_id:${params.messageId}]` : "";
70
+ const nowTag = `[${formatFullDatetime()}]`;
71
+
72
+ const prompt = isGroup
73
+ ? `${nowTag} [${senderName}]${msgIdHint}: ${text}`
74
+ : `${nowTag}${msgIdHint} ${text}`;
75
+ log("agent", `[${chatId}] <- (${text.length} chars)`);
76
+ traceMessage(chatId, "in", text, { senderName, isGroup });
77
+
78
+ const qi = query({ prompt, options });
79
+ const state = createStreamState();
80
+
81
+ try {
82
+ for await (const message of qi) {
83
+ // Session ID capture
84
+ if (isSystemInit(message)) {
85
+ state.newSessionId = message.session_id;
86
+ continue;
87
+ }
88
+
89
+ // Stream text deltas and thinking deltas
90
+ if (isStreamEvent(message)) {
91
+ processStreamDelta(message, state, onStreamDelta);
92
+ continue;
93
+ }
94
+
95
+ // Complete assistant message — extract text blocks and tool calls
96
+ if (isAssistant(message)) {
97
+ const result = processAssistantMessage(message, state);
98
+
99
+ // Notify tool usage
100
+ for (const tool of result.tools) {
101
+ if (onToolUse) {
102
+ try {
103
+ onToolUse(tool.name, tool.input);
104
+ } catch {
105
+ /* non-fatal */
106
+ }
107
+ }
108
+ }
109
+
110
+ // Send progress text segments (text before each tool call) in order
111
+ if (onTextBlock) {
112
+ for (const text of result.progressTexts) {
113
+ try {
114
+ await onTextBlock(text);
115
+ } catch {
116
+ /* non-fatal — don't abort the stream loop */
117
+ }
118
+ }
119
+ }
120
+ continue;
121
+ }
122
+
123
+ // Final result — read token counts and context info
124
+ if (isResult(message)) {
125
+ processResultMessage(message, state);
126
+ }
127
+ }
128
+ } catch (err) {
129
+ const classified = classify(err);
130
+
131
+ // Session expired — reset and retry once
132
+ if (classified.reason === "session_expired" && !_retried) {
133
+ logWarn(
134
+ "agent",
135
+ `[${chatId}] Stale session, retrying with fresh session`,
136
+ );
137
+ resetSession(chatId);
138
+ return handleMessage(params, true);
139
+ }
140
+
141
+ // Context length exceeded — safety net for edge cases where SDK
142
+ // auto-compaction doesn't prevent overflow
143
+ if (classified.reason === "context_length" && !_retried) {
144
+ logWarn(
145
+ "agent",
146
+ `[${chatId}] Context length exceeded, resetting session and retrying`,
147
+ );
148
+ resetSession(chatId);
149
+ return handleMessage(params, true);
150
+ }
151
+
152
+ // Model fallback: if overloaded/timeout, retry with the next-tier model
153
+ if (!_retried && classified.retryable) {
154
+ const fallback = getFallbackModel(activeModel);
155
+ if (fallback) {
156
+ logWarn(
157
+ "agent",
158
+ `[${chatId}] ${classified.reason}, falling back to ${fallback.replace("claude-", "")}`,
159
+ );
160
+ resetSession(chatId);
161
+ const originalModel = getChatSettings(chatId).model;
162
+ setChatModel(chatId, fallback);
163
+ try {
164
+ return await handleMessage(params, true);
165
+ } finally {
166
+ setChatModel(chatId, originalModel);
167
+ }
168
+ }
169
+ }
170
+
171
+ logError("agent", `[${chatId}] SDK error: ${classified.message}`);
172
+ throw classified;
173
+ }
174
+
175
+ // ── Persist session and usage ─────────────────────────────────────────────
176
+
177
+ const durationMs = Date.now() - t0;
178
+ if (state.newSessionId) setSessionId(chatId, state.newSessionId);
179
+ incrementTurns(chatId);
180
+ recordUsage(chatId, {
181
+ inputTokens: state.sdkInputTokens,
182
+ outputTokens: state.sdkOutputTokens,
183
+ cacheRead: state.sdkCacheRead,
184
+ cacheWrite: state.sdkCacheWrite,
185
+ durationMs,
186
+ model: activeModel,
187
+ contextTokens: state.contextTokens,
188
+ contextWindow: state.contextWindow,
189
+ numApiCalls: state.numApiCalls,
190
+ });
191
+
192
+ // Set a descriptive session name from the first message
193
+ if (session.turns === 0 && text) {
194
+ const cleanText = text
195
+ .replace(/^\[.*?\]\s*/g, "")
196
+ .replace(/\[msg_id:\d+\]\s*/g, "")
197
+ .trim();
198
+ if (cleanText) {
199
+ const name =
200
+ cleanText.length > 30 ? cleanText.slice(0, 30) + "..." : cleanText;
201
+ setSessionName(chatId, name);
202
+ }
203
+ }
204
+
205
+ // ── Build result ──────────────────────────────────────────────────────────
206
+
207
+ state.allResponseText += state.currentBlockText;
208
+ const totalPrompt =
209
+ state.sdkInputTokens + state.sdkCacheRead + state.sdkCacheWrite;
210
+ const cacheHitPct =
211
+ totalPrompt > 0 ? Math.round((state.sdkCacheRead / totalPrompt) * 100) : 0;
212
+
213
+ log(
214
+ "agent",
215
+ `[${chatId}] -> (${durationMs}ms, in=${state.sdkInputTokens} out=${state.sdkOutputTokens} cache=${cacheHitPct}%` +
216
+ `${state.toolCalls > 0 ? ` tools=${state.toolCalls}` : ""})`,
217
+ );
218
+ traceMessage(chatId, "out", state.allResponseText, {
219
+ durationMs,
220
+ inputTokens: state.sdkInputTokens,
221
+ outputTokens: state.sdkOutputTokens,
222
+ cacheRead: state.sdkCacheRead,
223
+ cacheWrite: state.sdkCacheWrite,
224
+ toolCalls: state.toolCalls,
225
+ model: activeModel,
226
+ });
227
+
228
+ return {
229
+ text: state.allResponseText.trim(),
230
+ durationMs,
231
+ inputTokens: state.sdkInputTokens,
232
+ outputTokens: state.sdkOutputTokens,
233
+ cacheRead: state.sdkCacheRead,
234
+ cacheWrite: state.sdkCacheWrite,
235
+ };
236
+ }