talon-agent 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "talon-agent",
3
- "version": "1.5.0",
3
+ "version": "1.6.0",
4
4
  "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
5
5
  "author": "Dylan Neve",
6
6
  "license": "MIT",
@@ -31,9 +31,13 @@ const {
31
31
  loadChatSettings,
32
32
  resolveModelName,
33
33
  EFFORT_LEVELS,
34
- MODEL_ALIASES,
35
34
  } = await import("../storage/chat-settings.js");
36
35
 
36
+ // Register Claude models (static — no SDK subprocess in tests)
37
+ const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
38
+ await import("../backend/claude-sdk/models.js");
39
+ registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
40
+
37
41
  describe("chat-settings", () => {
38
42
  describe("getChatSettings", () => {
39
43
  it("returns empty object for unknown chat", () => {
@@ -166,12 +170,21 @@ describe("chat-settings", () => {
166
170
  });
167
171
  });
168
172
 
169
- describe("MODEL_ALIASES", () => {
170
- it("contains all expected aliases", () => {
171
- expect(Object.keys(MODEL_ALIASES).length).toBeGreaterThanOrEqual(9);
172
- expect(MODEL_ALIASES.sonnet).toBe("claude-sonnet-4-6");
173
- expect(MODEL_ALIASES.opus).toBe("claude-opus-4-6");
174
- expect(MODEL_ALIASES.haiku).toBe("claude-haiku-4-5");
173
+ describe("model alias resolution (via registry)", () => {
174
+ it("resolves short aliases to full model IDs", () => {
175
+ expect(resolveModelName("sonnet")).toBe("claude-sonnet-4-6");
176
+ expect(resolveModelName("opus")).toBe("claude-opus-4-6");
177
+ expect(resolveModelName("haiku")).toBe("claude-haiku-4-5");
178
+ });
179
+
180
+ it("resolves versioned aliases", () => {
181
+ expect(resolveModelName("sonnet-4-6")).toBe("claude-sonnet-4-6");
182
+ expect(resolveModelName("opus-4.6")).toBe("claude-opus-4-6");
183
+ expect(resolveModelName("haiku-4.5")).toBe("claude-haiku-4-5");
184
+ });
185
+
186
+ it("passes through unknown names unchanged", () => {
187
+ expect(resolveModelName("gpt-4o")).toBe("gpt-4o");
175
188
  });
176
189
  });
177
190
 
@@ -49,6 +49,9 @@ const { classify, TalonError } = await import("../core/errors.js");
49
49
  await import("../storage/cron-store.js");
50
50
  const { handleSharedAction } = await import("../core/gateway-actions.js");
51
51
  const { resolveModelName } = await import("../storage/chat-settings.js");
52
+ const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
53
+ await import("../backend/claude-sdk/models.js");
54
+ registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
52
55
  const { Cron } = await import("croner");
53
56
 
54
57
  // ── Configuration ───────────────────────────────────────────────────────────
@@ -75,12 +75,14 @@ vi.mock("../util/config.js", () => ({
75
75
  ),
76
76
  }));
77
77
 
78
- vi.mock("../backend/claude-sdk/index.js", () => ({
78
+ // Backend mock passed as 3rd arg to handleSharedAction
79
+ const mockBackend = {
80
+ query: vi.fn(),
79
81
  updateSystemPrompt: (...args: unknown[]) =>
80
82
  mockUpdateSystemPrompt(
81
83
  ...(args as Parameters<typeof mockUpdateSystemPrompt>),
82
84
  ),
83
- }));
85
+ };
84
86
 
85
87
  // ── Import after mocks ────────────────────────────────────────────────────
86
88
 
@@ -105,6 +107,7 @@ describe("reload_plugins gateway action", () => {
105
107
  const result = await handleSharedAction(
106
108
  { action: "reload_plugins" },
107
109
  12345,
110
+ mockBackend,
108
111
  );
109
112
  expect(result).not.toBeNull();
110
113
  expect(result!.ok).toBe(true);
@@ -115,19 +118,19 @@ describe("reload_plugins gateway action", () => {
115
118
  });
116
119
 
117
120
  it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
118
- await handleSharedAction({ action: "reload_plugins" }, 12345);
121
+ await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
119
122
  // Gateway no longer passes frontends — reloadPlugins derives them from config
120
123
  expect(mockReloadPlugins).toHaveBeenCalledWith();
121
124
  });
122
125
 
123
126
  it("rebuilds system prompt after reloading", async () => {
124
- await handleSharedAction({ action: "reload_plugins" }, 12345);
127
+ await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
125
128
  expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
126
129
  expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
127
130
  });
128
131
 
129
132
  it("updates backend system prompt after rebuild", async () => {
130
- await handleSharedAction({ action: "reload_plugins" }, 12345);
133
+ await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
131
134
  expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
132
135
  });
133
136
 
@@ -138,6 +141,7 @@ describe("reload_plugins gateway action", () => {
138
141
  const result = await handleSharedAction(
139
142
  { action: "reload_plugins" },
140
143
  12345,
144
+ mockBackend,
141
145
  );
142
146
  expect(result).not.toBeNull();
143
147
  expect(result!.ok).toBe(false);
@@ -151,6 +155,7 @@ describe("reload_plugins gateway action", () => {
151
155
  const result = await handleSharedAction(
152
156
  { action: "reload_plugins" },
153
157
  12345,
158
+ mockBackend,
154
159
  );
155
160
  expect(result!.ok).toBe(false);
156
161
  expect(result!.error).toContain("Invalid JSON in config");
@@ -164,6 +169,7 @@ describe("reload_plugins gateway action", () => {
164
169
  const result = await handleSharedAction(
165
170
  { action: "reload_plugins" },
166
171
  12345,
172
+ mockBackend,
167
173
  );
168
174
  expect(result!.ok).toBe(true);
169
175
  expect(result!.text).toContain("(0)");
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Shared constants for Claude SDK backend and background agents.
3
+ *
4
+ * Single source of truth for disallowed tool lists, thinking effort
5
+ * configuration, and streaming parameters.
6
+ */
7
+
8
+ // ── Disallowed tool lists ──────────────────────────────────────────────────
9
+
10
// Building blocks for the exported lists below, grouped by the kind of tool.
const PLAN_MODE_TOOLS = ["EnterPlanMode", "ExitPlanMode"] as const;
const WORKTREE_TOOLS = ["EnterWorktree", "ExitWorktree"] as const;
const TODO_TOOLS = ["TodoWrite", "TodoRead"] as const;
const TASK_TOOLS = [
  "TaskCreate",
  "TaskUpdate",
  "TaskGet",
  "TaskList",
  "TaskOutput",
  "TaskStop",
] as const;
const WEB_TOOLS = ["WebSearch", "WebFetch"] as const;

/**
 * Tools switched off in every SDK query context (chat, heartbeat, dream).
 * They are interactive or planning-only, which makes no sense for a
 * headless agent.
 */
export const DISALLOWED_TOOLS_CORE = [
  ...PLAN_MODE_TOOLS,
  ...WORKTREE_TOOLS,
  ...TODO_TOOLS,
  ...TASK_TOOLS,
  "AskUserQuestion",
] as const;

/** Main chat handler: the core list plus the web tools (replaced by Brave MCP). */
export const DISALLOWED_TOOLS_CHAT = [
  ...DISALLOWED_TOOLS_CORE,
  ...WEB_TOOLS,
] as const;

/** Background agents (heartbeat and dream): the core list plus `Agent`. */
export const DISALLOWED_TOOLS_BACKGROUND = [
  ...DISALLOWED_TOOLS_CORE,
  "Agent",
] as const;
43
+
44
+ // ── Thinking / effort configuration ────────────────────────────────────────
45
+
46
/** Builds the entry for an effort level that keeps adaptive thinking enabled. */
const adaptiveAt = (effort: "low" | "medium" | "high" | "max") => ({
  thinking: { type: "adaptive" as const },
  effort,
});

/**
 * Lookup from effort-level name to its thinking configuration.
 *
 * `off` disables thinking and carries no `effort` value; every other level
 * uses adaptive thinking with the effort of the same name.
 */
export const EFFORT_MAP: Record<
  string,
  {
    thinking: { type: "adaptive" | "disabled" };
    effort?: "low" | "medium" | "high" | "max";
  }
> = {
  off: { thinking: { type: "disabled" } },
  low: adaptiveAt("low"),
  medium: adaptiveAt("medium"),
  high: adaptiveAt("high"),
  max: adaptiveAt("max"),
};
59
+
60
+ // ── Streaming ──────────────────────────────────────────────────────────────
61
+
62
/**
 * Smallest gap, in milliseconds, allowed between two streaming delta
 * callbacks so that frontends are not flooded with updates.
 */
export const STREAM_INTERVAL = 1_000;
@@ -0,0 +1,236 @@
1
+ /**
2
+ * Main message handler — executes a user query through the Claude Agent SDK.
3
+ *
4
+ * Orchestrates the full lifecycle: prompt formatting, SDK query, stream
5
+ * processing, error recovery (session expired / context overflow / model
6
+ * fallback), token accounting, and session persistence.
7
+ */
8
+
9
+ import { query } from "@anthropic-ai/claude-agent-sdk";
10
+ import {
11
+ getSession,
12
+ incrementTurns,
13
+ recordUsage,
14
+ resetSession,
15
+ setSessionId,
16
+ setSessionName,
17
+ } from "../../storage/sessions.js";
18
+ import { getChatSettings, setChatModel } from "../../storage/chat-settings.js";
19
+ import { classify } from "../../core/errors.js";
20
+ import { getFallbackModel } from "../../core/models.js";
21
+ import { rebuildSystemPrompt } from "../../util/config.js";
22
+ import { getPluginPromptAdditions } from "../../core/plugin.js";
23
+ import { log, logError, logWarn } from "../../util/log.js";
24
+ import { traceMessage } from "../../util/trace.js";
25
+ import { formatFullDatetime } from "../../util/time.js";
26
+
27
+ import type { QueryParams, QueryResult } from "../../core/types.js";
28
+ import { getConfig } from "./state.js";
29
+ import { buildSdkOptions } from "./options.js";
30
+ import {
31
+ createStreamState,
32
+ isSystemInit,
33
+ isStreamEvent,
34
+ isAssistant,
35
+ isResult,
36
+ processStreamDelta,
37
+ processAssistantMessage,
38
+ processResultMessage,
39
+ } from "./stream.js";
40
+
41
+ // ── Main handler ─────────────────────────────────────────────────────────────
42
+
43
/** Longest auto-generated session name, in Unicode code points, before "..." is appended. */
const SESSION_NAME_MAX_CHARS = 30;

/**
 * Run one user message through the Claude Agent SDK and return the reply.
 *
 * Steps, in order:
 *  1. Rebuild the system prompt when the stored session has no turns yet.
 *  2. Build the SDK options and a timestamped prompt. Group chats also carry
 *     the sender's name; a `[msg_id:…]` hint is added when `messageId` is set.
 *  3. Consume the SDK message stream, forwarding stream deltas, tool-use
 *     notifications and progress text to the optional callbacks. Exceptions
 *     thrown by `onToolUse` / `onTextBlock` are deliberately ignored so a
 *     misbehaving frontend cannot abort the stream loop.
 *  4. If the stream throws, retry at most once: with a reset session for
 *     `session_expired` / `context_length`, or with a reset session on the
 *     fallback model for any other retryable error.
 *  5. Persist the session id, turn count and usage, and name the session
 *     after its first message.
 *
 * @param params   Query parameters: chat id, message text, sender info and
 *                 the optional streaming callbacks.
 * @param _retried Internal recursion guard. It is `true` on the single retry
 *                 attempt, so a second failure is thrown instead of retried.
 * @returns The trimmed response text plus duration and token/cache counters.
 * @throws The classified error (`classify(err)`) when the SDK stream fails
 *         and no retry applies.
 */
export async function handleMessage(
  params: QueryParams,
  _retried = false,
): Promise<QueryResult> {
  const config = getConfig();

  const {
    chatId,
    text,
    senderName,
    isGroup,
    onTextBlock,
    onStreamDelta,
    onToolUse,
  } = params;
  const session = getSession(chatId);
  const t0 = Date.now();

  // Capture this BEFORE incrementTurns() runs further down. If getSession()
  // hands back the live session record (NOTE(review): not visible from here —
  // confirm), re-reading session.turns after the increment would never see 0
  // and the session would never be named.
  const isFirstTurn = session.turns === 0;

  // Rebuild system prompt on first turn of a new/reset session so identity,
  // memory, and workspace listing are fresh
  if (isFirstTurn) {
    rebuildSystemPrompt(config, getPluginPromptAdditions());
  }

  const { options, activeModel } = buildSdkOptions(chatId);

  const msgIdHint = params.messageId ? ` [msg_id:${params.messageId}]` : "";
  const nowTag = `[${formatFullDatetime()}]`;

  const prompt = isGroup
    ? `${nowTag} [${senderName}]${msgIdHint}: ${text}`
    : `${nowTag}${msgIdHint} ${text}`;
  log("agent", `[${chatId}] <- (${text.length} chars)`);
  traceMessage(chatId, "in", text, { senderName, isGroup });

  const qi = query({ prompt, options });
  const state = createStreamState();

  try {
    for await (const message of qi) {
      // Session ID capture
      if (isSystemInit(message)) {
        state.newSessionId = message.session_id;
        continue;
      }

      // Stream text deltas and thinking deltas
      if (isStreamEvent(message)) {
        processStreamDelta(message, state, onStreamDelta);
        continue;
      }

      // Complete assistant message — extract text blocks and tool calls
      if (isAssistant(message)) {
        const result = processAssistantMessage(message, state);

        // Notify tool usage
        if (onToolUse) {
          for (const tool of result.tools) {
            try {
              onToolUse(tool.name, tool.input);
            } catch {
              /* non-fatal */
            }
          }
        }

        // Send progress text segments (text before each tool call) in order
        if (onTextBlock) {
          for (const segment of result.progressTexts) {
            try {
              await onTextBlock(segment);
            } catch {
              /* non-fatal — don't abort the stream loop */
            }
          }
        }
        continue;
      }

      // Final result — read token counts and context info
      if (isResult(message)) {
        processResultMessage(message, state);
      }
    }
  } catch (err) {
    const classified = classify(err);

    // Session expired — reset and retry once
    if (classified.reason === "session_expired" && !_retried) {
      logWarn(
        "agent",
        `[${chatId}] Stale session, retrying with fresh session`,
      );
      resetSession(chatId);
      return handleMessage(params, true);
    }

    // Context length exceeded — safety net for edge cases where SDK
    // auto-compaction doesn't prevent overflow
    if (classified.reason === "context_length" && !_retried) {
      logWarn(
        "agent",
        `[${chatId}] Context length exceeded, resetting session and retrying`,
      );
      resetSession(chatId);
      return handleMessage(params, true);
    }

    // Model fallback: if overloaded/timeout, retry with the next-tier model
    if (!_retried && classified.retryable) {
      const fallback = getFallbackModel(activeModel);
      if (fallback) {
        logWarn(
          "agent",
          `[${chatId}] ${classified.reason}, falling back to ${fallback.replace("claude-", "")}`,
        );
        resetSession(chatId);
        // The fallback model is a temporary override: the chat's own model
        // setting is put back whether the retry succeeds or throws.
        const originalModel = getChatSettings(chatId).model;
        setChatModel(chatId, fallback);
        try {
          return await handleMessage(params, true);
        } finally {
          setChatModel(chatId, originalModel);
        }
      }
    }

    logError("agent", `[${chatId}] SDK error: ${classified.message}`);
    throw classified;
  }

  // ── Persist session and usage ─────────────────────────────────────────────

  const durationMs = Date.now() - t0;
  if (state.newSessionId) setSessionId(chatId, state.newSessionId);
  incrementTurns(chatId);
  recordUsage(chatId, {
    inputTokens: state.sdkInputTokens,
    outputTokens: state.sdkOutputTokens,
    cacheRead: state.sdkCacheRead,
    cacheWrite: state.sdkCacheWrite,
    durationMs,
    model: activeModel,
    contextTokens: state.contextTokens,
    contextWindow: state.contextWindow,
    numApiCalls: state.numApiCalls,
  });

  // Set a descriptive session name from the first message
  if (isFirstTurn && text) {
    const name = deriveSessionName(text);
    if (name) setSessionName(chatId, name);
  }

  // ── Build result ──────────────────────────────────────────────────────────

  // Fold any text still buffered in the current block into the full response.
  state.allResponseText += state.currentBlockText;
  const totalPrompt =
    state.sdkInputTokens + state.sdkCacheRead + state.sdkCacheWrite;
  const cacheHitPct =
    totalPrompt > 0 ? Math.round((state.sdkCacheRead / totalPrompt) * 100) : 0;

  log(
    "agent",
    `[${chatId}] -> (${durationMs}ms, in=${state.sdkInputTokens} out=${state.sdkOutputTokens} cache=${cacheHitPct}%` +
      `${state.toolCalls > 0 ? ` tools=${state.toolCalls}` : ""})`,
  );
  traceMessage(chatId, "out", state.allResponseText, {
    durationMs,
    inputTokens: state.sdkInputTokens,
    outputTokens: state.sdkOutputTokens,
    cacheRead: state.sdkCacheRead,
    cacheWrite: state.sdkCacheWrite,
    toolCalls: state.toolCalls,
    model: activeModel,
  });

  return {
    text: state.allResponseText.trim(),
    durationMs,
    inputTokens: state.sdkInputTokens,
    outputTokens: state.sdkOutputTokens,
    cacheRead: state.sdkCacheRead,
    cacheWrite: state.sdkCacheWrite,
  };
}

/**
 * Derive a short session name from the first message of a session.
 *
 * Strips one leading `[...]` tag and every `[msg_id:N]` hint, trims the rest,
 * and truncates to SESSION_NAME_MAX_CHARS followed by "...". Truncation
 * counts Unicode code points rather than UTF-16 units so that a surrogate
 * pair (e.g. an emoji) is never cut in half.
 *
 * @returns The derived name, or an empty string when nothing is left.
 */
function deriveSessionName(text: string): string {
  const cleanText = text
    .replace(/^\[.*?\]\s*/g, "")
    .replace(/\[msg_id:\d+\]\s*/g, "")
    .trim();
  if (!cleanText) return "";

  const codePoints = Array.from(cleanText);
  return codePoints.length > SESSION_NAME_MAX_CHARS
    ? codePoints.slice(0, SESSION_NAME_MAX_CHARS).join("") + "..."
    : cleanText;
}