talon-agent 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. package/package.json +2 -2
  2. package/prompts/heartbeat.md +18 -6
  3. package/src/__tests__/chat-settings.test.ts +20 -7
  4. package/src/__tests__/fuzz.test.ts +3 -0
  5. package/src/__tests__/heartbeat.test.ts +21 -0
  6. package/src/__tests__/reload-plugins.test.ts +205 -0
  7. package/src/__tests__/sessions.test.ts +155 -121
  8. package/src/backend/claude-sdk/constants.ts +63 -0
  9. package/src/backend/claude-sdk/handler.ts +236 -0
  10. package/src/backend/claude-sdk/index.ts +10 -423
  11. package/src/backend/claude-sdk/models.ts +216 -0
  12. package/src/backend/claude-sdk/options.ts +129 -0
  13. package/src/backend/claude-sdk/state.ts +59 -0
  14. package/src/backend/claude-sdk/stream.ts +221 -0
  15. package/src/backend/claude-sdk/warm.ts +89 -0
  16. package/src/bootstrap.ts +22 -108
  17. package/src/cli.ts +30 -15
  18. package/src/core/dream.ts +5 -17
  19. package/src/core/gateway-actions.ts +34 -2
  20. package/src/core/gateway.ts +5 -2
  21. package/src/core/heartbeat.ts +12 -22
  22. package/src/core/models.ts +149 -0
  23. package/src/core/plugin.ts +147 -0
  24. package/src/core/tools/admin.ts +22 -0
  25. package/src/core/tools/index.ts +2 -0
  26. package/src/core/tools/types.ts +2 -1
  27. package/src/core/types.ts +4 -0
  28. package/src/frontend/teams/index.ts +7 -10
  29. package/src/frontend/telegram/callbacks.ts +15 -27
  30. package/src/frontend/telegram/commands.ts +32 -36
  31. package/src/frontend/telegram/helpers.ts +13 -15
  32. package/src/frontend/telegram/index.ts +1 -1
  33. package/src/frontend/terminal/commands.ts +7 -4
  34. package/src/index.ts +2 -1
  35. package/src/storage/chat-settings.ts +5 -19
  36. package/src/storage/sessions.ts +34 -40
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "talon-agent",
3
- "version": "1.4.0",
3
+ "version": "1.6.0",
4
4
  "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
5
5
  "author": "Dylan Neve",
6
6
  "license": "MIT",
@@ -51,7 +51,7 @@
51
51
  "format:check": "prettier --check src/ prompts/"
52
52
  },
53
53
  "dependencies": {
54
- "@anthropic-ai/claude-agent-sdk": "^0.2.97",
54
+ "@anthropic-ai/claude-agent-sdk": "^0.2.104",
55
55
  "@brave/brave-search-mcp-server": "^2.0.75",
56
56
  "@clack/prompts": "^1.2.0",
57
57
  "@grammyjs/auto-retry": "^2.0.2",
package/prompts/heartbeat.md CHANGED
@@ -1,6 +1,14 @@
1
1
  You are Talon's background heartbeat agent. You run periodically (every {{intervalMinutes}} minutes) to perform maintenance tasks defined by the user.
2
2
 
3
- You have access ONLY to filesystem tools (Read, Write, Edit, Bash, Glob, Grep). Do NOT attempt to use any Telegram, MCP, or messaging tools.
3
+ You have access to filesystem tools (Read, Write, Edit, Bash, Glob, Grep) and all loaded MCP plugins. Do NOT use Telegram messaging tools — you cannot send messages to users.
4
+
5
+ ## Available MCP Tools
6
+
7
+ You have access to all registered MCP plugin tools (excluding Telegram messaging tools). The exact set depends on what plugins are enabled in the current configuration, but may include email, memory/knowledge graph, web search, Wikipedia, GitHub, media processing, browser automation, and more.
8
+
9
+ Only use tools that are actually available in your current session. Do not assume any specific tool is present — check what's exposed to you at runtime.
10
+
11
+ Use available tools when they help accomplish the user-defined tasks (e.g. checking email, querying the knowledge graph, searching the web for updates).
4
12
 
5
13
  ## Context
6
14
 
@@ -20,11 +28,15 @@ If the instructions file does not exist or is empty, perform these default tasks
20
28
  1. **Review recent logs** — Check `{{logsDir}}/` for log files dated after `{{lastRunIso}}`. If `{{lastRunIso}}` is `never`, treat it as the beginning of time and review all available logs. Extract any new facts, preferences, or notable events.
21
29
  2. **Update memory** — Merge any new information into `{{memoryFile}}`, keeping entries concise and factual.
22
30
  3. **Update daily notes** — Write today's learnings, observations, corrections, and follow-ups to `{{dailyMemoryFile}}`. Keep entries concise — the bot reads this file on demand for context.
23
- 4. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
31
+ 4. **Check email** — If email tools are available, check the inbox for new messages and note anything important.
32
+ 5. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
24
33
 
25
34
  ## Rules
26
35
 
27
- - Be surgical and precise. Do not rewrite files unnecessarily.
28
- - Do not modify files outside the workspace unless the instructions explicitly allow it.
29
- - Keep your work focused and efficient — you have a 10-minute time limit.
30
- - When done, stop. The system handles all state tracking.
36
+ - Do NOT use Telegram messaging tools — they are not available in heartbeat mode.
37
+ - Be concise in log entries and memory updates.
38
+ - If a task fails, log the error and move on to the next task.
39
+ - Do NOT modify the instructions file — only read it.
40
+ - Be surgical: only make the minimal file changes needed to complete the current task.
41
+ - Do NOT create, modify, move, or delete files outside `{{workspace}}` unless the user-defined instructions explicitly require it.
42
+ - Complete all tasks within the time budget. If running low, prioritize memory updates.
package/src/__tests__/chat-settings.test.ts CHANGED
@@ -31,9 +31,13 @@ const {
31
31
  loadChatSettings,
32
32
  resolveModelName,
33
33
  EFFORT_LEVELS,
34
- MODEL_ALIASES,
35
34
  } = await import("../storage/chat-settings.js");
36
35
 
36
+ // Register Claude models (static — no SDK subprocess in tests)
37
+ const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
38
+ await import("../backend/claude-sdk/models.js");
39
+ registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
40
+
37
41
  describe("chat-settings", () => {
38
42
  describe("getChatSettings", () => {
39
43
  it("returns empty object for unknown chat", () => {
@@ -166,12 +170,21 @@ describe("chat-settings", () => {
166
170
  });
167
171
  });
168
172
 
169
- describe("MODEL_ALIASES", () => {
170
- it("contains all expected aliases", () => {
171
- expect(Object.keys(MODEL_ALIASES).length).toBeGreaterThanOrEqual(9);
172
- expect(MODEL_ALIASES.sonnet).toBe("claude-sonnet-4-6");
173
- expect(MODEL_ALIASES.opus).toBe("claude-opus-4-6");
174
- expect(MODEL_ALIASES.haiku).toBe("claude-haiku-4-5");
173
+ describe("model alias resolution (via registry)", () => {
174
+ it("resolves short aliases to full model IDs", () => {
175
+ expect(resolveModelName("sonnet")).toBe("claude-sonnet-4-6");
176
+ expect(resolveModelName("opus")).toBe("claude-opus-4-6");
177
+ expect(resolveModelName("haiku")).toBe("claude-haiku-4-5");
178
+ });
179
+
180
+ it("resolves versioned aliases", () => {
181
+ expect(resolveModelName("sonnet-4-6")).toBe("claude-sonnet-4-6");
182
+ expect(resolveModelName("opus-4.6")).toBe("claude-opus-4-6");
183
+ expect(resolveModelName("haiku-4.5")).toBe("claude-haiku-4-5");
184
+ });
185
+
186
+ it("passes through unknown names unchanged", () => {
187
+ expect(resolveModelName("gpt-4o")).toBe("gpt-4o");
175
188
  });
176
189
  });
177
190
 
package/src/__tests__/fuzz.test.ts CHANGED
@@ -49,6 +49,9 @@ const { classify, TalonError } = await import("../core/errors.js");
49
49
  await import("../storage/cron-store.js");
50
50
  const { handleSharedAction } = await import("../core/gateway-actions.js");
51
51
  const { resolveModelName } = await import("../storage/chat-settings.js");
52
+ const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
53
+ await import("../backend/claude-sdk/models.js");
54
+ registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
52
55
  const { Cron } = await import("croner");
53
56
 
54
57
  // ── Configuration ───────────────────────────────────────────────────────────
package/src/__tests__/heartbeat.test.ts CHANGED
@@ -48,6 +48,10 @@ vi.mock("@anthropic-ai/claude-agent-sdk", () => ({
48
48
  query: queryMock,
49
49
  }));
50
50
 
51
+ vi.mock("../core/plugin.js", () => ({
52
+ getPluginMcpServers: vi.fn(() => ({})),
53
+ }));
54
+
51
55
  vi.mock("../util/paths.js", () => ({
52
56
  files: {
53
57
  heartbeatState: "/fake/.talon/workspace/memory/heartbeat_state.json",
@@ -184,6 +188,23 @@ describe("forceHeartbeat", () => {
184
188
  expect(finalState.status).toBe("idle");
185
189
  });
186
190
 
191
+ it("passes plugin MCP servers to the agent via getPluginMcpServers", async () => {
192
+ const { getPluginMcpServers } = await import("../core/plugin.js");
193
+ const mockServers = {
194
+ "email-tools": { command: "node", args: ["email.js"], env: {} },
195
+ };
196
+ vi.mocked(getPluginMcpServers).mockReturnValue(mockServers);
197
+
198
+ await forceHeartbeat();
199
+
200
+ expect(getPluginMcpServers).toHaveBeenCalledWith("", "heartbeat");
201
+ // Verify mcpServers was passed through to query()
202
+ const queryCall = queryMock.mock.calls[0] as unknown as [
203
+ { options: { mcpServers: Record<string, unknown> } },
204
+ ];
205
+ expect(queryCall[0].options.mcpServers).toEqual(mockServers);
206
+ });
207
+
187
208
  it("preserves previous last_run on failure", async () => {
188
209
  const previousLastRun = Date.now() - 3600_000;
189
210
  existsSyncMock.mockReturnValue(true);
@@ -0,0 +1,205 @@
1
+ import { describe, it, expect, vi, beforeEach } from "vitest";
2
+
3
+ // ── Module mocks ──────────────────────────────────────────────────────────
4
+
5
+ vi.mock("../util/log.js", () => ({
6
+ log: vi.fn(),
7
+ logError: vi.fn(),
8
+ logWarn: vi.fn(),
9
+ logDebug: vi.fn(),
10
+ }));
11
+
12
+ vi.mock("write-file-atomic", () => ({
13
+ default: { sync: vi.fn() },
14
+ }));
15
+
16
+ // Mock cheerio (required by gateway-actions via extractText)
17
+ vi.mock("cheerio", () => ({
18
+ load: vi.fn(() => {
19
+ const $ = (sel: string) => ({
20
+ remove: vi.fn(),
21
+ text: () => "",
22
+ });
23
+ ($ as any).root = vi.fn();
24
+ return $;
25
+ }),
26
+ }));
27
+
28
+ // Mock storage modules required by gateway-actions
29
+ vi.mock("../storage/history.js", () => ({
30
+ getRecentFormatted: vi.fn(() => ""),
31
+ searchHistory: vi.fn(() => ""),
32
+ getMessagesByUser: vi.fn(() => ""),
33
+ getKnownUsers: vi.fn(() => ""),
34
+ }));
35
+ vi.mock("../storage/media-index.js", () => ({
36
+ formatMediaIndex: vi.fn(() => ""),
37
+ }));
38
+ vi.mock("../storage/cron-store.js", () => ({
39
+ addCronJob: vi.fn(),
40
+ getCronJob: vi.fn(),
41
+ getCronJobsForChat: vi.fn(() => []),
42
+ updateCronJob: vi.fn(),
43
+ deleteCronJob: vi.fn(),
44
+ validateCronExpression: vi.fn(() => ({ valid: true })),
45
+ generateCronId: vi.fn(() => "test-id"),
46
+ }));
47
+
48
+ // ── Plugin mocking ──────────────────────────────────────────────────────
49
+
50
+ const DEFAULT_CONFIG = {
51
+ model: "claude-opus-4-6",
52
+ frontend: "telegram",
53
+ plugins: [],
54
+ systemPrompt: "test prompt",
55
+ };
56
+
57
+ const mockReloadPlugins = vi.fn(async () => ({
58
+ names: ["extras", "brave-search"],
59
+ config: { ...DEFAULT_CONFIG },
60
+ }));
61
+ const mockGetPluginPromptAdditions = vi.fn(() => "prompt additions");
62
+ const mockRebuildSystemPrompt = vi.fn();
63
+ const mockUpdateSystemPrompt = vi.fn();
64
+
65
+ vi.mock("../core/plugin.js", () => ({
66
+ reloadPlugins: (...args: unknown[]) =>
67
+ mockReloadPlugins(...(args as Parameters<typeof mockReloadPlugins>)),
68
+ getPluginPromptAdditions: () => mockGetPluginPromptAdditions(),
69
+ }));
70
+
71
+ vi.mock("../util/config.js", () => ({
72
+ rebuildSystemPrompt: (...args: unknown[]) =>
73
+ mockRebuildSystemPrompt(
74
+ ...(args as Parameters<typeof mockRebuildSystemPrompt>),
75
+ ),
76
+ }));
77
+
78
+ // Backend mock — passed as 3rd arg to handleSharedAction
79
+ const mockBackend = {
80
+ query: vi.fn(),
81
+ updateSystemPrompt: (...args: unknown[]) =>
82
+ mockUpdateSystemPrompt(
83
+ ...(args as Parameters<typeof mockUpdateSystemPrompt>),
84
+ ),
85
+ };
86
+
87
+ // ── Import after mocks ────────────────────────────────────────────────────
88
+
89
+ import { handleSharedAction } from "../core/gateway-actions.js";
90
+
91
+ // ── Tests ─────────────────────────────────────────────────────────────────
92
+
93
+ describe("reload_plugins gateway action", () => {
94
+ beforeEach(() => {
95
+ vi.resetAllMocks();
96
+ // Re-establish default implementations after reset
97
+ mockReloadPlugins.mockImplementation(async () => ({
98
+ names: ["extras", "brave-search"],
99
+ config: { ...DEFAULT_CONFIG },
100
+ }));
101
+ mockGetPluginPromptAdditions.mockReturnValue("prompt additions");
102
+ mockRebuildSystemPrompt.mockImplementation(() => {});
103
+ mockUpdateSystemPrompt.mockImplementation(() => {});
104
+ });
105
+
106
+ it("returns loaded plugin names on success", async () => {
107
+ const result = await handleSharedAction(
108
+ { action: "reload_plugins" },
109
+ 12345,
110
+ mockBackend,
111
+ );
112
+ expect(result).not.toBeNull();
113
+ expect(result!.ok).toBe(true);
114
+ expect(result!.text).toContain("Plugins reloaded successfully");
115
+ expect(result!.text).toContain("extras");
116
+ expect(result!.text).toContain("brave-search");
117
+ expect(result!.text).toContain("(2)");
118
+ });
119
+
120
+ it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
121
+ await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
122
+ // Gateway no longer passes frontends — reloadPlugins derives them from config
123
+ expect(mockReloadPlugins).toHaveBeenCalledWith();
124
+ });
125
+
126
+ it("rebuilds system prompt after reloading", async () => {
127
+ await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
128
+ expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
129
+ expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
130
+ });
131
+
132
+ it("updates backend system prompt after rebuild", async () => {
133
+ await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
134
+ expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
135
+ });
136
+
137
+ it("returns error when reloadPlugins throws", async () => {
138
+ mockReloadPlugins.mockRejectedValueOnce(
139
+ new Error("Config validation failed"),
140
+ );
141
+ const result = await handleSharedAction(
142
+ { action: "reload_plugins" },
143
+ 12345,
144
+ mockBackend,
145
+ );
146
+ expect(result).not.toBeNull();
147
+ expect(result!.ok).toBe(false);
148
+ expect(result!.error).toContain("Config validation failed");
149
+ });
150
+
151
+ it("returns error when config is malformed", async () => {
152
+ mockReloadPlugins.mockRejectedValueOnce(
153
+ new Error("Invalid JSON in config"),
154
+ );
155
+ const result = await handleSharedAction(
156
+ { action: "reload_plugins" },
157
+ 12345,
158
+ mockBackend,
159
+ );
160
+ expect(result!.ok).toBe(false);
161
+ expect(result!.error).toContain("Invalid JSON in config");
162
+ });
163
+
164
+ it("reports zero plugins when none configured", async () => {
165
+ mockReloadPlugins.mockImplementation(async () => ({
166
+ names: [],
167
+ config: { ...DEFAULT_CONFIG },
168
+ }));
169
+ const result = await handleSharedAction(
170
+ { action: "reload_plugins" },
171
+ 12345,
172
+ mockBackend,
173
+ );
174
+ expect(result!.ok).toBe(true);
175
+ expect(result!.text).toContain("(0)");
176
+ expect(result!.text).toContain("(none)");
177
+ });
178
+ });
179
+
180
+ // ── Admin tool description tests ──────────────────────────────────────────
181
+
182
+ describe("admin tool description", () => {
183
+ it("does not mention session reset or MCP subprocesses", async () => {
184
+ const { adminTools } = await import("../core/tools/admin.js");
185
+ const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
186
+ expect(reloadTool).toBeDefined();
187
+ expect(reloadTool!.description).not.toContain("resets sessions");
188
+ expect(reloadTool!.description).not.toContain("sessions reset");
189
+ expect(reloadTool!.description).not.toContain("MCP subprocesses");
190
+ expect(reloadTool!.description).toContain("without restarting");
191
+ expect(reloadTool!.description).toContain("without downtime");
192
+ });
193
+
194
+ it("mentions env var cleanup", async () => {
195
+ const { adminTools } = await import("../core/tools/admin.js");
196
+ const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
197
+ expect(reloadTool!.description).toContain("env vars");
198
+ });
199
+
200
+ it("has admin tag", async () => {
201
+ const { adminTools } = await import("../core/tools/admin.js");
202
+ const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
203
+ expect(reloadTool!.tag).toBe("admin");
204
+ });
205
+ });