npm - talon-agent - Versions diffs - 1.4.0 → 1.6.0 - Mend

talon-agent 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/package.json +2 -2
package/prompts/heartbeat.md +18 -6
package/src/__tests__/chat-settings.test.ts +20 -7
package/src/__tests__/fuzz.test.ts +3 -0
package/src/__tests__/heartbeat.test.ts +21 -0
package/src/__tests__/reload-plugins.test.ts +205 -0
package/src/__tests__/sessions.test.ts +155 -121
package/src/backend/claude-sdk/constants.ts +63 -0
package/src/backend/claude-sdk/handler.ts +236 -0
package/src/backend/claude-sdk/index.ts +10 -423
package/src/backend/claude-sdk/models.ts +216 -0
package/src/backend/claude-sdk/options.ts +129 -0
package/src/backend/claude-sdk/state.ts +59 -0
package/src/backend/claude-sdk/stream.ts +221 -0
package/src/backend/claude-sdk/warm.ts +89 -0
package/src/bootstrap.ts +22 -108
package/src/cli.ts +30 -15
package/src/core/dream.ts +5 -17
package/src/core/gateway-actions.ts +34 -2
package/src/core/gateway.ts +5 -2
package/src/core/heartbeat.ts +12 -22
package/src/core/models.ts +149 -0
package/src/core/plugin.ts +147 -0
package/src/core/tools/admin.ts +22 -0
package/src/core/tools/index.ts +2 -0
package/src/core/tools/types.ts +2 -1
package/src/core/types.ts +4 -0
package/src/frontend/teams/index.ts +7 -10
package/src/frontend/telegram/callbacks.ts +15 -27
package/src/frontend/telegram/commands.ts +32 -36
package/src/frontend/telegram/helpers.ts +13 -15
package/src/frontend/telegram/index.ts +1 -1
package/src/frontend/terminal/commands.ts +7 -4
package/src/index.ts +2 -1
package/src/storage/chat-settings.ts +5 -19
package/src/storage/sessions.ts +34 -40

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "talon-agent",
-  "version": "1.4.0",
+  "version": "1.6.0",
   "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
   "author": "Dylan Neve",
   "license": "MIT",
@@ -51,7 +51,7 @@
     "format:check": "prettier --check src/ prompts/"
   },
   "dependencies": {
-    "@anthropic-ai/claude-agent-sdk": "^0.2.97",
+    "@anthropic-ai/claude-agent-sdk": "^0.2.104",
     "@brave/brave-search-mcp-server": "^2.0.75",
     "@clack/prompts": "^1.2.0",
     "@grammyjs/auto-retry": "^2.0.2",

package/prompts/heartbeat.md CHANGED

@@ -1,6 +1,14 @@
 You are Talon's background heartbeat agent. You run periodically (every {{intervalMinutes}} minutes) to perform maintenance tasks defined by the user.
-You have access ONLY to filesystem tools (Read, Write, Edit, Bash, Glob, Grep). Do NOT attempt to use any Telegram, MCP, or messaging tools.
+You have access to filesystem tools (Read, Write, Edit, Bash, Glob, Grep) and all loaded MCP plugins. Do NOT use Telegram messaging tools — you cannot send messages to users.
+## Available MCP Tools
+You have access to all registered MCP plugin tools (excluding Telegram messaging tools). The exact set depends on what plugins are enabled in the current configuration, but may include email, memory/knowledge graph, web search, Wikipedia, GitHub, media processing, browser automation, and more.
+Only use tools that are actually available in your current session. Do not assume any specific tool is present — check what's exposed to you at runtime.
+Use available tools when they help accomplish the user-defined tasks (e.g. checking email, querying the knowledge graph, searching the web for updates).
 ## Context
@@ -20,11 +28,15 @@ If the instructions file does not exist or is empty, perform these default tasks
 1. **Review recent logs** — Check `{{logsDir}}/` for log files dated after `{{lastRunIso}}`. If `{{lastRunIso}}` is `never`, treat it as the beginning of time and review all available logs. Extract any new facts, preferences, or notable events.
 2. **Update memory** — Merge any new information into `{{memoryFile}}`, keeping entries concise and factual.
 3. **Update daily notes** — Write today's learnings, observations, corrections, and follow-ups to `{{dailyMemoryFile}}`. Keep entries concise — the bot reads this file on demand for context.
-4. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
+4. **Check email** — If email tools are available, check the inbox for new messages and note anything important.
+5. **Workspace hygiene** — Note any issues but do not delete files unless the instructions explicitly say to.
 ## Rules
-- Be surgical and precise. Do not rewrite files unnecessarily.
-- Do not modify files outside the workspace unless the instructions explicitly allow it.
-- Keep your work focused and efficient — you have a 10-minute time limit.
-- When done, stop. The system handles all state tracking.
+- Do NOT use Telegram messaging tools — they are not available in heartbeat mode.
+- Be concise in log entries and memory updates.
+- If a task fails, log the error and move on to the next task.
+- Do NOT modify the instructions file — only read it.
+- Be surgical: only make the minimal file changes needed to complete the current task.
+- Do NOT create, modify, move, or delete files outside `{{workspace}}` unless the user-defined instructions explicitly require it.
+- Complete all tasks within the time budget. If running low, prioritize memory updates.

package/src/__tests__/chat-settings.test.ts CHANGED

@@ -31,9 +31,13 @@ const {
   loadChatSettings,
   resolveModelName,
   EFFORT_LEVELS,
-  MODEL_ALIASES,
 } = await import("../storage/chat-settings.js");
+// Register Claude models (static — no SDK subprocess in tests)
+const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
+  await import("../backend/claude-sdk/models.js");
+registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
 describe("chat-settings", () => {
   describe("getChatSettings", () => {
     it("returns empty object for unknown chat", () => {
@@ -166,12 +170,21 @@ describe("chat-settings", () => {
     });
   });
-  describe("MODEL_ALIASES", () => {
-    it("contains all expected aliases", () => {
-      expect(Object.keys(MODEL_ALIASES).length).toBeGreaterThanOrEqual(9);
-      expect(MODEL_ALIASES.sonnet).toBe("claude-sonnet-4-6");
-      expect(MODEL_ALIASES.opus).toBe("claude-opus-4-6");
-      expect(MODEL_ALIASES.haiku).toBe("claude-haiku-4-5");
+  describe("model alias resolution (via registry)", () => {
+    it("resolves short aliases to full model IDs", () => {
+      expect(resolveModelName("sonnet")).toBe("claude-sonnet-4-6");
+      expect(resolveModelName("opus")).toBe("claude-opus-4-6");
+      expect(resolveModelName("haiku")).toBe("claude-haiku-4-5");
+    });
+    it("resolves versioned aliases", () => {
+      expect(resolveModelName("sonnet-4-6")).toBe("claude-sonnet-4-6");
+      expect(resolveModelName("opus-4.6")).toBe("claude-opus-4-6");
+      expect(resolveModelName("haiku-4.5")).toBe("claude-haiku-4-5");
+    });
+    it("passes through unknown names unchanged", () => {
+      expect(resolveModelName("gpt-4o")).toBe("gpt-4o");
     });
   });

package/src/__tests__/fuzz.test.ts CHANGED

@@ -49,6 +49,9 @@ const { classify, TalonError } = await import("../core/errors.js");
 await import("../storage/cron-store.js");
 const { handleSharedAction } = await import("../core/gateway-actions.js");
 const { resolveModelName } = await import("../storage/chat-settings.js");
+const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
+  await import("../backend/claude-sdk/models.js");
+registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
 const { Cron } = await import("croner");
 // ── Configuration ───────────────────────────────────────────────────────────

package/src/__tests__/heartbeat.test.ts CHANGED

@@ -48,6 +48,10 @@ vi.mock("@anthropic-ai/claude-agent-sdk", () => ({
   query: queryMock,
 }));
+vi.mock("../core/plugin.js", () => ({
+  getPluginMcpServers: vi.fn(() => ({})),
+}));
 vi.mock("../util/paths.js", () => ({
   files: {
     heartbeatState: "/fake/.talon/workspace/memory/heartbeat_state.json",
@@ -184,6 +188,23 @@ describe("forceHeartbeat", () => {
     expect(finalState.status).toBe("idle");
   });
+  it("passes plugin MCP servers to the agent via getPluginMcpServers", async () => {
+    const { getPluginMcpServers } = await import("../core/plugin.js");
+    const mockServers = {
+      "email-tools": { command: "node", args: ["email.js"], env: {} },
+    };
+    vi.mocked(getPluginMcpServers).mockReturnValue(mockServers);
+    await forceHeartbeat();
+    expect(getPluginMcpServers).toHaveBeenCalledWith("", "heartbeat");
+    // Verify mcpServers was passed through to query()
+    const queryCall = queryMock.mock.calls[0] as unknown as [
+      { options: { mcpServers: Record<string, unknown> } },
+    ];
+    expect(queryCall[0].options.mcpServers).toEqual(mockServers);
+  });
   it("preserves previous last_run on failure", async () => {
     const previousLastRun = Date.now() - 3600_000;
     existsSyncMock.mockReturnValue(true);

package/src/__tests__/reload-plugins.test.ts ADDED

@@ -0,0 +1,205 @@
+import { describe, it, expect, vi, beforeEach } from "vitest";
+// ── Module mocks ──────────────────────────────────────────────────────────
+vi.mock("../util/log.js", () => ({
+  log: vi.fn(),
+  logError: vi.fn(),
+  logWarn: vi.fn(),
+  logDebug: vi.fn(),
+}));
+vi.mock("write-file-atomic", () => ({
+  default: { sync: vi.fn() },
+}));
+// Mock cheerio (required by gateway-actions via extractText)
+vi.mock("cheerio", () => ({
+  load: vi.fn(() => {
+    const $ = (sel: string) => ({
+      remove: vi.fn(),
+      text: () => "",
+    });
+    ($ as any).root = vi.fn();
+    return $;
+  }),
+}));
+// Mock storage modules required by gateway-actions
+vi.mock("../storage/history.js", () => ({
+  getRecentFormatted: vi.fn(() => ""),
+  searchHistory: vi.fn(() => ""),
+  getMessagesByUser: vi.fn(() => ""),
+  getKnownUsers: vi.fn(() => ""),
+}));
+vi.mock("../storage/media-index.js", () => ({
+  formatMediaIndex: vi.fn(() => ""),
+}));
+vi.mock("../storage/cron-store.js", () => ({
+  addCronJob: vi.fn(),
+  getCronJob: vi.fn(),
+  getCronJobsForChat: vi.fn(() => []),
+  updateCronJob: vi.fn(),
+  deleteCronJob: vi.fn(),
+  validateCronExpression: vi.fn(() => ({ valid: true })),
+  generateCronId: vi.fn(() => "test-id"),
+}));
+// ── Plugin mocking ──────────────────────────────────────────────────────
+const DEFAULT_CONFIG = {
+  model: "claude-opus-4-6",
+  frontend: "telegram",
+  plugins: [],
+  systemPrompt: "test prompt",
+};
+const mockReloadPlugins = vi.fn(async () => ({
+  names: ["extras", "brave-search"],
+  config: { ...DEFAULT_CONFIG },
+}));
+const mockGetPluginPromptAdditions = vi.fn(() => "prompt additions");
+const mockRebuildSystemPrompt = vi.fn();
+const mockUpdateSystemPrompt = vi.fn();
+vi.mock("../core/plugin.js", () => ({
+  reloadPlugins: (...args: unknown[]) =>
+    mockReloadPlugins(...(args as Parameters<typeof mockReloadPlugins>)),
+  getPluginPromptAdditions: () => mockGetPluginPromptAdditions(),
+}));
+vi.mock("../util/config.js", () => ({
+  rebuildSystemPrompt: (...args: unknown[]) =>
+    mockRebuildSystemPrompt(
+      ...(args as Parameters<typeof mockRebuildSystemPrompt>),
+    ),
+}));
+// Backend mock — passed as 3rd arg to handleSharedAction
+const mockBackend = {
+  query: vi.fn(),
+  updateSystemPrompt: (...args: unknown[]) =>
+    mockUpdateSystemPrompt(
+      ...(args as Parameters<typeof mockUpdateSystemPrompt>),
+    ),
+};
+// ── Import after mocks ────────────────────────────────────────────────────
+import { handleSharedAction } from "../core/gateway-actions.js";
+// ── Tests ─────────────────────────────────────────────────────────────────
+describe("reload_plugins gateway action", () => {
+  beforeEach(() => {
+    vi.resetAllMocks();
+    // Re-establish default implementations after reset
+    mockReloadPlugins.mockImplementation(async () => ({
+      names: ["extras", "brave-search"],
+      config: { ...DEFAULT_CONFIG },
+    }));
+    mockGetPluginPromptAdditions.mockReturnValue("prompt additions");
+    mockRebuildSystemPrompt.mockImplementation(() => {});
+    mockUpdateSystemPrompt.mockImplementation(() => {});
+  });
+  it("returns loaded plugin names on success", async () => {
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+      mockBackend,
+    );
+    expect(result).not.toBeNull();
+    expect(result!.ok).toBe(true);
+    expect(result!.text).toContain("Plugins reloaded successfully");
+    expect(result!.text).toContain("extras");
+    expect(result!.text).toContain("brave-search");
+    expect(result!.text).toContain("(2)");
+  });
+  it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
+    // Gateway no longer passes frontends — reloadPlugins derives them from config
+    expect(mockReloadPlugins).toHaveBeenCalledWith();
+  });
+  it("rebuilds system prompt after reloading", async () => {
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
+    expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
+    expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
+  });
+  it("updates backend system prompt after rebuild", async () => {
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
+    expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
+  });
+  it("returns error when reloadPlugins throws", async () => {
+    mockReloadPlugins.mockRejectedValueOnce(
+      new Error("Config validation failed"),
+    );
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+      mockBackend,
+    );
+    expect(result).not.toBeNull();
+    expect(result!.ok).toBe(false);
+    expect(result!.error).toContain("Config validation failed");
+  });
+  it("returns error when config is malformed", async () => {
+    mockReloadPlugins.mockRejectedValueOnce(
+      new Error("Invalid JSON in config"),
+    );
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+      mockBackend,
+    );
+    expect(result!.ok).toBe(false);
+    expect(result!.error).toContain("Invalid JSON in config");
+  });
+  it("reports zero plugins when none configured", async () => {
+    mockReloadPlugins.mockImplementation(async () => ({
+      names: [],
+      config: { ...DEFAULT_CONFIG },
+    }));
+    const result = await handleSharedAction(
+      { action: "reload_plugins" },
+      12345,
+      mockBackend,
+    );
+    expect(result!.ok).toBe(true);
+    expect(result!.text).toContain("(0)");
+    expect(result!.text).toContain("(none)");
+  });
+});
+// ── Admin tool description tests ──────────────────────────────────────────
+describe("admin tool description", () => {
+  it("does not mention session reset or MCP subprocesses", async () => {
+    const { adminTools } = await import("../core/tools/admin.js");
+    const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
+    expect(reloadTool).toBeDefined();
+    expect(reloadTool!.description).not.toContain("resets sessions");
+    expect(reloadTool!.description).not.toContain("sessions reset");
+    expect(reloadTool!.description).not.toContain("MCP subprocesses");
+    expect(reloadTool!.description).toContain("without restarting");
+    expect(reloadTool!.description).toContain("without downtime");
+  });
+  it("mentions env var cleanup", async () => {
+    const { adminTools } = await import("../core/tools/admin.js");
+    const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
+    expect(reloadTool!.description).toContain("env vars");
+  });
+  it("has admin tag", async () => {
+    const { adminTools } = await import("../core/tools/admin.js");
+    const reloadTool = adminTools.find((t) => t.name === "reload_plugins");
+    expect(reloadTool!.tag).toBe("admin");
+  });
+});