npm - talon-agent - Versions diffs - 1.5.0 → 1.6.1 - Mend

talon-agent 1.5.0 → 1.6.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (29)

package/README.md +184 -50
package/package.json +1 -1
package/src/__tests__/chat-settings.test.ts +20 -7
package/src/__tests__/fuzz.test.ts +3 -0
package/src/__tests__/reload-plugins.test.ts +11 -5
package/src/backend/claude-sdk/constants.ts +63 -0
package/src/backend/claude-sdk/handler.ts +236 -0
package/src/backend/claude-sdk/index.ts +7 -556
package/src/backend/claude-sdk/models.ts +216 -0
package/src/backend/claude-sdk/options.ts +129 -0
package/src/backend/claude-sdk/state.ts +59 -0
package/src/backend/claude-sdk/stream.ts +226 -0
package/src/backend/claude-sdk/warm.ts +89 -0
package/src/bootstrap.ts +19 -5
package/src/cli.ts +30 -15
package/src/core/dream.ts +5 -17
package/src/core/gateway-actions.ts +3 -12
package/src/core/gateway.ts +5 -2
package/src/core/heartbeat.ts +4 -17
package/src/core/models.ts +149 -0
package/src/core/types.ts +4 -0
package/src/frontend/teams/index.ts +1 -3
package/src/frontend/telegram/callbacks.ts +15 -27
package/src/frontend/telegram/commands.ts +23 -28
package/src/frontend/telegram/helpers.ts +13 -15
package/src/frontend/telegram/index.ts +1 -1
package/src/frontend/terminal/commands.ts +7 -4
package/src/index.ts +2 -1
package/src/storage/chat-settings.ts +5 -19

package/README.md CHANGED Viewed

@@ -1,22 +1,28 @@
 # Talon
 [![Node.js](https://img.shields.io/badge/node-%3E%3D22-339933?logo=nodedotjs&logoColor=white)](https://nodejs.org)
-[![TypeScript](https://img.shields.io/badge/TypeScript-5.9-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
+[![TypeScript](https://img.shields.io/badge/TypeScript-6.0-3178C6?logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
 [![Claude](https://img.shields.io/badge/Claude_Agent_SDK-Anthropic-D97706)](https://github.com/anthropics/claude-agent-sdk-typescript)
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
 [![CI](https://github.com/dylanneve1/talon/actions/workflows/ci.yml/badge.svg)](https://github.com/dylanneve1/talon/actions/workflows/ci.yml)
-Multi-platform agentic AI harness powered by Claude. Runs on Telegram, Teams, and Terminal with full tool access through MCP.
+Multi-platform agentic AI harness powered by Claude. Runs on **Telegram**, **Teams**, and **Terminal** with full tool access through MCP.
+---
 ## Features
-- **Multi-frontend** — Telegram (Grammy), Teams (Bot Framework), Terminal (readline)
-- **Claude Agent SDK** — streaming responses, extended thinking, 1M context sessions
-- **31 MCP tools** — messaging, media, history, search, web, cron jobs, file system
-- **Plugin system** — extend with external tool packages (keeps core OSS-clean)
-- **Cron jobs** — persistent recurring tasks with full tool access
-- **Pulse** — periodic conversation-aware engagement in group chats
-- **Per-chat settings** — model, effort level, pulse toggle per conversation
+| | |
+|---|---|
+| **Multi-frontend** | Telegram (Grammy + GramJS userbot), Microsoft Teams (Bot Framework), Terminal with live tool visibility |
+| **Claude Agent SDK** | Streaming responses, extended thinking, adaptive effort, 1M token context, dynamic model discovery |
+| **MCP tools** | Messaging, media, history, search, web fetch, cron jobs, stickers, file system, admin controls |
+| **Plugins** | Hot-reloadable plugin system. Built-in: GitHub, MemPalace, Playwright, Brave Search |
+| **Background agents** | Heartbeat (periodic maintenance) and Dream (memory consolidation + diary) |
+| **Per-chat settings** | Model, effort level, and pulse toggle per conversation via inline keyboard |
+| **Model registry** | Models discovered from the SDK at startup --- new models appear in all pickers automatically |
+---
 ## Quick Start
@@ -24,39 +30,134 @@ Multi-platform agentic AI harness powered by Claude. Runs on Telegram, Teams, an
 git clone https://github.com/dylanneve1/talon.git && cd talon
 npm install
-# Interactive setup (select frontend, configure tokens)
+# Interactive setup (select frontend, configure tokens, pick model)
 npx talon setup
 # Start
-npx talon start       # configured frontend (Telegram/Terminal)
+npx talon start       # configured frontend (daemon mode)
 npx talon chat        # terminal chat mode
 ```
-Requires [Node.js 22+](https://nodejs.org/) and [Claude Code](https://docs.anthropic.com/en/docs/claude-code) installed and authenticated.
+**Prerequisites:**
+- [Node.js 22+](https://nodejs.org/)
+- [Claude Code](https://docs.anthropic.com/en/docs/claude-code) installed and authenticated (`claude` CLI on PATH)
+---
 ## Architecture
 ```
-index.ts (Composition Root)
-├── core/               Platform-agnostic core
-│   ├── gateway.ts      HTTP bridge for MCP tool calls
-│   ├── dispatcher.ts   Query queue + lifecycle
-│   ├── plugin.ts       Plugin loader + registry
-│   ├── pulse.ts        Periodic engagement
-│   └── cron.ts         Persistent scheduled jobs
-├── backend/
-│   ├── claude-sdk/     Claude Agent SDK + MCP subprocess
-│   └── opencode/       OpenCode SDK alternative
-├── frontend/
-│   ├── telegram/       Grammy + GramJS userbot
-│   ├── teams/          Bot Framework
-│   └── terminal/       Readline CLI with tool call visibility
-└── storage/            Sessions, history, settings, cron, media
+index.ts                    Composition root
+  |
+  +-- core/                 Platform-agnostic engine
+  |   +-- models.ts         Model registry (dynamic SDK discovery)
+  |   +-- gateway.ts        HTTP bridge for MCP tool calls
+  |   +-- dispatcher.ts     Per-chat serial, cross-chat parallel execution
+  |   +-- plugin.ts         Plugin loader, registry, hot-reload
+  |   +-- heartbeat.ts      Periodic background agent
+  |   +-- dream.ts          Memory consolidation agent
+  |   +-- pulse.ts          Conversation-aware group engagement
+  |   +-- cron.ts           Persistent scheduled jobs
+  |   +-- tools/            MCP tool definitions (13 files)
+  |
+  +-- backend/
+  |   +-- claude-sdk/       Claude Agent SDK (modular: handler, stream,
+  |   |                     options, state, warm, models, constants)
+  |   +-- opencode/         OpenCode SDK alternative backend
+  |
+  +-- frontend/
+  |   +-- telegram/         Grammy bot + GramJS userbot (10 files)
+  |   +-- teams/            Bot Framework + Graph API
+  |   +-- terminal/         Readline CLI with tool call visibility
+  |
+  +-- storage/              Sessions, history, chat settings,
+  |                         cron jobs, media index, daily logs
+  +-- util/                 Config, logging, workspace, paths, time
+```
+**Dependency rule:** `core/` imports nothing from `frontend/` or `backend/`. Frontends and backends depend on core types, never on each other.
+---
+## Built-in Plugins
+### GitHub
+GitHub API access via the official GitHub MCP server. Gives the agent access to repositories, issues, PRs, code search, and more.
+**Requirements:** Docker installed and running.
+```json
+{
+  "github": {
+    "enabled": true,
+    "token": "ghp_..."
+  }
+}
+```
+The token is optional --- defaults to the output of `gh auth token` if the GitHub CLI is authenticated.
+### MemPalace
+Structured long-term memory with vector search. The agent can store, search, and retrieve memories semantically. Integrates with Dream mode for automatic memory consolidation and personal diary entries.
+**Requirements:** Python 3.10+ with the `mempalace` package.
+```bash
+# Set up a Python environment
+python -m venv ~/.talon/mempalace-venv
+~/.talon/mempalace-venv/bin/pip install mempalace    # Unix
+# or: ~/.talon/mempalace-venv/Scripts/pip install mempalace   # Windows
+```
+```json
+{
+  "mempalace": {
+    "enabled": true,
+    "palacePath": "~/.talon/workspace/palace",
+    "pythonPath": "~/.talon/mempalace-venv/bin/python"
+  }
+}
 ```
-## Plugin System
+Both paths are optional --- defaults to `~/.talon/workspace/palace/` and the venv Python respectively.
+### Playwright
-Plugins add MCP tools and gateway actions without modifying core code. SOLID interface — only `name` is required, everything else is optional.
+Headless browser automation via the Playwright MCP server. The agent can browse websites, take screenshots, generate PDFs, fill forms, and scrape content.
+**Requirements:** None --- `@playwright/mcp` is bundled with Talon.
+```json
+{
+  "playwright": {
+    "enabled": true,
+    "browser": "chromium",
+    "headless": true
+  }
+}
+```
+Supported browsers: `chromium` (default), `chrome`, `firefox`, `webkit`, `msedge`.
+### Brave Search
+Web search via the Brave Search MCP server. Replaces the built-in WebSearch/WebFetch tools with higher-quality search results.
+```json
+{
+  "braveApiKey": "BSA..."
+}
+```
+Get an API key at [brave.com/search/api](https://brave.com/search/api/).
+---
+## Custom Plugins
+Plugins add MCP tools and gateway actions without modifying core code. SOLID interface --- only `name` is required.
 ```json
 {
@@ -80,59 +181,92 @@ export default {
 };
 ```
+Plugins support hot-reload via the `reload_plugins` MCP tool --- no restart required.
+---
 ## CLI
 ```
-talon setup     Interactive setup wizard (multi-select frontends)
-talon start     Start the configured frontend
+talon setup     Interactive setup wizard
+talon start     Start as a background daemon
+talon stop      Stop the daemon
 talon chat      Terminal chat mode (always available)
-talon status    Health, sessions, and plugin status
-talon config    View/edit configuration
+talon status    Health, sessions, plugins, disk usage
+talon config    View or edit configuration
 talon logs      Tail structured log file
-talon doctor    Validate environment
+talon doctor    Validate environment and dependencies
 ```
+---
 ## Configuration
-`workspace/talon.json`:
+Config file: `~/.talon/config.json`
 | Field | Default | Description |
 |-------|---------|-------------|
-| `frontend` | `"telegram"` | `"telegram"`, `"terminal"`, or both |
-| `botToken` | — | Telegram bot token (required for Telegram) |
-| `model` | `"claude-sonnet-4-6"` | Default model |
-| `concurrency` | `1` | Max concurrent AI queries |
+| `frontend` | `"telegram"` | `"telegram"`, `"terminal"`, `"teams"`, or an array |
+| `backend` | `"claude"` | `"claude"` or `"opencode"` |
+| `botToken` | --- | Telegram bot token |
+| `model` | `"claude-sonnet-4-6"` | Default model (discovered from SDK at startup) |
+| `concurrency` | `1` | Max concurrent AI queries (1--20) |
 | `pulse` | `true` | Periodic group engagement |
+| `heartbeat` | `false` | Background maintenance agent |
+| `heartbeatIntervalMinutes` | `60` | Heartbeat interval |
+| `braveApiKey` | --- | Brave Search API key |
+| `timezone` | --- | IANA timezone (e.g. `"Europe/London"`) |
 | `plugins` | `[]` | External plugin packages |
-| `adminUserId` | — | Telegram user ID for /admin |
-| `apiId` / `apiHash` | — | Telegram API for full history |
+| `adminUserId` | --- | Telegram user ID for `/admin` commands |
+| `allowedUsers` | --- | Whitelist of Telegram user IDs |
+| `apiId` / `apiHash` | --- | Telegram API credentials for full message history |
+| `github` | --- | GitHub plugin config (see above) |
+| `mempalace` | --- | MemPalace plugin config (see above) |
+| `playwright` | --- | Playwright plugin config (see above) |
+---
 ## Terminal Mode
 ```bash
-talon chat    # interactive terminal chat
+npx talon chat
 ```
-Tool calls shown in real-time with parameters. Streaming phase indicators (thinking/responding/using tools). Per-turn stats (duration, tokens, cache hit, tool count).
+Tool calls shown in real-time with parameters. Streaming phase indicators (thinking / responding / using tools). Per-turn stats: duration, tokens, cache hit rate, tool count.
+Commands: `/model`, `/effort`, `/reset`, `/status`, `/help`
+---
 ## Production
-- **Docker**: `docker compose up -d`
-- **Systemd**: `talon.service` included
-- **Health**: `GET http://localhost:19876/health` — JSON with uptime, memory, queue, sessions
-- **Logging**: Structured JSON via pino to `workspace/talon.log`
-- **Resilience**: Model fallback, session auto-retry, rate limiting, atomic writes, graceful shutdown
+**Docker:**
+```bash
+docker compose up -d
+```
+**Systemd:** `talon.service` included in the repository.
+**Health endpoint:** `GET http://localhost:19876/health` returns JSON with uptime, memory, queue depth, active sessions, and last activity timestamp.
+**Logging:** Structured JSON via pino to `~/.talon/talon.log`. Rotated on startup when the file exceeds 10MB.
+**Resilience:** Dynamic model fallback on overload, session auto-retry on expiry, rate limit handling with backoff, atomic file writes, graceful shutdown with 15-second drain timeout.
+---
 ## Development
 ```bash
 npm run dev              # watch mode
-npm test                 # 322 tests
-npm run test:coverage    # with coverage
+npm test                 # 1300+ tests
+npm run test:coverage    # with coverage report
 npm run typecheck        # tsc --noEmit
 npm run lint             # oxlint
 ```
+---
 ## License
 MIT

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "talon-agent",
-  "version": "1.5.0",
+  "version": "1.6.1",
   "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
   "author": "Dylan Neve",
   "license": "MIT",

package/src/__tests__/chat-settings.test.ts CHANGED Viewed

@@ -31,9 +31,13 @@ const {
   loadChatSettings,
   resolveModelName,
   EFFORT_LEVELS,
-  MODEL_ALIASES,
 } = await import("../storage/chat-settings.js");
+// Register Claude models (static — no SDK subprocess in tests)
+const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
+  await import("../backend/claude-sdk/models.js");
+registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
 describe("chat-settings", () => {
   describe("getChatSettings", () => {
     it("returns empty object for unknown chat", () => {
@@ -166,12 +170,21 @@ describe("chat-settings", () => {
     });
   });
-  describe("MODEL_ALIASES", () => {
-    it("contains all expected aliases", () => {
-      expect(Object.keys(MODEL_ALIASES).length).toBeGreaterThanOrEqual(9);
-      expect(MODEL_ALIASES.sonnet).toBe("claude-sonnet-4-6");
-      expect(MODEL_ALIASES.opus).toBe("claude-opus-4-6");
-      expect(MODEL_ALIASES.haiku).toBe("claude-haiku-4-5");
+  describe("model alias resolution (via registry)", () => {
+    it("resolves short aliases to full model IDs", () => {
+      expect(resolveModelName("sonnet")).toBe("claude-sonnet-4-6");
+      expect(resolveModelName("opus")).toBe("claude-opus-4-6");
+      expect(resolveModelName("haiku")).toBe("claude-haiku-4-5");
+    });
+    it("resolves versioned aliases", () => {
+      expect(resolveModelName("sonnet-4-6")).toBe("claude-sonnet-4-6");
+      expect(resolveModelName("opus-4.6")).toBe("claude-opus-4-6");
+      expect(resolveModelName("haiku-4.5")).toBe("claude-haiku-4-5");
+    });
+    it("passes through unknown names unchanged", () => {
+      expect(resolveModelName("gpt-4o")).toBe("gpt-4o");
     });
   });

package/src/__tests__/fuzz.test.ts CHANGED Viewed

@@ -49,6 +49,9 @@ const { classify, TalonError } = await import("../core/errors.js");
 await import("../storage/cron-store.js");
 const { handleSharedAction } = await import("../core/gateway-actions.js");
 const { resolveModelName } = await import("../storage/chat-settings.js");
+const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
+  await import("../backend/claude-sdk/models.js");
+registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
 const { Cron } = await import("croner");
 // ── Configuration ───────────────────────────────────────────────────────────

package/src/__tests__/reload-plugins.test.ts CHANGED Viewed

@@ -75,12 +75,14 @@ vi.mock("../util/config.js", () => ({
     ),
 }));
-vi.mock("../backend/claude-sdk/index.js", () => ({
+// Backend mock — passed as 3rd arg to handleSharedAction
+const mockBackend = {
+  query: vi.fn(),
   updateSystemPrompt: (...args: unknown[]) =>
     mockUpdateSystemPrompt(
       ...(args as Parameters<typeof mockUpdateSystemPrompt>),
     ),
-}));
+};
 // ── Import after mocks ────────────────────────────────────────────────────
@@ -105,6 +107,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result).not.toBeNull();
     expect(result!.ok).toBe(true);
@@ -115,19 +118,19 @@ describe("reload_plugins gateway action", () => {
   });
   it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
-    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
     // Gateway no longer passes frontends — reloadPlugins derives them from config
     expect(mockReloadPlugins).toHaveBeenCalledWith();
   });
   it("rebuilds system prompt after reloading", async () => {
-    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
     expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
     expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
   });
   it("updates backend system prompt after rebuild", async () => {
-    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
     expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
   });
@@ -138,6 +141,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result).not.toBeNull();
     expect(result!.ok).toBe(false);
@@ -151,6 +155,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result!.ok).toBe(false);
     expect(result!.error).toContain("Invalid JSON in config");
@@ -164,6 +169,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result!.ok).toBe(true);
     expect(result!.text).toContain("(0)");

package/src/backend/claude-sdk/constants.ts ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * Shared constants for Claude SDK backend and background agents.
+ *
+ * Single source of truth for disallowed tool lists, thinking effort
+ * configuration, and streaming parameters.
+ */
+// ── Disallowed tool lists ──────────────────────────────────────────────────
+/**
+ * Core tools disallowed in all SDK query contexts (chat, heartbeat, dream).
+ * These are interactive or planning-only tools that make no sense in a
+ * headless agent context.
+ */
+export const DISALLOWED_TOOLS_CORE = [
+  "EnterPlanMode",
+  "ExitPlanMode",
+  "EnterWorktree",
+  "ExitWorktree",
+  "TodoWrite",
+  "TodoRead",
+  "TaskCreate",
+  "TaskUpdate",
+  "TaskGet",
+  "TaskList",
+  "TaskOutput",
+  "TaskStop",
+  "AskUserQuestion",
+] as const;
+/** Disallowed tools for the main chat handler (core + web tools replaced by Brave MCP). */
+export const DISALLOWED_TOOLS_CHAT = [
+  ...DISALLOWED_TOOLS_CORE,
+  "WebSearch",
+  "WebFetch",
+] as const;
+/** Disallowed tools for background agents — heartbeat and dream (core + Agent). */
+export const DISALLOWED_TOOLS_BACKGROUND = [
+  ...DISALLOWED_TOOLS_CORE,
+  "Agent",
+] as const;
+// ── Thinking / effort configuration ────────────────────────────────────────
+export const EFFORT_MAP: Record<
+  string,
+  {
+    thinking: { type: "adaptive" | "disabled" };
+    effort?: "low" | "medium" | "high" | "max";
+  }
+> = {
+  off: { thinking: { type: "disabled" } },
+  low: { thinking: { type: "adaptive" }, effort: "low" },
+  medium: { thinking: { type: "adaptive" }, effort: "medium" },
+  high: { thinking: { type: "adaptive" }, effort: "high" },
+  max: { thinking: { type: "adaptive" }, effort: "max" },
+};
+// ── Streaming ──────────────────────────────────────────────────────────────
+/** Minimum interval (ms) between streaming delta callbacks to avoid flooding frontends. */
+export const STREAM_INTERVAL = 1000;