npm - talon-agent - Versions diffs - 1.4.0 → 1.5.0 - Mend

talon-agent 1.4.0 → 1.5.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (16)

package/package.json +2 -2
package/prompts/heartbeat.md +18 -6
package/src/__tests__/heartbeat.test.ts +21 -0
package/src/__tests__/reload-plugins.test.ts +199 -0
package/src/__tests__/sessions.test.ts +155 -121
package/src/backend/claude-sdk/index.ts +198 -62
package/src/bootstrap.ts +3 -103
package/src/core/gateway-actions.ts +42 -1
package/src/core/heartbeat.ts +8 -5
package/src/core/plugin.ts +147 -0
package/src/core/tools/admin.ts +22 -0
package/src/core/tools/index.ts +2 -0
package/src/core/tools/types.ts +2 -1
package/src/frontend/teams/index.ts +9 -10
package/src/frontend/telegram/commands.ts +11 -10
package/src/storage/sessions.ts +34 -40

package/src/backend/claude-sdk/index.ts CHANGED Viewed

@@ -41,34 +41,15 @@ export function initAgent(
   delete process.env.CLAUDECODE;
 }
-// ── Main handler ─────────────────────────────────────────────────────────────
-export async function handleMessage(
-  params: QueryParams,
-  _retried = false,
-): Promise<QueryResult> {
-  if (!config)
-    throw new Error("Agent not initialized. Call initAgent() first.");
+/** Update the system prompt on the live config. Used by plugin hot-reload
+ *  so the next message picks up new plugin tool descriptions. */
+export function updateSystemPrompt(prompt: string): void {
+  if (config) config.systemPrompt = prompt;
+}
-  const {
-    chatId,
-    text,
-    senderName,
-    isGroup,
-    onTextBlock,
-    onStreamDelta,
-    onToolUse,
-  } = params;
-  const session = getSession(chatId);
-  const t0 = Date.now();
+// ── Shared options builder ───────────────────────────────────────────────────
-  // Rebuild system prompt on first turn of a new/reset session so identity,
-  // memory, and workspace listing are fresh
-  if (session.turns === 0) {
-    rebuildSystemPrompt(config, getPluginPromptAdditions());
-  }
-  // Per-chat settings override global config
+function buildSdkOptions(chatId: string) {
   const chatSettings = getChatSettings(chatId);
   const activeModel = chatSettings.model ?? config.model;
   const activeEffort = chatSettings.effort ?? "adaptive";
@@ -90,13 +71,18 @@ export async function handleMessage(
     thinking: { type: "adaptive" as const },
   };
+  const supports1m =
+    !activeModel.includes("haiku") && !activeModel.includes("[1m]");
+  const sdkModel = supports1m ? `${activeModel}[1m]` : activeModel;
+  const session = getSession(chatId);
   const options = {
-    model: activeModel,
+    model: sdkModel,
     systemPrompt: config.systemPrompt,
     cwd: config.workspace,
     permissionMode: "bypassPermissions" as const,
     allowDangerouslySkipPermissions: true,
-    betas: ["context-1m-2025-08-07"],
     ...(config.claudeBinary
       ? { pathToClaudeCodeExecutable: config.claudeBinary }
       : {}),
@@ -114,16 +100,11 @@ export async function handleMessage(
       "TaskOutput",
       "TaskStop",
       "AskUserQuestion",
-      // Always disable Claude Code built-in web tools — fetch_url is always
-      // available, and Brave Search MCP replaces WebSearch when configured.
       "WebSearch",
       "WebFetch",
     ],
     ...thinkingConfig,
     mcpServers: {
-      // Register unified MCP tools server — one per messaging frontend.
-      // Terminal frontend relies on Claude Code built-in tools (Read, Write,
-      // Bash, etc.) and doesn't need a custom MCP tools server.
       ...(() => {
         const allFrontends = Array.isArray(config.frontend)
           ? config.frontend
@@ -134,12 +115,10 @@ export async function handleMessage(
           string,
           { command: string; args: string[]; env: Record<string, string> }
         > = {};
-        // Resolve tsx from the package root (3 levels up from src/backend/claude-sdk/)
         const tsxImport = resolve(
           import.meta.dirname ?? ".",
           "../../../node_modules/tsx/dist/esm/index.mjs",
         );
-        // Unified MCP server in core/tools/
         const mcpServerPath = resolve(
           import.meta.dirname ?? ".",
           "../../core/tools/mcp-server.ts",
@@ -163,7 +142,6 @@ export async function handleMessage(
         }
         return servers;
       })(),
-      // Brave Search MCP server — provides brave_web_search and brave_local_search
       ...(config.braveApiKey
         ? {
             "brave-search": {
@@ -181,6 +159,107 @@ export async function handleMessage(
     ...(session.sessionId ? { resume: session.sessionId } : {}),
   };
+  return { options, activeModel, session };
+}
+// ── Session warm-up ─────────────────────────────────────────────────────────
+/**
+ * Cold-start a session by spawning an SDK subprocess in streaming input mode,
+ * calling getContextUsage() to populate contextWindow and baseline contextTokens,
+ * then tearing it down. Fire-and-forget — does not block the caller.
+ */
+export async function warmSession(chatId: string): Promise<void> {
+  if (!config) return;
+  const abort = new AbortController();
+  try {
+    rebuildSystemPrompt(config, getPluginPromptAdditions());
+    const { options } = buildSdkOptions(chatId);
+    // Streaming input mode: pass an async iterable that never yields a user message
+    const neverYield = async function* (): AsyncGenerator<never> {
+      await new Promise<never>((_, reject) => {
+        abort.signal.addEventListener("abort", () =>
+          reject(new Error("aborted")),
+        );
+      });
+    };
+    const q = query({
+      prompt: neverYield(),
+      options: {
+        ...options,
+        abortController: abort,
+      } as Parameters<typeof query>[0]["options"],
+    });
+    // Drain the stream in the background so the SDK's internal message loop
+    // doesn't stall — control responses are processed in readMessages() which
+    // needs the inputStream consumer to not back-pressure.
+    const drainPromise = (async () => {
+      try {
+        for await (const _ of q) {
+          // discard SDK messages; we only care about the control response
+        }
+      } catch {
+        // expected: abort causes the stream to end with an error
+      }
+    })();
+    // Race getContextUsage against a timeout so /reset doesn't hang
+    const timeout = new Promise<never>((_, reject) =>
+      setTimeout(() => reject(new Error("warm-up timed out")), 15_000),
+    );
+    const ctx = await Promise.race([q.getContextUsage(), timeout]);
+    const session = getSession(chatId);
+    if (ctx.maxTokens > 0) session.usage.contextWindow = ctx.maxTokens;
+    if (ctx.totalTokens > 0) session.usage.contextTokens = ctx.totalTokens;
+    log(
+      "agent",
+      `[${chatId}] warm-up: context ${ctx.totalTokens}/${ctx.maxTokens} (${ctx.percentage.toFixed(1)}%) model=${ctx.model}`,
+    );
+    abort.abort();
+    await drainPromise;
+  } catch (err) {
+    abort.abort();
+    // Non-fatal — /status will just show 0 until first real message
+    logWarn(
+      "agent",
+      `[${chatId}] warm-up failed: ${err instanceof Error ? err.message : err}`,
+    );
+  }
+}
+// ── Main handler ─────────────────────────────────────────────────────────────
+export async function handleMessage(
+  params: QueryParams,
+  _retried = false,
+): Promise<QueryResult> {
+  if (!config)
+    throw new Error("Agent not initialized. Call initAgent() first.");
+  const {
+    chatId,
+    text,
+    senderName,
+    isGroup,
+    onTextBlock,
+    onStreamDelta,
+    onToolUse,
+  } = params;
+  const session = getSession(chatId);
+  const t0 = Date.now();
+  // Rebuild system prompt on first turn of a new/reset session so identity,
+  // memory, and workspace listing are fresh
+  if (session.turns === 0) {
+    rebuildSystemPrompt(config, getPluginPromptAdditions());
+  }
+  const { options, activeModel } = buildSdkOptions(chatId);
   const msgIdHint = params.messageId ? ` [msg_id:${params.messageId}]` : "";
   const nowTag = `[${formatFullDatetime()}]`;
@@ -199,11 +278,16 @@ export async function handleMessage(
   let currentBlockText = "";
   let allResponseText = "";
   let newSessionId: string | undefined;
-  let inputTokens = 0;
-  let outputTokens = 0;
-  let cacheRead = 0;
-  let cacheWrite = 0;
   let toolCalls = 0;
+  // Populated from SDK result message
+  let contextTokens = 0; // actual context fill from last iteration
+  let contextWindow: number | undefined;
+  let numApiCalls = 0;
+  // Cumulative token counts from SDK modelUsage (aggregated across models)
+  let sdkInputTokens = 0;
+  let sdkOutputTokens = 0;
+  let sdkCacheRead = 0;
+  let sdkCacheWrite = 0;
   // Streaming throttle
   let lastStreamUpdate = 0;
@@ -298,15 +382,64 @@ export async function handleMessage(
         }
       }
-      // Final result
+      // Final result — read all data from SDK result fields
       if (type === "result") {
-        const usage = msg.usage as Record<string, number> | undefined;
-        if (usage) {
-          inputTokens = usage.input_tokens ?? 0;
-          outputTokens = usage.output_tokens ?? 0;
-          cacheRead = usage.cache_read_input_tokens ?? 0;
-          cacheWrite = usage.cache_creation_input_tokens ?? 0;
+        numApiCalls =
+          ((msg as Record<string, unknown>).num_turns as number) ?? 0;
+        // Context fill from last API iteration (only available in raw usage)
+        const usage = msg.usage as
+          | {
+              iterations?: Array<{
+                input_tokens: number;
+                cache_read_input_tokens: number;
+                cache_creation_input_tokens: number;
+              }>;
+            }
+          | undefined;
+        if (
+          usage &&
+          Array.isArray(usage.iterations) &&
+          usage.iterations.length > 0
+        ) {
+          const last = usage.iterations[usage.iterations.length - 1];
+          contextTokens =
+            (last.input_tokens ?? 0) +
+            (last.cache_read_input_tokens ?? 0) +
+            (last.cache_creation_input_tokens ?? 0);
+        }
+        // Token counts, context window from SDK modelUsage (aggregated per model)
+        type MU = {
+          inputTokens?: number;
+          outputTokens?: number;
+          cacheReadInputTokens?: number;
+          cacheCreationInputTokens?: number;
+          contextWindow?: number;
+        };
+        const modelUsage = (msg as Record<string, unknown>).modelUsage as
+          | Record<string, MU>
+          | undefined;
+        if (modelUsage) {
+          for (const mu of Object.values(modelUsage)) {
+            sdkInputTokens += mu.inputTokens ?? 0;
+            sdkOutputTokens += mu.outputTokens ?? 0;
+            sdkCacheRead += mu.cacheReadInputTokens ?? 0;
+            sdkCacheWrite += mu.cacheCreationInputTokens ?? 0;
+            if (
+              mu.contextWindow &&
+              mu.contextWindow > 0 &&
+              contextWindow === undefined
+            ) {
+              contextWindow = mu.contextWindow;
+            }
+          }
         }
+        log(
+          "agent",
+          `SDK result: modelUsage=${JSON.stringify(modelUsage)}, contextWindow=${contextWindow}, contextTokens=${contextTokens}, numApiCalls=${numApiCalls}`,
+        );
         // If we still have unsent text and no streaming captured it
         if (
           !allResponseText &&
@@ -368,12 +501,15 @@ export async function handleMessage(
   if (newSessionId) setSessionId(chatId, newSessionId);
   incrementTurns(chatId);
   recordUsage(chatId, {
-    inputTokens,
-    outputTokens,
-    cacheRead,
-    cacheWrite,
+    inputTokens: sdkInputTokens,
+    outputTokens: sdkOutputTokens,
+    cacheRead: sdkCacheRead,
+    cacheWrite: sdkCacheWrite,
     durationMs,
     model: activeModel,
+    contextTokens,
+    contextWindow,
+    numApiCalls,
   });
   // Set a descriptive session name from the first message
@@ -393,21 +529,21 @@ export async function handleMessage(
   // The remaining currentBlockText is the final response text
   allResponseText += currentBlockText;
-  const totalPrompt = inputTokens + cacheRead + cacheWrite;
+  const totalPrompt = sdkInputTokens + sdkCacheRead + sdkCacheWrite;
   const cacheHitPct =
-    totalPrompt > 0 ? Math.round((cacheRead / totalPrompt) * 100) : 0;
+    totalPrompt > 0 ? Math.round((sdkCacheRead / totalPrompt) * 100) : 0;
   log(
     "agent",
-    `[${chatId}] -> (${durationMs}ms, in=${inputTokens} out=${outputTokens} cache=${cacheHitPct}%` +
+    `[${chatId}] -> (${durationMs}ms, in=${sdkInputTokens} out=${sdkOutputTokens} cache=${cacheHitPct}%` +
       `${toolCalls > 0 ? ` tools=${toolCalls}` : ""})`,
   );
   traceMessage(chatId, "out", allResponseText, {
     durationMs,
-    inputTokens,
-    outputTokens,
-    cacheRead,
-    cacheWrite,
+    inputTokens: sdkInputTokens,
+    outputTokens: sdkOutputTokens,
+    cacheRead: sdkCacheRead,
+    cacheWrite: sdkCacheWrite,
     toolCalls,
     model: activeModel,
   });
@@ -415,9 +551,9 @@ export async function handleMessage(
   return {
     text: allResponseText.trim(),
     durationMs,
-    inputTokens,
-    outputTokens,
-    cacheRead,
-    cacheWrite,
+    inputTokens: sdkInputTokens,
+    outputTokens: sdkOutputTokens,
+    cacheRead: sdkCacheRead,
+    cacheWrite: sdkCacheWrite,
   };
 }

package/src/bootstrap.ts CHANGED Viewed

@@ -65,7 +65,7 @@ export async function bootstrap(
     config.mempalace?.enabled === true ||
     config.playwright?.enabled === true;
   if (hasPlugins) {
-    const { loadPlugins, getPluginPromptAdditions, registerPlugin } =
+    const { loadPlugins, loadBuiltinPlugins, getPluginPromptAdditions } =
       await import("./core/plugin.js");
     // External plugins
@@ -76,108 +76,8 @@ export async function bootstrap(
       await loadPlugins(config.plugins, frontends);
     }
-    // Built-in: GitHub
-    if (config.github?.enabled) {
-      const { createGitHubPlugin } = await import("./plugins/github/index.js");
-      const { getPlugin } = await import("./core/plugin.js");
-      const githubConfig = config.github as unknown as Record<string, unknown>;
-      const gh = createGitHubPlugin({ token: config.github.token });
-      registerPlugin(gh, githubConfig);
-      if (getPlugin("github")) {
-        try {
-          const GITHUB_INIT_TIMEOUT_MS = 15_000;
-          await Promise.race([
-            gh.init?.(githubConfig),
-            new Promise((_, reject) =>
-              setTimeout(
-                () => reject(new Error("GitHub init timed out after 15s")),
-                GITHUB_INIT_TIMEOUT_MS,
-              ),
-            ),
-          ]);
-        } catch (err) {
-          log(
-            "github",
-            `Init warning: ${err instanceof Error ? err.message : err}`,
-          );
-        }
-      }
-    }
-    // Built-in: MemPalace
-    if (config.mempalace?.enabled) {
-      const { createMempalacePlugin } =
-        await import("./plugins/mempalace/index.js");
-      const { getPlugin } = await import("./core/plugin.js");
-      const { dirs, files: pathFiles } = await import("./util/paths.js");
-      const pythonPath =
-        config.mempalace.pythonPath ?? pathFiles.mempalacePython;
-      const palacePath = config.mempalace.palacePath ?? dirs.palace;
-      const mempalaceConfig = config.mempalace as unknown as Record<
-        string,
-        unknown
-      >;
-      const mp = createMempalacePlugin({ pythonPath, palacePath });
-      registerPlugin(mp, mempalaceConfig);
-      // Only call init if registration succeeded (validation passed)
-      if (getPlugin("mempalace")) {
-        try {
-          const MEMPALACE_INIT_TIMEOUT_MS = 30_000;
-          await Promise.race([
-            mp.init?.(mempalaceConfig),
-            new Promise((_, reject) =>
-              setTimeout(
-                () => reject(new Error("MemPalace init timed out after 30s")),
-                MEMPALACE_INIT_TIMEOUT_MS,
-              ),
-            ),
-          ]);
-        } catch (err) {
-          log(
-            "mempalace",
-            `Init warning: ${err instanceof Error ? err.message : err}`,
-          );
-        }
-      }
-    }
-    // Built-in: Playwright
-    if (config.playwright?.enabled) {
-      const { createPlaywrightPlugin } =
-        await import("./plugins/playwright/index.js");
-      const { getPlugin } = await import("./core/plugin.js");
-      const playwrightConfig = config.playwright as unknown as Record<
-        string,
-        unknown
-      >;
-      const pw = createPlaywrightPlugin({
-        browser: config.playwright.browser,
-        headless: config.playwright.headless,
-      });
-      registerPlugin(pw, playwrightConfig);
-      if (getPlugin("playwright")) {
-        try {
-          const PW_INIT_TIMEOUT_MS = 15_000;
-          await Promise.race([
-            pw.init?.(playwrightConfig),
-            new Promise((_, reject) =>
-              setTimeout(
-                () => reject(new Error("Playwright init timed out after 15s")),
-                PW_INIT_TIMEOUT_MS,
-              ),
-            ),
-          ]);
-        } catch (err) {
-          log(
-            "playwright",
-            `Init warning: ${err instanceof Error ? err.message : err}`,
-          );
-        }
-      }
-    }
+    // Built-in plugins (GitHub, MemPalace, Playwright) — shared with hot-reload
+    await loadBuiltinPlugins(config);
     rebuildSystemPrompt(config, getPluginPromptAdditions());
   }

package/src/core/gateway-actions.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * Shared gateway actions — platform-agnostic handlers that work with any frontend.
  *
- * Handles: cron CRUD, fetch_url, in-memory history queries.
+ * Handles: cron CRUD, fetch_url, plugin reload, in-memory history queries.
  * Returns null if the action isn't recognized (so the gateway delegates to the frontend).
  */
@@ -310,6 +310,47 @@ export async function handleSharedAction(
       return { ok: true, text: `Deleted cron job "${job.name}" (${jobId})` };
     }
+    // ── Plugin hot-reload ──────────────────────────────────────────────
+    case "reload_plugins": {
+      try {
+        const { reloadPlugins, getPluginPromptAdditions } =
+          await import("./plugin.js");
+        const { rebuildSystemPrompt } = await import("../util/config.js");
+        // reloadPlugins reads + validates config internally — no double read.
+        // Frontends are derived from config if not explicitly provided.
+        const { names, config: freshConfig } = await reloadPlugins();
+        // Rebuild system prompt on the freshConfig, then update the backend's
+        // live config reference so subsequent messages use the new prompt
+        rebuildSystemPrompt(freshConfig, getPluginPromptAdditions());
+        try {
+          const { updateSystemPrompt } =
+            await import("../backend/claude-sdk/index.js");
+          updateSystemPrompt(freshConfig.systemPrompt);
+        } catch (err) {
+          // Non-fatal — OpenCode backend doesn't expose updateSystemPrompt
+          log(
+            "gateway",
+            `reload_plugins: could not update backend prompt: ${err instanceof Error ? err.message : err}`,
+          );
+        }
+        log("gateway", `reload_plugins: ${names.length} plugins loaded`);
+        return {
+          ok: true,
+          text:
+            `Plugins reloaded successfully.\n` +
+            `Loaded (${names.length}): ${names.length > 0 ? names.join(", ") : "(none)"}`,
+        };
+      } catch (err) {
+        return {
+          ok: false,
+          error: `Plugin reload failed: ${err instanceof Error ? err.message : err}`,
+        };
+      }
+    }
     default:
       return null; // not a shared action — delegate to frontend
   }

package/src/core/heartbeat.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  *
  * Runs at a configurable interval (default: 60 minutes).
  * The agent reads instructions from ~/.talon/workspace/heartbeat-instructions.md
- * and executes them using filesystem-only tools (no Telegram/MCP access).
+ * and executes them using filesystem tools and all loaded MCP plugins.
  *
  * Modeled after dream.ts but more general-purpose.
  */
@@ -17,6 +17,7 @@ import type { SDKMessage } from "@anthropic-ai/claude-agent-sdk";
 import { files as pathFiles, dirs } from "../util/paths.js";
 import { log, logError, logWarn } from "../util/log.js";
 import { toYMD } from "../util/time.js";
+import { getPluginMcpServers } from "./plugin.js";
 // ── Types ────────────────────────────────────────────────────────────────────
@@ -282,15 +283,15 @@ async function runHeartbeatAgent(
   const options = {
     model,
     systemPrompt:
-      "You are a background heartbeat agent for Talon. Use only filesystem tools. Follow the user-defined instructions precisely. Be efficient — you have limited time.",
+      "You are a background heartbeat agent for Talon. You have access to filesystem tools and all registered MCP plugins. Follow the user-defined instructions precisely. Be efficient — you have limited time.",
     cwd: workspace,
     permissionMode: "bypassPermissions" as const,
     allowDangerouslySkipPermissions: true,
     ...(configRef.claudeBinary
       ? { pathToClaudeCodeExecutable: configRef.claudeBinary }
       : {}),
-    // No MCP servers — filesystem tools only
-    mcpServers: {},
+    // Load all registered plugin MCP servers (excludes frontend-specific tools like telegram)
+    mcpServers: getPluginMcpServers("", "heartbeat"),
     disallowedTools: [
       "EnterPlanMode",
       "ExitPlanMode",
@@ -315,10 +316,12 @@ async function runHeartbeatAgent(
   // running lock is not released while the subprocess is still active.
   let timeoutHandle: ReturnType<typeof setTimeout> | null = null;
   const timeoutPromise = new Promise<never>((_, reject) => {
-    timeoutHandle = setTimeout(
+    const t = setTimeout(
       () => reject(new Error("Heartbeat agent timed out")),
       HEARTBEAT_TIMEOUT_MS,
     );
+    t.unref(); // Don't prevent Node.js from exiting cleanly during shutdown
+    timeoutHandle = t;
   });
   const agentPromise = (async () => {