npm - talon-agent - Versions diffs - 1.5.0 → 1.6.0 - Mend

talon-agent 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/package.json +1 -1
package/src/__tests__/chat-settings.test.ts +20 -7
package/src/__tests__/fuzz.test.ts +3 -0
package/src/__tests__/reload-plugins.test.ts +11 -5
package/src/backend/claude-sdk/constants.ts +63 -0
package/src/backend/claude-sdk/handler.ts +236 -0
package/src/backend/claude-sdk/index.ts +7 -556
package/src/backend/claude-sdk/models.ts +216 -0
package/src/backend/claude-sdk/options.ts +129 -0
package/src/backend/claude-sdk/state.ts +59 -0
package/src/backend/claude-sdk/stream.ts +221 -0
package/src/backend/claude-sdk/warm.ts +89 -0
package/src/bootstrap.ts +19 -5
package/src/cli.ts +30 -15
package/src/core/dream.ts +5 -17
package/src/core/gateway-actions.ts +3 -12
package/src/core/gateway.ts +5 -2
package/src/core/heartbeat.ts +4 -17
package/src/core/models.ts +149 -0
package/src/core/types.ts +4 -0
package/src/frontend/teams/index.ts +1 -3
package/src/frontend/telegram/callbacks.ts +15 -27
package/src/frontend/telegram/commands.ts +23 -28
package/src/frontend/telegram/helpers.ts +13 -15
package/src/frontend/telegram/index.ts +1 -1
package/src/frontend/terminal/commands.ts +7 -4
package/src/index.ts +2 -1
package/src/storage/chat-settings.ts +5 -19

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "talon-agent",
-  "version": "1.5.0",
+  "version": "1.6.0",
   "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
   "author": "Dylan Neve",
   "license": "MIT",

package/src/__tests__/chat-settings.test.ts CHANGED Viewed

@@ -31,9 +31,13 @@ const {
   loadChatSettings,
   resolveModelName,
   EFFORT_LEVELS,
-  MODEL_ALIASES,
 } = await import("../storage/chat-settings.js");
+// Register Claude models (static — no SDK subprocess in tests)
+const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
+  await import("../backend/claude-sdk/models.js");
+registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
 describe("chat-settings", () => {
   describe("getChatSettings", () => {
     it("returns empty object for unknown chat", () => {
@@ -166,12 +170,21 @@ describe("chat-settings", () => {
     });
   });
-  describe("MODEL_ALIASES", () => {
-    it("contains all expected aliases", () => {
-      expect(Object.keys(MODEL_ALIASES).length).toBeGreaterThanOrEqual(9);
-      expect(MODEL_ALIASES.sonnet).toBe("claude-sonnet-4-6");
-      expect(MODEL_ALIASES.opus).toBe("claude-opus-4-6");
-      expect(MODEL_ALIASES.haiku).toBe("claude-haiku-4-5");
+  describe("model alias resolution (via registry)", () => {
+    it("resolves short aliases to full model IDs", () => {
+      expect(resolveModelName("sonnet")).toBe("claude-sonnet-4-6");
+      expect(resolveModelName("opus")).toBe("claude-opus-4-6");
+      expect(resolveModelName("haiku")).toBe("claude-haiku-4-5");
+    });
+    it("resolves versioned aliases", () => {
+      expect(resolveModelName("sonnet-4-6")).toBe("claude-sonnet-4-6");
+      expect(resolveModelName("opus-4.6")).toBe("claude-opus-4-6");
+      expect(resolveModelName("haiku-4.5")).toBe("claude-haiku-4-5");
+    });
+    it("passes through unknown names unchanged", () => {
+      expect(resolveModelName("gpt-4o")).toBe("gpt-4o");
     });
   });

package/src/__tests__/fuzz.test.ts CHANGED Viewed

@@ -49,6 +49,9 @@ const { classify, TalonError } = await import("../core/errors.js");
 await import("../storage/cron-store.js");
 const { handleSharedAction } = await import("../core/gateway-actions.js");
 const { resolveModelName } = await import("../storage/chat-settings.js");
+const { registerClaudeModelsStatic, CLAUDE_MODELS_STATIC } =
+  await import("../backend/claude-sdk/models.js");
+registerClaudeModelsStatic(CLAUDE_MODELS_STATIC);
 const { Cron } = await import("croner");
 // ── Configuration ───────────────────────────────────────────────────────────

package/src/__tests__/reload-plugins.test.ts CHANGED Viewed

@@ -75,12 +75,14 @@ vi.mock("../util/config.js", () => ({
     ),
 }));
-vi.mock("../backend/claude-sdk/index.js", () => ({
+// Backend mock — passed as 3rd arg to handleSharedAction
+const mockBackend = {
+  query: vi.fn(),
   updateSystemPrompt: (...args: unknown[]) =>
     mockUpdateSystemPrompt(
       ...(args as Parameters<typeof mockUpdateSystemPrompt>),
     ),
-}));
+};
 // ── Import after mocks ────────────────────────────────────────────────────
@@ -105,6 +107,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result).not.toBeNull();
     expect(result!.ok).toBe(true);
@@ -115,19 +118,19 @@ describe("reload_plugins gateway action", () => {
   });
   it("calls reloadPlugins without explicit frontends (derived from config)", async () => {
-    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
     // Gateway no longer passes frontends — reloadPlugins derives them from config
     expect(mockReloadPlugins).toHaveBeenCalledWith();
   });
   it("rebuilds system prompt after reloading", async () => {
-    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
     expect(mockRebuildSystemPrompt).toHaveBeenCalledTimes(1);
     expect(mockGetPluginPromptAdditions).toHaveBeenCalledTimes(1);
   });
   it("updates backend system prompt after rebuild", async () => {
-    await handleSharedAction({ action: "reload_plugins" }, 12345);
+    await handleSharedAction({ action: "reload_plugins" }, 12345, mockBackend);
     expect(mockUpdateSystemPrompt).toHaveBeenCalledTimes(1);
   });
@@ -138,6 +141,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result).not.toBeNull();
     expect(result!.ok).toBe(false);
@@ -151,6 +155,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result!.ok).toBe(false);
     expect(result!.error).toContain("Invalid JSON in config");
@@ -164,6 +169,7 @@ describe("reload_plugins gateway action", () => {
     const result = await handleSharedAction(
       { action: "reload_plugins" },
       12345,
+      mockBackend,
     );
     expect(result!.ok).toBe(true);
     expect(result!.text).toContain("(0)");

package/src/backend/claude-sdk/constants.ts ADDED Viewed

@@ -0,0 +1,63 @@
+/**
+ * Shared constants for Claude SDK backend and background agents.
+ *
+ * Single source of truth for disallowed tool lists, thinking effort
+ * configuration, and streaming parameters.
+ */
+// ── Disallowed tool lists ──────────────────────────────────────────────────
+/**
+ * Core tools disallowed in all SDK query contexts (chat, heartbeat, dream).
+ * These are interactive or planning-only tools that make no sense in a
+ * headless agent context.
+ *
+ * Declared `as const`, so the value is typed as a readonly tuple of string
+ * literals. DISALLOWED_TOOLS_CHAT and DISALLOWED_TOOLS_BACKGROUND both spread
+ * this list, so a name added here is disallowed in those lists as well.
+ */
+export const DISALLOWED_TOOLS_CORE = [
+  "EnterPlanMode",
+  "ExitPlanMode",
+  "EnterWorktree",
+  "ExitWorktree",
+  "TodoWrite",
+  "TodoRead",
+  "TaskCreate",
+  "TaskUpdate",
+  "TaskGet",
+  "TaskList",
+  "TaskOutput",
+  "TaskStop",
+  "AskUserQuestion",
+] as const;
+/**
+ * Disallowed tools for the main chat handler: every entry of
+ * DISALLOWED_TOOLS_CORE plus `WebSearch` and `WebFetch` (web tools replaced
+ * by Brave MCP).
+ */
+export const DISALLOWED_TOOLS_CHAT = [
+  ...DISALLOWED_TOOLS_CORE,
+  "WebSearch",
+  "WebFetch",
+] as const;
+/**
+ * Disallowed tools for background agents (heartbeat and dream): every entry
+ * of DISALLOWED_TOOLS_CORE plus `Agent`.
+ */
+export const DISALLOWED_TOOLS_BACKGROUND = [
+  ...DISALLOWED_TOOLS_CORE,
+  "Agent",
+] as const;
+// ── Thinking / effort configuration ────────────────────────────────────────
+export const EFFORT_MAP: Record< // lookup table: effort-level name -> thinking/effort settings
+  string, // NOTE(review): string-keyed, so a lookup with a name not listed below yields undefined at runtime; callers should guard
+  {
+    thinking: { type: "adaptive" | "disabled" };
+    effort?: "low" | "medium" | "high" | "max"; // optional: the `off` entry below sets no effort
+  }
+> = {
+  off: { thinking: { type: "disabled" } }, // the only entry with thinking disabled
+  low: { thinking: { type: "adaptive" }, effort: "low" }, // every other entry: adaptive thinking, effort equal to its key
+  medium: { thinking: { type: "adaptive" }, effort: "medium" },
+  high: { thinking: { type: "adaptive" }, effort: "high" },
+  max: { thinking: { type: "adaptive" }, effort: "max" },
+};
+// ── Streaming ──────────────────────────────────────────────────────────────
+/**
+ * Minimum interval, in milliseconds, between streaming delta callbacks, to
+ * avoid flooding frontends. The value 1000 is one second.
+ */
+export const STREAM_INTERVAL = 1000;

package/src/backend/claude-sdk/handler.ts ADDED Viewed

@@ -0,0 +1,236 @@
+/**
+ * Main message handler — executes a user query through the Claude Agent SDK.
+ *
+ * Orchestrates the full lifecycle: prompt formatting, SDK query, stream
+ * processing, error recovery (session expired / context overflow / model
+ * fallback), token accounting, and session persistence.
+ */
+import { query } from "@anthropic-ai/claude-agent-sdk";
+import {
+  getSession,
+  incrementTurns,
+  recordUsage,
+  resetSession,
+  setSessionId,
+  setSessionName,
+} from "../../storage/sessions.js";
+import { getChatSettings, setChatModel } from "../../storage/chat-settings.js";
+import { classify } from "../../core/errors.js";
+import { getFallbackModel } from "../../core/models.js";
+import { rebuildSystemPrompt } from "../../util/config.js";
+import { getPluginPromptAdditions } from "../../core/plugin.js";
+import { log, logError, logWarn } from "../../util/log.js";
+import { traceMessage } from "../../util/trace.js";
+import { formatFullDatetime } from "../../util/time.js";
+import type { QueryParams, QueryResult } from "../../core/types.js";
+import { getConfig } from "./state.js";
+import { buildSdkOptions } from "./options.js";
+import {
+  createStreamState,
+  isSystemInit,
+  isStreamEvent,
+  isAssistant,
+  isResult,
+  processStreamDelta,
+  processAssistantMessage,
+  processResultMessage,
+} from "./stream.js";
+/**
+ * ── Main handler ──────────────────────────────────────────────────────────
+ *
+ * Runs one user message through the Claude Agent SDK `query()` stream and
+ * returns the collected response text plus timing and token counts.
+ *
+ * Flow, as implemented below:
+ *  1. If the session has `turns === 0`, rebuild the system prompt.
+ *  2. Build SDK options for the chat and a prompt prefixed with a timestamp
+ *     tag (plus `[senderName]` in groups and an optional `[msg_id:N]` hint).
+ *  3. Iterate the SDK stream, dispatching on message kind (system init,
+ *     stream event, assistant, result) and invoking the optional callbacks.
+ *  4. If the stream throws, classify the error and retry at most once: with
+ *     a reset session for `session_expired` or `context_length`, or with the
+ *     model returned by `getFallbackModel` for other retryable errors.
+ *  5. Persist session id, turn count and usage; name the session; log,
+ *     trace, and return the result.
+ *
+ * @param params - Query parameters. This function reads `chatId`, `text`,
+ *   `senderName`, `isGroup`, `messageId`, and the optional `onTextBlock`,
+ *   `onStreamDelta` and `onToolUse` callbacks.
+ * @param _retried - Internal recursion guard. It is `true` on the retry call,
+ *   which disables every retry branch so that a second failure is rethrown.
+ * @returns The trimmed response text, the elapsed milliseconds, and the
+ *   input, output and cache token counters accumulated in the stream state.
+ * @throws The value returned by `classify(err)` when the SDK stream throws
+ *   and no retry branch applies.
+ */
+export async function handleMessage(
+  params: QueryParams,
+  _retried = false,
+): Promise<QueryResult> {
+  const config = getConfig();
+  const {
+    chatId,
+    text,
+    senderName,
+    isGroup,
+    onTextBlock,
+    onStreamDelta,
+    onToolUse,
+  } = params;
+  const session = getSession(chatId); // fetched once, before the turn counter is incremented further down
+  const t0 = Date.now(); // start timestamp used for durationMs
+  // Rebuild system prompt on first turn of a new/reset session so identity,
+  // memory, and workspace listing are fresh
+  if (session.turns === 0) {
+    rebuildSystemPrompt(config, getPluginPromptAdditions());
+  }
+  const { options, activeModel } = buildSdkOptions(chatId);
+  const msgIdHint = params.messageId ? ` [msg_id:${params.messageId}]` : "";
+  const nowTag = `[${formatFullDatetime()}]`;
+  const prompt = isGroup // group prompts additionally carry the sender name and a colon
+    ? `${nowTag} [${senderName}]${msgIdHint}: ${text}`
+    : `${nowTag}${msgIdHint} ${text}`;
+  log("agent", `[${chatId}] <- (${text.length} chars)`);
+  traceMessage(chatId, "in", text, { senderName, isGroup });
+  const qi = query({ prompt, options });
+  const state = createStreamState();
+  try {
+    for await (const message of qi) {
+      // Session ID capture — kept on the stream state and persisted via
+      // setSessionId only after the stream has finished without throwing
+      if (isSystemInit(message)) {
+        state.newSessionId = message.session_id;
+        continue;
+      }
+      // Stream text deltas and thinking deltas
+      if (isStreamEvent(message)) {
+        processStreamDelta(message, state, onStreamDelta);
+        continue;
+      }
+      // Complete assistant message — extract text blocks and tool calls
+      if (isAssistant(message)) {
+        const result = processAssistantMessage(message, state);
+        // Notify tool usage (onToolUse is optional and is not awaited)
+        for (const tool of result.tools) {
+          if (onToolUse) {
+            try {
+              onToolUse(tool.name, tool.input);
+            } catch {
+              /* non-fatal — a throwing callback must not abort the stream loop */
+            }
+          }
+        }
+        // Send progress text segments (text before each tool call) in order;
+        // each callback is awaited before the next segment is sent
+        if (onTextBlock) {
+          for (const text of result.progressTexts) { // note: this `text` shadows the outer message text
+            try {
+              await onTextBlock(text);
+            } catch {
+              /* non-fatal — don't abort the stream loop */
+            }
+          }
+        }
+        continue;
+      }
+      // Final result — read token counts and context info
+      if (isResult(message)) {
+        processResultMessage(message, state);
+      }
+    }
+  } catch (err) {
+    const classified = classify(err);
+    // Session expired — reset and retry once (the retry passes _retried = true)
+    if (classified.reason === "session_expired" && !_retried) {
+      logWarn(
+        "agent",
+        `[${chatId}] Stale session, retrying with fresh session`,
+      );
+      resetSession(chatId);
+      return handleMessage(params, true);
+    }
+    // Context length exceeded — safety net for edge cases where SDK
+    // auto-compaction doesn't prevent overflow
+    if (classified.reason === "context_length" && !_retried) {
+      logWarn(
+        "agent",
+        `[${chatId}] Context length exceeded, resetting session and retrying`,
+      );
+      resetSession(chatId);
+      return handleMessage(params, true);
+    }
+    // Model fallback: if overloaded/timeout, retry with the next-tier model.
+    // The code gates on classified.retryable and on getFallbackModel returning a value.
+    if (!_retried && classified.retryable) {
+      const fallback = getFallbackModel(activeModel);
+      if (fallback) {
+        logWarn(
+          "agent",
+          `[${chatId}] ${classified.reason}, falling back to ${fallback.replace("claude-", "")}`,
+        );
+        resetSession(chatId);
+        const originalModel = getChatSettings(chatId).model; // NOTE(review): may be undefined if no model was stored; confirm setChatModel accepts that
+        setChatModel(chatId, fallback); // temporarily overwrites the stored chat model for the retry
+        try {
+          return await handleMessage(params, true);
+        } finally {
+          setChatModel(chatId, originalModel); // restored whether the retry succeeds or throws
+        }
+      }
+    }
+    logError("agent", `[${chatId}] SDK error: ${classified.message}`);
+    throw classified;
+  }
+  // ── Persist session and usage ─────────────────────────────────────────────
+  const durationMs = Date.now() - t0;
+  if (state.newSessionId) setSessionId(chatId, state.newSessionId);
+  incrementTurns(chatId);
+  recordUsage(chatId, {
+    inputTokens: state.sdkInputTokens,
+    outputTokens: state.sdkOutputTokens,
+    cacheRead: state.sdkCacheRead,
+    cacheWrite: state.sdkCacheWrite,
+    durationMs,
+    model: activeModel,
+    contextTokens: state.contextTokens,
+    contextWindow: state.contextWindow,
+    numApiCalls: state.numApiCalls,
+  });
+  /* Set a descriptive session name from the first message: strip a leading
+   * [bracketed] tag and every [msg_id:N] marker from the text, trim it, and
+   * cut it to 30 characters followed by "..." when it is longer.
+   * NOTE(review): `session` was fetched before incrementTurns(chatId) above;
+   * if getSession returns the live session object and incrementTurns mutates
+   * it, this check would already see turns > 0 and never set a name. Confirm
+   * against storage/sessions. */
+  if (session.turns === 0 && text) {
+    const cleanText = text
+      .replace(/^\[.*?\]\s*/g, "")
+      .replace(/\[msg_id:\d+\]\s*/g, "")
+      .trim();
+    if (cleanText) {
+      const name =
+        cleanText.length > 30 ? cleanText.slice(0, 30) + "..." : cleanText;
+      setSessionName(chatId, name);
+    }
+  }
+  // ── Build result ──────────────────────────────────────────────────────────
+  state.allResponseText += state.currentBlockText; // presumably the not-yet-flushed tail of the last text block; see stream.js to confirm
+  const totalPrompt =
+    state.sdkInputTokens + state.sdkCacheRead + state.sdkCacheWrite;
+  const cacheHitPct =
+    totalPrompt > 0 ? Math.round((state.sdkCacheRead / totalPrompt) * 100) : 0; // rounded percent of prompt tokens read from cache; 0 when there are none
+  log(
+    "agent",
+    `[${chatId}] -> (${durationMs}ms, in=${state.sdkInputTokens} out=${state.sdkOutputTokens} cache=${cacheHitPct}%` +
+      `${state.toolCalls > 0 ? ` tools=${state.toolCalls}` : ""})`,
+  );
+  traceMessage(chatId, "out", state.allResponseText, {
+    durationMs,
+    inputTokens: state.sdkInputTokens,
+    outputTokens: state.sdkOutputTokens,
+    cacheRead: state.sdkCacheRead,
+    cacheWrite: state.sdkCacheWrite,
+    toolCalls: state.toolCalls,
+    model: activeModel,
+  });
+  return {
+    text: state.allResponseText.trim(), // the trace above receives the untrimmed text
+    durationMs,
+    inputTokens: state.sdkInputTokens,
+    outputTokens: state.sdkOutputTokens,
+    cacheRead: state.sdkCacheRead,
+    cacheWrite: state.sdkCacheWrite,
+  };
+}