npm - talon-agent - Versions diffs - 1.5.0 → 1.6.0 - Mend

talon-agent 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

package/package.json +1 -1
package/src/__tests__/chat-settings.test.ts +20 -7
package/src/__tests__/fuzz.test.ts +3 -0
package/src/__tests__/reload-plugins.test.ts +11 -5
package/src/backend/claude-sdk/constants.ts +63 -0
package/src/backend/claude-sdk/handler.ts +236 -0
package/src/backend/claude-sdk/index.ts +7 -556
package/src/backend/claude-sdk/models.ts +216 -0
package/src/backend/claude-sdk/options.ts +129 -0
package/src/backend/claude-sdk/state.ts +59 -0
package/src/backend/claude-sdk/stream.ts +221 -0
package/src/backend/claude-sdk/warm.ts +89 -0
package/src/bootstrap.ts +19 -5
package/src/cli.ts +30 -15
package/src/core/dream.ts +5 -17
package/src/core/gateway-actions.ts +3 -12
package/src/core/gateway.ts +5 -2
package/src/core/heartbeat.ts +4 -17
package/src/core/models.ts +149 -0
package/src/core/types.ts +4 -0
package/src/frontend/teams/index.ts +1 -3
package/src/frontend/telegram/callbacks.ts +15 -27
package/src/frontend/telegram/commands.ts +23 -28
package/src/frontend/telegram/helpers.ts +13 -15
package/src/frontend/telegram/index.ts +1 -1
package/src/frontend/terminal/commands.ts +7 -4
package/src/index.ts +2 -1
package/src/storage/chat-settings.ts +5 -19

package/src/backend/claude-sdk/index.ts (changed)

@@ -1,559 +1,10 @@
-import { query } from "@anthropic-ai/claude-agent-sdk";
-import type { TalonConfig } from "../../util/config.js";
-import {
-  getSession,
-  incrementTurns,
-  recordUsage,
-  resetSession,
-  setSessionId,
-  setSessionName,
-} from "../../storage/sessions.js";
-import { getChatSettings, setChatModel } from "../../storage/chat-settings.js";
-import { resolve } from "node:path";
-import { classify } from "../../core/errors.js";
-import {
-  getPluginMcpServers,
-  getPluginPromptAdditions,
-} from "../../core/plugin.js";
-import { rebuildSystemPrompt } from "../../util/config.js";
-import { log, logError, logWarn } from "../../util/log.js";
-import { traceMessage } from "../../util/trace.js";
-import { formatFullDatetime } from "../../util/time.js";
-import type { QueryParams, QueryResult } from "../../core/types.js";
-// ── State ────────────────────────────────────────────────────────────────────
-let config: TalonConfig;
-let bridgePortFn: () => number = () => 19876;
-export function initAgent(
-  cfg: TalonConfig,
-  getBridgePort?: () => number,
-): void {
-  config = cfg;
-  if (getBridgePort) bridgePortFn = getBridgePort;
-  // The Agent SDK spawns an embedded Claude Code subprocess.
-  // If CLAUDECODE is set (e.g. running from a Claude Code terminal),
-  // the subprocess refuses to start with a nested-session error that
-  // gets swallowed — causing an infinite hang on Windows.
-  delete process.env.CLAUDECODE;
-}
-/** Update the system prompt on the live config. Used by plugin hot-reload
- *  so the next message picks up new plugin tool descriptions. */
-export function updateSystemPrompt(prompt: string): void {
-  if (config) config.systemPrompt = prompt;
-}
-// ── Shared options builder ───────────────────────────────────────────────────
-function buildSdkOptions(chatId: string) {
-  const chatSettings = getChatSettings(chatId);
-  const activeModel = chatSettings.model ?? config.model;
-  const activeEffort = chatSettings.effort ?? "adaptive";
-  const EFFORT_MAP: Record<
-    string,
-    {
-      thinking: { type: "adaptive" | "disabled" };
-      effort?: "low" | "medium" | "high" | "max";
-    }
-  > = {
-    off: { thinking: { type: "disabled" } },
-    low: { thinking: { type: "adaptive" }, effort: "low" },
-    medium: { thinking: { type: "adaptive" }, effort: "medium" },
-    high: { thinking: { type: "adaptive" }, effort: "high" },
-    max: { thinking: { type: "adaptive" }, effort: "max" },
-  };
-  const thinkingConfig = EFFORT_MAP[activeEffort] ?? {
-    thinking: { type: "adaptive" as const },
-  };
-  const supports1m =
-    !activeModel.includes("haiku") && !activeModel.includes("[1m]");
-  const sdkModel = supports1m ? `${activeModel}[1m]` : activeModel;
-  const session = getSession(chatId);
-  const options = {
-    model: sdkModel,
-    systemPrompt: config.systemPrompt,
-    cwd: config.workspace,
-    permissionMode: "bypassPermissions" as const,
-    allowDangerouslySkipPermissions: true,
-    ...(config.claudeBinary
-      ? { pathToClaudeCodeExecutable: config.claudeBinary }
-      : {}),
-    disallowedTools: [
-      "EnterPlanMode",
-      "ExitPlanMode",
-      "EnterWorktree",
-      "ExitWorktree",
-      "TodoWrite",
-      "TodoRead",
-      "TaskCreate",
-      "TaskUpdate",
-      "TaskGet",
-      "TaskList",
-      "TaskOutput",
-      "TaskStop",
-      "AskUserQuestion",
-      "WebSearch",
-      "WebFetch",
-    ],
-    ...thinkingConfig,
-    mcpServers: {
-      ...(() => {
-        const allFrontends = Array.isArray(config.frontend)
-          ? config.frontend
-          : [config.frontend];
-        const frontends = allFrontends.filter((f) => f !== "terminal");
-        const bridgeUrl = `http://127.0.0.1:${bridgePortFn()}`;
-        const servers: Record<
-          string,
-          { command: string; args: string[]; env: Record<string, string> }
-        > = {};
-        const tsxImport = resolve(
-          import.meta.dirname ?? ".",
-          "../../../node_modules/tsx/dist/esm/index.mjs",
-        );
-        const mcpServerPath = resolve(
-          import.meta.dirname ?? ".",
-          "../../core/tools/mcp-server.ts",
-        );
-        for (const frontend of frontends) {
-          const serverName = `${frontend}-tools`;
-          const mcpEnv = {
-            TALON_BRIDGE_URL: bridgeUrl,
-            TALON_CHAT_ID: chatId,
-            TALON_FRONTEND: frontend,
-          };
-          servers[serverName] = {
-            command: process.platform === "win32" ? "npx" : "node",
-            args:
-              process.platform === "win32"
-                ? ["tsx", mcpServerPath]
-                : ["--import", tsxImport, mcpServerPath],
-            env: mcpEnv,
-          };
-        }
-        return servers;
-      })(),
-      ...(config.braveApiKey
-        ? {
-            "brave-search": {
-              command: resolve(
-                import.meta.dirname ?? ".",
-                "../../../node_modules/.bin/brave-search-mcp-server",
-              ),
-              args: [],
-              env: { BRAVE_API_KEY: config.braveApiKey },
-            },
-          }
-        : {}),
-      ...getPluginMcpServers(`http://127.0.0.1:${bridgePortFn()}`, chatId),
-    },
-    ...(session.sessionId ? { resume: session.sessionId } : {}),
-  };
-  return { options, activeModel, session };
-}
-// ── Session warm-up ─────────────────────────────────────────────────────────
 /**
- * Cold-start a session by spawning an SDK subprocess in streaming input mode,
- * calling getContextUsage() to populate contextWindow and baseline contextTokens,
- * then tearing it down. Fire-and-forget — does not block the caller.
+ * Claude SDK backend — barrel re-export.
+ *
+ * All consumers import from this file; the implementation is split across
+ * focused modules for readability and maintainability.
  */
-export async function warmSession(chatId: string): Promise<void> {
-  if (!config) return;
-  const abort = new AbortController();
-  try {
-    rebuildSystemPrompt(config, getPluginPromptAdditions());
-    const { options } = buildSdkOptions(chatId);
-    // Streaming input mode: pass an async iterable that never yields a user message
-    const neverYield = async function* (): AsyncGenerator<never> {
-      await new Promise<never>((_, reject) => {
-        abort.signal.addEventListener("abort", () =>
-          reject(new Error("aborted")),
-        );
-      });
-    };
-    const q = query({
-      prompt: neverYield(),
-      options: {
-        ...options,
-        abortController: abort,
-      } as Parameters<typeof query>[0]["options"],
-    });
-    // Drain the stream in the background so the SDK's internal message loop
-    // doesn't stall — control responses are processed in readMessages() which
-    // needs the inputStream consumer to not back-pressure.
-    const drainPromise = (async () => {
-      try {
-        for await (const _ of q) {
-          // discard SDK messages; we only care about the control response
-        }
-      } catch {
-        // expected: abort causes the stream to end with an error
-      }
-    })();
-    // Race getContextUsage against a timeout so /reset doesn't hang
-    const timeout = new Promise<never>((_, reject) =>
-      setTimeout(() => reject(new Error("warm-up timed out")), 15_000),
-    );
-    const ctx = await Promise.race([q.getContextUsage(), timeout]);
-    const session = getSession(chatId);
-    if (ctx.maxTokens > 0) session.usage.contextWindow = ctx.maxTokens;
-    if (ctx.totalTokens > 0) session.usage.contextTokens = ctx.totalTokens;
-    log(
-      "agent",
-      `[${chatId}] warm-up: context ${ctx.totalTokens}/${ctx.maxTokens} (${ctx.percentage.toFixed(1)}%) model=${ctx.model}`,
-    );
-    abort.abort();
-    await drainPromise;
-  } catch (err) {
-    abort.abort();
-    // Non-fatal — /status will just show 0 until first real message
-    logWarn(
-      "agent",
-      `[${chatId}] warm-up failed: ${err instanceof Error ? err.message : err}`,
-    );
-  }
-}
-// ── Main handler ─────────────────────────────────────────────────────────────
-export async function handleMessage(
-  params: QueryParams,
-  _retried = false,
-): Promise<QueryResult> {
-  if (!config)
-    throw new Error("Agent not initialized. Call initAgent() first.");
-  const {
-    chatId,
-    text,
-    senderName,
-    isGroup,
-    onTextBlock,
-    onStreamDelta,
-    onToolUse,
-  } = params;
-  const session = getSession(chatId);
-  const t0 = Date.now();
-  // Rebuild system prompt on first turn of a new/reset session so identity,
-  // memory, and workspace listing are fresh
-  if (session.turns === 0) {
-    rebuildSystemPrompt(config, getPluginPromptAdditions());
-  }
-  const { options, activeModel } = buildSdkOptions(chatId);
-  const msgIdHint = params.messageId ? ` [msg_id:${params.messageId}]` : "";
-  const nowTag = `[${formatFullDatetime()}]`;
-  const prompt = isGroup
-    ? `${nowTag} [${senderName}]${msgIdHint}: ${text}`
-    : `${nowTag}${msgIdHint} ${text}`;
-  log("agent", `[${chatId}] <- (${text.length} chars)`);
-  traceMessage(chatId, "in", text, { senderName, isGroup });
-  // SDK types are not fully exported; cast options at the boundary
-  const qi = query({
-    prompt,
-    options: options as Parameters<typeof query>[0]["options"],
-  });
-  let currentBlockText = "";
-  let allResponseText = "";
-  let newSessionId: string | undefined;
-  let toolCalls = 0;
-  // Populated from SDK result message
-  let contextTokens = 0; // actual context fill from last iteration
-  let contextWindow: number | undefined;
-  let numApiCalls = 0;
-  // Cumulative token counts from SDK modelUsage (aggregated across models)
-  let sdkInputTokens = 0;
-  let sdkOutputTokens = 0;
-  let sdkCacheRead = 0;
-  let sdkCacheWrite = 0;
-  // Streaming throttle
-  let lastStreamUpdate = 0;
-  const STREAM_INTERVAL = 1000;
-  try {
-    for await (const message of qi) {
-      const msg = message as Record<string, unknown>;
-      const type = msg.type as string;
-      // Session ID capture
-      if (
-        type === "system" &&
-        msg.subtype === "init" &&
-        typeof msg.session_id === "string"
-      ) {
-        newSessionId = msg.session_id;
-      }
-      // Stream text deltas and thinking deltas
-      if (type === "stream_event" && onStreamDelta) {
-        const event = msg.event as Record<string, unknown> | undefined;
-        if (event?.type === "content_block_delta") {
-          const delta = event.delta as Record<string, unknown> | undefined;
-          if (
-            delta?.type === "thinking_delta" &&
-            typeof delta.thinking === "string"
-          ) {
-            // Thinking phase: notify but don't accumulate text
-            const now = Date.now();
-            if (now - lastStreamUpdate >= STREAM_INTERVAL) {
-              lastStreamUpdate = now;
-              onStreamDelta(currentBlockText, "thinking");
-            }
-          } else if (
-            delta?.type === "text_delta" &&
-            typeof delta.text === "string"
-          ) {
-            currentBlockText += delta.text;
-            const now = Date.now();
-            if (now - lastStreamUpdate >= STREAM_INTERVAL) {
-              lastStreamUpdate = now;
-              onStreamDelta(currentBlockText, "text");
-            }
-          }
-        }
-      }
-      // Complete assistant message — may contain multiple text blocks
-      // and tool_use blocks. Each text block before a tool_use is a
-      // "progress message" that should be sent immediately.
-      if (type === "assistant") {
-        const content = (msg.message as { content?: unknown[] })?.content;
-        if (Array.isArray(content)) {
-          let blockText = "";
-          for (const block of content) {
-            const b = block as { type: string; text?: string; name?: string };
-            if (b.type === "text" && b.text) {
-              blockText += b.text;
-            }
-            if (b.type === "tool_use") {
-              toolCalls++;
-              const tb = block as {
-                type: string;
-                name?: string;
-                input?: Record<string, unknown>;
-              };
-              if (onToolUse && tb.name) {
-                try {
-                  onToolUse(tb.name, tb.input ?? {});
-                } catch {
-                  /* non-fatal */
-                }
-              }
-              // If there's text before this tool call, send it as a progress message
-              if (blockText.trim() && onTextBlock) {
-                try {
-                  await onTextBlock(blockText.trim());
-                } catch {
-                  /* non-fatal — don't abort the stream loop */
-                }
-                allResponseText += blockText;
-                blockText = "";
-                currentBlockText = "";
-              }
-            }
-          }
-          // Remaining text after all tool calls (or if no tool calls)
-          if (blockText.trim()) {
-            currentBlockText = blockText;
-          }
-        }
-      }
-      // Final result — read all data from SDK result fields
-      if (type === "result") {
-        numApiCalls =
-          ((msg as Record<string, unknown>).num_turns as number) ?? 0;
-        // Context fill from last API iteration (only available in raw usage)
-        const usage = msg.usage as
-          | {
-              iterations?: Array<{
-                input_tokens: number;
-                cache_read_input_tokens: number;
-                cache_creation_input_tokens: number;
-              }>;
-            }
-          | undefined;
-        if (
-          usage &&
-          Array.isArray(usage.iterations) &&
-          usage.iterations.length > 0
-        ) {
-          const last = usage.iterations[usage.iterations.length - 1];
-          contextTokens =
-            (last.input_tokens ?? 0) +
-            (last.cache_read_input_tokens ?? 0) +
-            (last.cache_creation_input_tokens ?? 0);
-        }
-        // Token counts, context window from SDK modelUsage (aggregated per model)
-        type MU = {
-          inputTokens?: number;
-          outputTokens?: number;
-          cacheReadInputTokens?: number;
-          cacheCreationInputTokens?: number;
-          contextWindow?: number;
-        };
-        const modelUsage = (msg as Record<string, unknown>).modelUsage as
-          | Record<string, MU>
-          | undefined;
-        if (modelUsage) {
-          for (const mu of Object.values(modelUsage)) {
-            sdkInputTokens += mu.inputTokens ?? 0;
-            sdkOutputTokens += mu.outputTokens ?? 0;
-            sdkCacheRead += mu.cacheReadInputTokens ?? 0;
-            sdkCacheWrite += mu.cacheCreationInputTokens ?? 0;
-            if (
-              mu.contextWindow &&
-              mu.contextWindow > 0 &&
-              contextWindow === undefined
-            ) {
-              contextWindow = mu.contextWindow;
-            }
-          }
-        }
-        log(
-          "agent",
-          `SDK result: modelUsage=${JSON.stringify(modelUsage)}, contextWindow=${contextWindow}, contextTokens=${contextTokens}, numApiCalls=${numApiCalls}`,
-        );
-        // If we still have unsent text and no streaming captured it
-        if (
-          !allResponseText &&
-          !currentBlockText &&
-          typeof msg.result === "string"
-        ) {
-          currentBlockText = msg.result;
-        }
-      }
-    }
-  } catch (err) {
-    const classified = classify(err);
-    if (classified.reason === "session_expired" && !_retried) {
-      logWarn(
-        "agent",
-        `[${chatId}] Stale session, retrying with fresh session`,
-      );
-      resetSession(chatId);
-      return handleMessage(params, true);
-    }
-    // Context length exceeded — reset session and retry (SDK auto-compaction should prevent
-    // this, but handle it as a safety net for edge cases)
-    if (classified.reason === "context_length" && !_retried) {
-      logWarn(
-        "agent",
-        `[${chatId}] Context length exceeded, resetting session and retrying`,
-      );
-      resetSession(chatId);
-      return handleMessage(params, true);
-    }
-    // Model fallback: if overloaded/timeout, retry with a faster model
-    if (!_retried && classified.retryable) {
-      const fallbackModel = activeModel.includes("opus")
-        ? "claude-sonnet-4-6"
-        : activeModel.includes("sonnet")
-          ? "claude-haiku-4-5"
-          : null;
-      if (fallbackModel) {
-        logWarn(
-          "agent",
-          `[${chatId}] ${classified.reason}, falling back to ${fallbackModel.replace("claude-", "")}`,
-        );
-        resetSession(chatId);
-        const originalModel = getChatSettings(chatId).model;
-        setChatModel(chatId, fallbackModel);
-        try {
-          return await handleMessage(params, true);
-        } finally {
-          setChatModel(chatId, originalModel);
-        }
-      }
-    }
-    logError("agent", `[${chatId}] SDK error: ${classified.message}`);
-    throw classified;
-  }
-  // Persist session and usage
-  const durationMs = Date.now() - t0;
-  if (newSessionId) setSessionId(chatId, newSessionId);
-  incrementTurns(chatId);
-  recordUsage(chatId, {
-    inputTokens: sdkInputTokens,
-    outputTokens: sdkOutputTokens,
-    cacheRead: sdkCacheRead,
-    cacheWrite: sdkCacheWrite,
-    durationMs,
-    model: activeModel,
-    contextTokens,
-    contextWindow,
-    numApiCalls,
-  });
-  // Set a descriptive session name from the first message
-  if (session.turns === 0 && text) {
-    // Strip metadata prefixes like [DM from ...] or [Name]:
-    const cleanText = text
-      .replace(/^\[.*?\]\s*/g, "")
-      .replace(/\[msg_id:\d+\]\s*/g, "")
-      .trim();
-    if (cleanText) {
-      const name =
-        cleanText.length > 30 ? cleanText.slice(0, 30) + "..." : cleanText;
-      setSessionName(chatId, name);
-    }
-  }
-  // The remaining currentBlockText is the final response text
-  allResponseText += currentBlockText;
-  const totalPrompt = sdkInputTokens + sdkCacheRead + sdkCacheWrite;
-  const cacheHitPct =
-    totalPrompt > 0 ? Math.round((sdkCacheRead / totalPrompt) * 100) : 0;
-  log(
-    "agent",
-    `[${chatId}] -> (${durationMs}ms, in=${sdkInputTokens} out=${sdkOutputTokens} cache=${cacheHitPct}%` +
-      `${toolCalls > 0 ? ` tools=${toolCalls}` : ""})`,
-  );
-  traceMessage(chatId, "out", allResponseText, {
-    durationMs,
-    inputTokens: sdkInputTokens,
-    outputTokens: sdkOutputTokens,
-    cacheRead: sdkCacheRead,
-    cacheWrite: sdkCacheWrite,
-    toolCalls,
-    model: activeModel,
-  });
-  return {
-    text: allResponseText.trim(),
-    durationMs,
-    inputTokens: sdkInputTokens,
-    outputTokens: sdkOutputTokens,
-    cacheRead: sdkCacheRead,
-    cacheWrite: sdkCacheWrite,
-  };
-}
+export { initAgent, updateSystemPrompt } from "./state.js";
+export { warmSession } from "./warm.js";
+export { handleMessage } from "./handler.js";