npm - talon-agent - Versions diffs - 1.5.0 → 1.6.0 - Mend

talon-agent 1.5.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/package.json +1 -1
package/src/__tests__/chat-settings.test.ts +20 -7
package/src/__tests__/fuzz.test.ts +3 -0
package/src/__tests__/reload-plugins.test.ts +11 -5
package/src/backend/claude-sdk/constants.ts +63 -0
package/src/backend/claude-sdk/handler.ts +236 -0
package/src/backend/claude-sdk/index.ts +7 -556
package/src/backend/claude-sdk/models.ts +216 -0
package/src/backend/claude-sdk/options.ts +129 -0
package/src/backend/claude-sdk/state.ts +59 -0
package/src/backend/claude-sdk/stream.ts +221 -0
package/src/backend/claude-sdk/warm.ts +89 -0
package/src/bootstrap.ts +19 -5
package/src/cli.ts +30 -15
package/src/core/dream.ts +5 -17
package/src/core/gateway-actions.ts +3 -12
package/src/core/gateway.ts +5 -2
package/src/core/heartbeat.ts +4 -17
package/src/core/models.ts +149 -0
package/src/core/types.ts +4 -0
package/src/frontend/teams/index.ts +1 -3
package/src/frontend/telegram/callbacks.ts +15 -27
package/src/frontend/telegram/commands.ts +23 -28
package/src/frontend/telegram/helpers.ts +13 -15
package/src/frontend/telegram/index.ts +1 -1
package/src/frontend/terminal/commands.ts +7 -4
package/src/index.ts +2 -1
package/src/storage/chat-settings.ts +5 -19

package/src/backend/claude-sdk/models.ts ADDED Viewed

@@ -0,0 +1,216 @@
+/**
+ * Claude model discovery — queries the SDK for available models.
+ *
+ * Spawns a throwaway SDK subprocess, calls supportedModels(), and
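+ * registers the results in the global model registry. At runtime this
+ * is the only source of truth for available Claude models — if the SDK
+ * fails to provide models, initialization is aborted. (The static list
+ * at the bottom of this file exists only for the CLI setup wizard and tests.)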
+ */
+import { query } from "@anthropic-ai/claude-agent-sdk";
+import { registerModels, clearModelsByProvider } from "../../core/models.js";
+import type { ModelInfo } from "../../core/models.js";
+import { log, logError } from "../../util/log.js";
+// ── Tier / fallback inference ───────────────────────────────────────────────
+/**
+ * Map a model ID to a registry tier using case-sensitive substring checks.
+ *
+ * IDs containing "opus" are "premium", IDs containing "haiku" are "economy",
+ * and anything else is "balanced". "opus" is checked first, so it wins when
+ * both substrings are present.
+ */
+function inferTier(modelId: string): ModelInfo["tier"] {
+  // Ordered by precedence: the first matching marker decides the tier.
+  const markerTiers: Array<[string, ModelInfo["tier"]]> = [
+    ["opus", "premium"],
+    ["haiku", "economy"],
+  ];
+  for (const [marker, tier] of markerTiers) {
+    if (modelId.includes(marker)) return tier;
+  }
+  return "balanced";
+}
+/**
+ * Build short aliases from a model ID like "claude-sonnet-4-6".
+ *
+ * For an ID of the form "claude-<family>-<version>" the aliases are, in
+ * order: the family ("sonnet"), family plus version as written
+ * ("sonnet-4-6"), and family plus version with dashes turned into dots
+ * ("sonnet-4.6"). Duplicates are dropped, so a version without dashes
+ * (e.g. "claude-opus-4") does not yield the same alias twice.
+ *
+ * @param modelId Full model identifier.
+ * @returns The unique aliases in the order above, or an empty array when the
+ *   ID does not match the "claude-<family>-<version>" pattern.
+ */
+function buildAliases(modelId: string): string[] {
+  const match = modelId.match(/claude-(\w+)-(.+)/);
+  if (!match) return [];
+  const family = match[1];
+  const version = match[2];
+  const candidates = [
+    family,
+    `${family}-${version}`,
+    `${family}-${version.replace(/-/g, ".")}`,
+  ];
+  // A Set keeps first-insertion order, so de-duplicating preserves alias order.
+  return Array.from(new Set(candidates));
+}
+// ── SDK → registry conversion ───────────────────────────────────────────────
+/**
+ * Turn the SDK's model list into registry entries.
+ *
+ * Entries whose value does not start with "claude-" or that contain "[" are
+ * skipped. The survivors are ordered premium → balanced → economy, and each
+ * one is given a fallback pointing at the first entry of any lower tier
+ * (the lowest tier present gets none).
+ */
+function convertSdkModels(
+  sdkModels: Array<{
+    value: string;
+    displayName: string;
+    description: string;
+  }>,
+): ModelInfo[] {
+  const tierOrder = { premium: 0, balanced: 1, economy: 2 };
+  const models: ModelInfo[] = [];
+  for (const sdkModel of sdkModels) {
+    const id = sdkModel.value;
+    // Skip SDK artifacts:
+    // - "[1m]" variants: that suffix is added by options.ts
+    // - "default" and any other value without the "claude-" prefix
+    if (!id.startsWith("claude-") || id.includes("[")) continue;
+    models.push({
+      id,
+      displayName: sdkModel.displayName,
+      description: sdkModel.description,
+      aliases: buildAliases(id),
+      provider: "anthropic",
+      capabilities: {
+        supports1mContext: !id.includes("haiku"),
+      },
+      tier: inferTier(id),
+    });
+  }
+  models.sort((left, right) => tierOrder[left.tier] - tierOrder[right.tier]);
+  // Because the list is tier-sorted, the first entry with a larger rank is
+  // the head of the next lower tier that actually has a model.
+  for (const model of models) {
+    if (model.fallback) continue;
+    const rank = tierOrder[model.tier];
+    const lowerTierModel = models.find(
+      (candidate) => tierOrder[candidate.tier] > rank,
+    );
+    if (lowerTierModel) model.fallback = lowerTierModel.id;
+  }
+  return models;
+}
+// ── Public API ──────────────────────────────────────────────────────────────
+/**
+ * Discover available models from the Claude Agent SDK and register them.
+ *
+ * Starts a query() whose prompt generator never yields, then awaits
+ * supportedModels() raced against a 15 s timeout. The reported models are
+ * converted to the registry format; the existing "anthropic" entries are
+ * cleared and replaced only when at least one usable model remains after
+ * filtering, so a useless SDK answer cannot leave the registry empty.
+ * The query is aborted and its message stream drained on every path.
+ *
+ * @param sdkOptions Options forwarded to query(), merged with an internal
+ *   AbortController.
+ * @throws Error if the SDK call fails or times out, if the SDK returns an
+ *   empty list, or if no model survives filtering. The underlying message is
+ *   logged via logError() and embedded in the thrown error.
+ */
+export async function registerClaudeModels(sdkOptions: {
+  model: string;
+  cwd?: string;
+  permissionMode?: string;
+  allowDangerouslySkipPermissions?: boolean;
+  pathToClaudeCodeExecutable?: string;
+}): Promise<void> {
+  const abort = new AbortController();
+  let drainPromise: Promise<void> | undefined;
+  try {
+    // Prompt stream that never produces a message; it only ends (by
+    // rejecting) once the controller is aborted.
+    const neverYield = async function* (): AsyncGenerator<never> {
+      await new Promise<never>((_, reject) => {
+        abort.signal.addEventListener(
+          "abort",
+          () => reject(new Error("aborted")),
+          { once: true },
+        );
+      });
+    };
+    const q = query({
+      prompt: neverYield(),
+      options: {
+        ...sdkOptions,
+        abortController: abort,
+      } as Parameters<typeof query>[0]["options"],
+    });
+    // Consume and discard SDK messages in the background until the abort.
+    drainPromise = (async () => {
+      try {
+        for await (const _ of q) {
+          /* discard */
+        }
+      } catch {
+        /* expected on abort */
+      }
+    })();
+    let timeoutId: ReturnType<typeof setTimeout> | undefined;
+    const timeout = new Promise<never>((_, reject) => {
+      timeoutId = setTimeout(
+        () => reject(new Error("model discovery timed out after 15s")),
+        15_000,
+      );
+    });
+    let sdkModels: Array<{
+      value: string;
+      displayName: string;
+      description: string;
+    }>;
+    try {
+      sdkModels = await Promise.race([q.supportedModels(), timeout]);
+    } finally {
+      // Always clear the timer so it cannot keep the process alive.
+      if (timeoutId !== undefined) clearTimeout(timeoutId);
+    }
+    if (sdkModels.length === 0) {
+      throw new Error("SDK returned empty model list");
+    }
+    const models = convertSdkModels(sdkModels);
+    // Validate BEFORE clearing: filtering can drop every entry, and wiping
+    // the registry in that case would silently leave zero Claude models.
+    if (models.length === 0) {
+      throw new Error(
+        `SDK returned no usable Claude models (received: ${sdkModels.map((m) => m.value).join(", ")})`,
+      );
+    }
+    clearModelsByProvider("anthropic");
+    registerModels(models);
+    log(
+      "agent",
+      `Discovered ${models.length} models from SDK: ${models.map((m) => m.id).join(", ")}`,
+    );
+    abort.abort();
+    await drainPromise;
+  } catch (err) {
+    abort.abort();
+    if (drainPromise) await drainPromise.catch(() => {});
+    const msg = err instanceof Error ? err.message : String(err);
+    logError("agent", `Fatal: model discovery failed — ${msg}`);
+    throw new Error(
+      `Claude SDK model discovery failed: ${msg}. ` +
+        `Check that Claude Code is installed and your API key is valid.`,
+    );
+  }
+}
+/**
+ * Register models from a static list. For use in tests and the CLI setup
+ * wizard where the SDK subprocess is not available.
+ *
+ * The list is handed to registerModels() unchanged. Unlike
+ * registerClaudeModels(), this does not call clearModelsByProvider() first.
+ *
+ * @param models Registry entries to add.
+ */
+export function registerClaudeModelsStatic(models: ModelInfo[]): void {
+  registerModels(models);
+}
+/**
+ * Default model definitions for CLI setup wizard and tests.
+ *
+ * Entries are listed from the "premium" tier down to "economy". Each entry's
+ * `fallback` names the entry that follows it, and the last entry has none.
+ * Only the haiku entry sets `supports1mContext` to false.
+ */
+export const CLAUDE_MODELS_STATIC: ModelInfo[] = [
+  {
+    id: "claude-opus-4-6",
+    displayName: "Opus 4.6",
+    description: "smartest",
+    aliases: ["opus", "opus-4.6", "opus-4-6"],
+    provider: "anthropic",
+    capabilities: { supports1mContext: true },
+    tier: "premium",
+    fallback: "claude-sonnet-4-6",
+  },
+  {
+    id: "claude-sonnet-4-6",
+    displayName: "Sonnet 4.6",
+    description: "fast, balanced",
+    aliases: ["sonnet", "sonnet-4.6", "sonnet-4-6"],
+    provider: "anthropic",
+    capabilities: { supports1mContext: true },
+    tier: "balanced",
+    fallback: "claude-haiku-4-5",
+  },
+  {
+    id: "claude-haiku-4-5",
+    displayName: "Haiku 4.5",
+    description: "fastest, cheapest",
+    aliases: ["haiku", "haiku-4.5", "haiku-4-5"],
+    provider: "anthropic",
+    capabilities: { supports1mContext: false },
+    tier: "economy",
+  },
+];

package/src/backend/claude-sdk/options.ts ADDED Viewed

@@ -0,0 +1,129 @@
+/**
+ * SDK options builder — constructs the configuration object for query() calls.
+ *
+ * Translates per-chat settings (model, effort) and global config (plugins,
+ * MCP servers, system prompt) into the Options shape expected by the SDK.
+ */
+import { resolve } from "node:path";
+import type { Options } from "@anthropic-ai/claude-agent-sdk";
+import { getSession } from "../../storage/sessions.js";
+import { getChatSettings } from "../../storage/chat-settings.js";
+import { getPluginMcpServers } from "../../core/plugin.js";
+import { supports1mContext } from "../../core/models.js";
+import { getConfig, getBridgePort } from "./state.js";
+import { DISALLOWED_TOOLS_CHAT, EFFORT_MAP } from "./constants.js";
+// ── Types ────────────────────────────────────────────────────────────────────
+/**
+ * Return value of buildSdkOptions().
+ *
+ * - options: the Options object built for the SDK query() call.
+ * - activeModel: the chat's model override, or the configured default when
+ *   the chat has none; it does not carry the "[1m]" suffix that may be
+ *   appended to options.model.
+ * - session: the value returned by getSession() for the chat.
+ */
+export type BuildSdkOptionsResult = {
+  options: Options;
+  activeModel: string;
+  session: ReturnType<typeof getSession>;
+};
+// ── MCP server construction ─────────────────────────────────────────────────
+/**
+ * Assemble the MCP server definitions for one chat.
+ *
+ * Every configured frontend other than "terminal" gets a "<frontend>-tools"
+ * server that runs core/tools/mcp-server.ts — through `npx tsx` on Windows,
+ * through `node --import <tsx loader>` elsewhere — with the bridge URL, chat
+ * ID and frontend name in its environment. A "brave-search" server is added
+ * when config.braveApiKey is set.
+ *
+ * @param chatId Chat identifier, passed to each frontend server as TALON_CHAT_ID.
+ * @returns Map from server name to its command, arguments and environment.
+ */
+function buildMcpServers(
+  chatId: string,
+): Record<
+  string,
+  { command: string; args: string[]; env: Record<string, string> }
+> {
+  type McpServerSpec = {
+    command: string;
+    args: string[];
+    env: Record<string, string>;
+  };
+  const config = getConfig();
+  const bridgeUrl = `http://127.0.0.1:${getBridgePort()}`;
+  const moduleDir = import.meta.dirname ?? ".";
+  const tsxImport = resolve(
+    moduleDir,
+    "../../../node_modules/tsx/dist/esm/index.mjs",
+  );
+  const mcpServerPath = resolve(moduleDir, "../../core/tools/mcp-server.ts");
+  const onWindows = process.platform === "win32";
+  const servers: Record<string, McpServerSpec> = {};
+  // config.frontend may be a single name or a list of names.
+  const configuredFrontends = Array.isArray(config.frontend)
+    ? config.frontend
+    : [config.frontend];
+  for (const frontend of configuredFrontends) {
+    // The terminal frontend gets no MCP tool server.
+    if (frontend === "terminal") continue;
+    const env = {
+      TALON_BRIDGE_URL: bridgeUrl,
+      TALON_CHAT_ID: chatId,
+      TALON_FRONTEND: frontend,
+    };
+    servers[`${frontend}-tools`] = onWindows
+      ? { command: "npx", args: ["tsx", mcpServerPath], env }
+      : {
+          command: "node",
+          args: ["--import", tsxImport, mcpServerPath],
+          env,
+        };
+  }
+  // Brave Search MCP server (if configured)
+  const braveApiKey = config.braveApiKey;
+  if (braveApiKey) {
+    servers["brave-search"] = {
+      command: resolve(
+        moduleDir,
+        "../../../node_modules/.bin/brave-search-mcp-server",
+      ),
+      args: [],
+      env: { BRAVE_API_KEY: braveApiKey },
+    };
+  }
+  return servers;
+}
+// ── Options builder ─────────────────────────────────────────────────────────
+/**
+ * Build the SDK query() options for a chat.
+ *
+ * Per-chat model and effort settings take precedence over the global config
+ * (effort defaults to "adaptive"). When the active model supports the 1M
+ * context window and is not already suffixed, "[1m]" is appended to the
+ * model name sent to the SDK. If the chat's session has a sessionId, it is
+ * passed as `resume`.
+ *
+ * @param chatId Chat whose settings and session are used.
+ * @returns The options, the active model (without the added suffix) and the
+ *   session returned by getSession().
+ */
+export function buildSdkOptions(chatId: string): BuildSdkOptionsResult {
+  const config = getConfig();
+  const { model: chatModel, effort: chatEffort } = getChatSettings(chatId);
+  const activeModel = chatModel ?? config.model;
+  const thinkingConfig = EFFORT_MAP[chatEffort ?? "adaptive"] ?? {
+    thinking: { type: "adaptive" as const },
+  };
+  let sdkModel = activeModel;
+  if (supports1mContext(activeModel) && !activeModel.includes("[1m]")) {
+    sdkModel = `${activeModel}[1m]`;
+  }
+  const session = getSession(chatId);
+  // Built-in servers first, so plugin servers win on a name collision.
+  const mcpServers = {
+    ...buildMcpServers(chatId),
+    ...getPluginMcpServers(`http://127.0.0.1:${getBridgePort()}`, chatId),
+  };
+  const binaryOverride = config.claudeBinary
+    ? { pathToClaudeCodeExecutable: config.claudeBinary }
+    : {};
+  const resumeOption = session.sessionId ? { resume: session.sessionId } : {};
+  const options: Options = {
+    model: sdkModel,
+    systemPrompt: config.systemPrompt,
+    cwd: config.workspace,
+    permissionMode: "bypassPermissions",
+    allowDangerouslySkipPermissions: true,
+    ...binaryOverride,
+    disallowedTools: [...DISALLOWED_TOOLS_CHAT],
+    ...thinkingConfig,
+    mcpServers,
+    ...resumeOption,
+  };
+  return { options, activeModel, session };
+}

package/src/backend/claude-sdk/state.ts ADDED Viewed

@@ -0,0 +1,59 @@
+/**
+ * Module-level state for the Claude SDK backend.
+ *
+ * Owns the mutable config and bridge port references and exposes
+ * initialization functions + internal getters for sibling modules.
+ */
+import type { TalonConfig } from "../../util/config.js";
+import { registerClaudeModels } from "./models.js";
+// ── State ────────────────────────────────────────────────────────────────────
+// Config captured by initAgent(); stays undefined until then, and getConfig()
+// throws while it is unset.
+let config: TalonConfig | undefined;
+// Supplies the bridge port on demand. Returns 19876 unless initAgent() is
+// given a replacement getter.
+let bridgePortFn: () => number = () => 19876;
+// ── Public API (re-exported from barrel) ────────────────────────────────────
+/**
+ * Initialize the Claude SDK backend.
+ *
+ * Stores the config (and the bridge-port getter, when given), removes the
+ * CLAUDECODE environment variable, and then runs SDK model discovery.
+ *
+ * @param cfg Configuration to keep as the module's live config.
+ * @param getBridgePort Optional replacement for the default port getter.
+ * @throws Whatever registerClaudeModels() throws — discovery failure is fatal.
+ */
+export async function initAgent(
+  cfg: TalonConfig,
+  getBridgePort?: () => number,
+): Promise<void> {
+  config = cfg;
+  if (getBridgePort) {
+    bridgePortFn = getBridgePort;
+  }
+  // The Agent SDK spawns an embedded Claude Code subprocess. With CLAUDECODE
+  // set (e.g. when launched from a Claude Code terminal) that subprocess
+  // refuses to start with a nested-session error that gets swallowed —
+  // causing an infinite hang on Windows.
+  delete process.env.CLAUDECODE;
+  const discoveryOptions: Parameters<typeof registerClaudeModels>[0] = {
+    model: cfg.model,
+    cwd: cfg.workspace,
+    permissionMode: "bypassPermissions",
+    allowDangerouslySkipPermissions: true,
+  };
+  if (cfg.claudeBinary) {
+    discoveryOptions.pathToClaudeCodeExecutable = cfg.claudeBinary;
+  }
+  // Discover available models from the SDK — fatal if this fails
+  await registerClaudeModels(discoveryOptions);
+}
+/**
+ * Replace the system prompt on the live config, so that the next message
+ * picks up new plugin tool descriptions after a plugin hot-reload.
+ * Does nothing when no config has been stored yet.
+ */
+export function updateSystemPrompt(prompt: string): void {
+  if (!config) return;
+  config.systemPrompt = prompt;
+}
+// ── Internal getters (used by sibling modules, NOT re-exported) ─────────────
+/**
+ * Return the config stored by initAgent().
+ *
+ * @throws Error if no config has been stored yet.
+ */
+export function getConfig(): TalonConfig {
+  if (config) return config;
+  throw new Error("Agent not initialized. Call initAgent() first.");
+}
+/** Ask the current port getter for the bridge port (evaluated on every call). */
+export function getBridgePort(): number {
+  const port = bridgePortFn();
+  return port;
+}

package/src/backend/claude-sdk/stream.ts ADDED Viewed

@@ -0,0 +1,221 @@
+/**
+ * Typed stream processing helpers for SDK messages.
+ *
+ * Each function operates on a properly narrowed SDK message type, so
+ * messages themselves are never cast to Record<string, unknown> (only a
+ * tool call's opaque `input` payload is). The StreamState accumulator
+ * replaces the scattered local variables from the original handler.
+ */
+import type {
+  SDKMessage,
+  SDKSystemMessage,
+  SDKPartialAssistantMessage,
+  SDKAssistantMessage,
+  SDKResultMessage,
+  ModelUsage,
+} from "@anthropic-ai/claude-agent-sdk";
+import type { BetaRawContentBlockDeltaEvent } from "@anthropic-ai/sdk/resources/beta/messages/messages.mjs";
+import { STREAM_INTERVAL } from "./constants.js";
+import { log } from "../../util/log.js";
+// ── Stream state accumulator ────────────────────────────────────────────────
+/**
+ * Mutable state accumulated while iterating the SDK message stream.
+ *
+ * - currentBlockText: text of the block being built. Text deltas append to
+ *   it, it is cleared when text is flushed ahead of a tool call, and it is
+ *   overwritten by an assistant message's trailing text or, as a last
+ *   resort, by the result string.
+ * - allResponseText: concatenation of the text segments flushed ahead of
+ *   tool calls.
+ * - newSessionId: not written by any processor visible here — presumably set
+ *   by the handler from the system init message (TODO confirm).
+ * - toolCalls: number of tool_use blocks seen.
+ * - contextTokens: input + cache-read + cache-creation input tokens of the
+ *   last usage iteration in the result message.
+ * - contextWindow: first positive contextWindow found in the result's
+ *   modelUsage; undefined until then.
+ * - numApiCalls: num_turns from the result message (0 when absent).
+ * - sdkInputTokens / sdkOutputTokens / sdkCacheRead / sdkCacheWrite: sums
+ *   over all modelUsage entries.
+ * - lastStreamUpdate: Date.now() timestamp of the last stream callback,
+ *   used for throttling; starts at 0.
+ */
+export type StreamState = {
+  currentBlockText: string;
+  allResponseText: string;
+  newSessionId: string | undefined;
+  toolCalls: number;
+  contextTokens: number;
+  contextWindow: number | undefined;
+  numApiCalls: number;
+  sdkInputTokens: number;
+  sdkOutputTokens: number;
+  sdkCacheRead: number;
+  sdkCacheWrite: number;
+  lastStreamUpdate: number;
+};
+/**
+ * Create a fresh accumulator for one SDK message stream: empty text, zeroed
+ * counters, and no session ID or context window yet.
+ */
+export function createStreamState(): StreamState {
+  const freshState: StreamState = {
+    // Text accumulation
+    currentBlockText: "",
+    allResponseText: "",
+    // Session and tool bookkeeping
+    newSessionId: undefined,
+    toolCalls: 0,
+    // Context and usage figures
+    contextTokens: 0,
+    contextWindow: undefined,
+    numApiCalls: 0,
+    sdkInputTokens: 0,
+    sdkOutputTokens: 0,
+    sdkCacheRead: 0,
+    sdkCacheWrite: 0,
+    // Throttle timestamp
+    lastStreamUpdate: 0,
+  };
+  return freshState;
+}
+// ── Type guards ─────────────────────────────────────────────────────────────
+/** True for system messages whose subtype is "init". */
+export function isSystemInit(msg: SDKMessage): msg is SDKSystemMessage {
+  if (msg.type !== "system") return false;
+  return msg.subtype === "init";
+}
+/** True for partial-assistant messages, i.e. those of type "stream_event". */
+export function isStreamEvent(
+  msg: SDKMessage,
+): msg is SDKPartialAssistantMessage {
+  const { type } = msg;
+  return type === "stream_event";
+}
+/** True for complete assistant messages (type "assistant"). */
+export function isAssistant(msg: SDKMessage): msg is SDKAssistantMessage {
+  const { type } = msg;
+  return type === "assistant";
+}
+/** True for the final result message (type "result"). */
+export function isResult(msg: SDKMessage): msg is SDKResultMessage {
+  const { type } = msg;
+  return type === "result";
+}
+// ── Message processors ──────────────────────────────────────────────────────
+/**
+ * Handle one streaming event from the SDK.
+ *
+ * Only "content_block_delta" events carrying a thinking or text delta are
+ * acted on. Text deltas are appended to state.currentBlockText; thinking
+ * deltas leave it unchanged. In both cases the callback receives the
+ * accumulated text plus the phase, at most once per STREAM_INTERVAL ms.
+ * Without a callback the event is ignored entirely, so no text is
+ * accumulated either.
+ *
+ * @param msg Partial assistant message wrapping the raw stream event.
+ * @param state Accumulator to update.
+ * @param onStreamDelta Optional receiver of throttled progress updates.
+ */
+export function processStreamDelta(
+  msg: SDKPartialAssistantMessage,
+  state: StreamState,
+  onStreamDelta?: (accumulated: string, phase?: "thinking" | "text") => void,
+): void {
+  if (!onStreamDelta) return;
+  const event = msg.event;
+  if (event.type !== "content_block_delta") return;
+  const { delta } = event as BetaRawContentBlockDeltaEvent;
+  let phase: "thinking" | "text";
+  switch (delta.type) {
+    case "thinking_delta":
+      phase = "thinking";
+      break;
+    case "text_delta":
+      // Accumulate before the throttle check so no text is ever dropped.
+      state.currentBlockText += delta.text;
+      phase = "text";
+      break;
+    default:
+      return;
+  }
+  const now = Date.now();
+  if (now - state.lastStreamUpdate < STREAM_INTERVAL) return;
+  state.lastStreamUpdate = now;
+  onStreamDelta(state.currentBlockText, phase);
+}
+/**
+ * A tool call extracted from an assistant message.
+ *
+ * `name` is the tool_use block's name. `input` is the block's input when
+ * that is a non-null object, otherwise an empty object.
+ */
+export type ToolCall = {
+  name: string;
+  input: Record<string, unknown>;
+};
+/** Result of processing an assistant message. */
+export type AssistantResult = {
+  /** Trimmed text segments that preceded tool calls, in message order; each is to be sent as a progress message. */
+  progressTexts: string[];
+  /** Tool calls found in the message, in message order. */
+  tools: ToolCall[];
+  /** Untrimmed text left after the last flush (the full text if there were no tool calls); "" when it is only whitespace. */
+  trailingText: string;
+};
+/**
+ * Split a complete assistant message into progress texts, tool calls and
+ * trailing text, relying on the typed content-block discriminated union.
+ *
+ * Text blocks are concatenated until a tool_use block is reached. If the
+ * text gathered so far is not just whitespace, it is emitted (trimmed) as a
+ * progress text, appended (untrimmed) to state.allResponseText, and both the
+ * pending text and state.currentBlockText are reset. Whitespace-only text is
+ * kept and carried over to the following text. Whatever non-blank text
+ * remains at the end becomes the trailing text and the new
+ * state.currentBlockText.
+ *
+ * @param msg Assistant message whose content blocks are inspected.
+ * @param state Accumulator; toolCalls, allResponseText and currentBlockText
+ *   may be updated.
+ * @returns Progress texts, tool calls and trailing text, all in message order.
+ */
+export function processAssistantMessage(
+  msg: SDKAssistantMessage,
+  state: StreamState,
+): AssistantResult {
+  const progressTexts: string[] = [];
+  const tools: ToolCall[] = [];
+  let pendingText = "";
+  for (const block of msg.message.content) {
+    switch (block.type) {
+      case "text":
+        pendingText += block.text;
+        break;
+      case "tool_use": {
+        state.toolCalls += 1;
+        const rawInput = block.input;
+        const isObjectInput = typeof rawInput === "object" && rawInput !== null;
+        tools.push({
+          name: block.name,
+          input: isObjectInput ? (rawInput as Record<string, unknown>) : {},
+        });
+        // Text before this tool call is a progress message
+        const progress = pendingText.trim();
+        if (progress) {
+          progressTexts.push(progress);
+          state.allResponseText += pendingText;
+          pendingText = "";
+          state.currentBlockText = "";
+        }
+        break;
+      }
+      default:
+        break;
+    }
+  }
+  // Remaining text after all tool calls (or if no tool calls)
+  const hasTrailingText = pendingText.trim() !== "";
+  if (hasTrailingText) {
+    state.currentBlockText = pendingText;
+  }
+  return {
+    progressTexts,
+    tools,
+    trailingText: hasTrailingText ? pendingText : "",
+  };
+}
+/**
+ * Fold the final result message into the stream state.
+ *
+ * - numApiCalls is set from num_turns (0 when absent).
+ * - contextTokens is set to the input + cache-read + cache-creation input
+ *   tokens of the last usage iteration, when iterations are reported.
+ * - Token totals are added up over every modelUsage entry, and the first
+ *   positive contextWindow is kept.
+ * - If no text has been captured at all and the message carries a string
+ *   `result`, that string becomes currentBlockText.
+ *
+ * A summary line is written to the agent log.
+ *
+ * @param msg Result message from the SDK.
+ * @param state Accumulator to update in place.
+ */
+export function processResultMessage(
+  msg: SDKResultMessage,
+  state: StreamState,
+): void {
+  state.numApiCalls = msg.num_turns ?? 0;
+  // Context fill from last API iteration
+  const usage = msg.usage;
+  if (usage && Array.isArray(usage.iterations) && usage.iterations.length > 0) {
+    const lastIteration = usage.iterations[usage.iterations.length - 1];
+    const freshInput = lastIteration.input_tokens ?? 0;
+    const cacheRead = lastIteration.cache_read_input_tokens ?? 0;
+    const cacheWrite = lastIteration.cache_creation_input_tokens ?? 0;
+    state.contextTokens = freshInput + cacheRead + cacheWrite;
+  }
+  // Token counts and context window from SDK modelUsage (aggregated per model)
+  const modelUsage: Record<string, ModelUsage> = msg.modelUsage;
+  for (const [, perModel] of Object.entries(modelUsage)) {
+    state.sdkInputTokens += perModel.inputTokens ?? 0;
+    state.sdkOutputTokens += perModel.outputTokens ?? 0;
+    state.sdkCacheRead += perModel.cacheReadInputTokens ?? 0;
+    state.sdkCacheWrite += perModel.cacheCreationInputTokens ?? 0;
+    // Keep the first positive window; later entries never overwrite it.
+    if (state.contextWindow === undefined && perModel.contextWindow > 0) {
+      state.contextWindow = perModel.contextWindow;
+    }
+  }
+  log(
+    "agent",
+    `SDK result: modelUsage=${JSON.stringify(modelUsage)}, contextWindow=${state.contextWindow}, contextTokens=${state.contextTokens}, numApiCalls=${state.numApiCalls}`,
+  );
+  // Fallback: if no text was captured via streaming or assistant messages,
+  // pull from the result string (available on success results).
+  const nothingCaptured = !state.allResponseText && !state.currentBlockText;
+  if (nothingCaptured && "result" in msg && typeof msg.result === "string") {
+    state.currentBlockText = msg.result;
+  }
+}