npm - talon-agent - Versions diffs - 1.9.2 → 1.10.1 - Mend

talon-agent 1.9.2 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/package.json +10 -5
package/prompts/telegram.md +24 -6
package/src/__tests__/claude-sdk-options.test.ts +95 -0
package/src/__tests__/end-turn.test.ts +307 -0
package/src/__tests__/handlers.test.ts +107 -43
package/src/__tests__/integration/sdk-stub.test.ts +208 -0
package/src/__tests__/integration/stub-claude/build-sea.mjs +114 -0
package/src/__tests__/integration/stub-claude/fake-claude.mjs +352 -0
package/src/__tests__/integration/stub-claude/helpers.ts +263 -0
package/src/__tests__/integration/stub-claude/protocol.ts +108 -0
package/src/__tests__/integration/stub-claude/sea-config.json +7 -0
package/src/__tests__/integration/talon-bootstrap.ts +206 -0
package/src/__tests__/integration/talon-functional.test.ts +190 -0
package/src/__tests__/package.functional.test.ts +178 -0
package/src/backend/claude-sdk/handler.ts +110 -1
package/src/backend/claude-sdk/options.ts +59 -1
package/src/backend/claude-sdk/stream.ts +67 -0
package/src/core/tools/index.ts +41 -0
package/src/core/tools/messaging.ts +79 -1
package/src/core/tools/types.ts +14 -0
package/src/frontend/teams/index.ts +20 -10
package/src/frontend/telegram/handlers.ts +16 -12

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "talon-agent",
-  "version": "1.9.2",
+  "version": "1.10.1",
   "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
   "author": "Dylan Neve",
   "license": "MIT",
@@ -41,13 +41,17 @@
     "setup": "tsx src/cli.ts setup",
     "dev": "tsx --watch src/index.ts",
     "test": "vitest run",
+    "test:ci": "vitest run --reporter=verbose --reporter=json --outputFile=test-results.json",
+    "test:functional": "vitest run --reporter=verbose --reporter=json --outputFile=functional-results.json src/__tests__/package.functional.test.ts src/__tests__/tool-functional.test.ts src/__tests__/mcp-launcher.test.ts src/__tests__/mcp-launcher-functional.test.ts src/__tests__/integration/sdk-stub.test.ts src/__tests__/integration/talon-functional.test.ts",
+    "build:stub-sea": "node src/__tests__/integration/stub-claude/build-sea.mjs",
     "test:watch": "vitest",
     "test:coverage": "vitest run --coverage",
     "typecheck": "tsc --noEmit",
     "lint": "oxlint src/",
     "knip": "knip",
     "format": "prettier --write src/ prompts/",
-    "format:check": "prettier --check src/ prompts/"
+    "format:check": "prettier --check src/ prompts/",
+    "ci:protect": "node .github/scripts/enforce-ci-gate.mjs"
   },
   "dependencies": {
     "@anthropic-ai/claude-agent-sdk": "^0.2.108",
@@ -58,7 +62,7 @@
     "@grammyjs/transformer-throttler": "^1.2.1",
     "@modelcontextprotocol/sdk": "^1.29.0",
     "@opencode-ai/sdk": "^1.4.0",
-    "@playwright/mcp": "^0.0.74",
+    "@playwright/mcp": "^0.0.75",
     "big-integer": "^1.6.52",
     "cheerio": "^1.2.0",
     "croner": "^10.0.1",
@@ -71,7 +75,7 @@
     "telegram": "^2.26.22",
     "tsx": "^4.21.0",
     "undici": "^8.0.2",
-    "write-file-atomic": "^7.0.1",
+    "write-file-atomic": "^8.0.0",
     "zod": "^4.3.6"
   },
   "devDependencies": {
@@ -86,6 +90,7 @@
     "vitest": "^4.1.3"
   },
   "overrides": {
-    "@anthropic-ai/sdk": "^0.95.0"
+    "@anthropic-ai/sdk": "^0.95.0",
+    "ip-address": "^10.1.1"
   }
 }

package/prompts/telegram.md CHANGED Viewed

@@ -2,15 +2,33 @@
 In groups, you'll see messages prefixed with [Name]: — use their name naturally.
-### CRITICAL: Message delivery
+### Response flow — IMPORTANT
-ALL messages to the user MUST be sent using the `send` tool. Your plain text output is **private** — the user never sees it, only you. Think of it as an internal scratchpad: jot a brief note to yourself if useful (a sentence or two — what you did, what you noticed, a reminder), but keep it short since nobody reads it. The only way to reach the user is the `send` tool.
+Your output stream (this prose right here) is **private scratchpad**. The user never sees it. The ONLY ways for content to reach the user are:
-### The `send` tool
+- **`end_turn(text=...)`** — the canonical way to deliver your final reply. Closes the turn. Optional `reply_to` for threaded replies, optional `buttons` for inline keyboards.
+- **`end_turn()`** with no args — explicit silent close. Use this when you've done what you needed to (e.g. reacted with an emoji, ran a tool that didn't need a reply) and want to make it clear that the silence is intentional.
+- **`send(...)`** — for mid-turn rich content (photos, polls, voice, stickers, scheduled messages, multi-message responses, multi-target). Does NOT close the turn — typically followed by `end_turn(...)` or `end_turn()`.
+- **`react(message_id, emoji)`** — emoji reaction on a message. Often the right response to acknowledge without replying. Pair with `end_turn()` to close cleanly.
-One tool for everything. Set `type` to choose what to send:
+**There is no fallback.** Prose written without an `end_turn` / `send` call is scratchpad — dropped. If you write a thoughtful response in your output stream and forget to wrap it in `end_turn(text=...)`, the user sees nothing. Get into the habit of ending every turn with one of the closing options above.
-- `send(type="text", text="Hello!")` — send a message
+Doing nothing — no tool call at all — is also a valid silent close (the model genuinely had nothing to do), but `end_turn()` makes the intent explicit and is preferred when the silence is deliberate.
+**Flow enforcement:** if you produce trailing prose without calling `end_turn` / `send`, the system will re-prompt you ONCE with a `[FLOW VIOLATION]` reminder in the same session. You'll see your broken turn in history and get a fresh turn to redo it correctly. Burns 2x the tokens for that exchange, so just call `end_turn` the first time.
+### When to use `send` vs `end_turn`
+- **`end_turn`** = the final reply that ends your turn. Plain text + optional reply_to + optional buttons. The closer.
+- **`send`** = anything richer or anything mid-turn: photos, polls, voice, scheduled messages, stickers, locations, dice, contacts, multi-message responses, replies to other chats.
+For a plain text final reply, prefer `end_turn(text=...)` over `send(type="text", text=...)`. They reach the same delivery path, but the name makes the intent unambiguous.
+### The `send` tool (rich content)
+One tool, set `type` to choose what to send:
+- `send(type="text", text="Hello!")` — plain text (use end_turn instead for final reply)
 - `send(type="text", text="Hey", reply_to=12345)` — reply to a specific message
 - `send(type="text", text="Pick", buttons=[[{"text":"A","callback_data":"a"}]])` — with buttons
 - `send(type="text", text="Reminder", delay_seconds=60)` — schedule for later
@@ -54,7 +72,7 @@ The user's message ID is in the prompt as [msg_id:N]. Use with `reply_to` and `r
 You don't HAVE to respond to every message. If a message doesn't need a response:
 - React with an emoji using the `react` tool — this is the PREFERRED way to acknowledge without replying.
-- Or simply don't call `send` and skip it entirely.
+- Or call `end_turn()` with no args to end the turn silently.
 - In groups, prefer reactions over replies for simple acknowledgements.
 ### Reactions

package/src/__tests__/claude-sdk-options.test.ts CHANGED Viewed

@@ -107,4 +107,99 @@ describe("buildSdkOptions", () => {
     expect(activeModel).toBe("claude-sonnet-4-6[1m]");
     expect(options.model).toBe("sonnet[1m]");
   });
+  describe("PostToolBatch turn-terminator hook", () => {
+    type HookCallback = (
+      input: unknown,
+      toolUseID?: string,
+      ctx?: { signal: AbortSignal },
+    ) => Promise<{ continue?: boolean; stopReason?: string }>;
+    const callHook = async (toolNames: string[]): Promise<unknown> => {
+      const { buildSdkOptions } =
+        await import("../backend/claude-sdk/options.js");
+      const { options } = buildSdkOptions("chat-hook-test");
+      const matchers = options.hooks?.PostToolBatch;
+      expect(matchers).toBeDefined();
+      expect(matchers!.length).toBe(1);
+      const hook = matchers![0]!.hooks[0] as unknown as HookCallback;
+      return hook(
+        {
+          hook_event_name: "PostToolBatch",
+          tool_calls: toolNames.map((name, i) => ({
+            tool_name: name,
+            tool_input: {},
+            tool_use_id: `tu_${i}`,
+          })),
+        },
+        undefined,
+        { signal: new AbortController().signal },
+      );
+    };
+    it("registers a PostToolBatch hook on the options object", async () => {
+      const { buildSdkOptions } =
+        await import("../backend/claude-sdk/options.js");
+      const { options } = buildSdkOptions("chat-hook-1");
+      expect(options.hooks?.PostToolBatch).toBeDefined();
+      expect(options.hooks!.PostToolBatch!.length).toBe(1);
+      expect(options.hooks!.PostToolBatch![0]!.hooks.length).toBe(1);
+    });
+    it("returns continue:false when an MCP-prefixed end_turn is in the batch", async () => {
+      const result = (await callHook([
+        "mcp__telegram-tools__send",
+        "mcp__telegram-tools__end_turn",
+      ])) as { continue: boolean; stopReason?: string };
+      expect(result.continue).toBe(false);
+      expect(result.stopReason).toMatch(/end_turn/i);
+    });
+    it("returns continue:false when a bare end_turn is in the batch", async () => {
+      const result = (await callHook(["end_turn"])) as {
+        continue: boolean;
+      };
+      expect(result.continue).toBe(false);
+    });
+    it("returns continue:true when no terminator is in the batch", async () => {
+      const result = (await callHook([
+        "mcp__telegram-tools__send",
+        "Read",
+        "Bash",
+      ])) as { continue: boolean };
+      expect(result.continue).toBe(true);
+    });
+    it("returns continue:true on an empty batch", async () => {
+      const result = (await callHook([])) as { continue: boolean };
+      expect(result.continue).toBe(true);
+    });
+    it("ignores non-PostToolBatch events defensively", async () => {
+      const { buildSdkOptions } =
+        await import("../backend/claude-sdk/options.js");
+      const { options } = buildSdkOptions("chat-hook-defensive");
+      const hook = options.hooks!.PostToolBatch![0]!.hooks[0] as unknown as (
+        input: unknown,
+        id?: string,
+        ctx?: { signal: AbortSignal },
+      ) => Promise<{ continue: boolean }>;
+      const result = await hook(
+        {
+          hook_event_name: "PostToolUse",
+          tool_name: "mcp__telegram-tools__end_turn",
+          tool_input: {},
+          tool_response: {},
+          tool_use_id: "tu_0",
+        },
+        undefined,
+        { signal: new AbortController().signal },
+      );
+      expect(result.continue).toBe(true);
+    });
+  });
 });

package/src/__tests__/end-turn.test.ts ADDED Viewed

@@ -0,0 +1,307 @@
+/**
+ * Unit tests for the `end_turn` tool and the cross-tool dedup helpers used to
+ * suppress duplicate deliveries when the model calls both `end_turn` and
+ * `send(type="text")` with similar content in the same turn.
+ *
+ * Covers:
+ *   - normalizeForDedupe / isDuplicateOfDelivered (dedup math)
+ *   - end_turn tool definition (schema, dispatch, silent path)
+ *   - StreamState carries lastTrailingText, deliveredTextNorms, turnTerminated
+ *   - isTurnTerminator / stripMcpPrefix, including MCP-prefixed SDK tool names
+ */
+import { describe, it, expect, vi } from "vitest";
+import {
+  normalizeForDedupe,
+  isDuplicateOfDelivered,
+  createStreamState,
+  processAssistantMessage,
+} from "../backend/claude-sdk/stream.js";
+import type { SDKAssistantMessage } from "@anthropic-ai/claude-agent-sdk";
+import { messagingTools } from "../core/tools/messaging.js";
+import {
+  isTurnTerminator,
+  stripMcpPrefix,
+  ALL_TOOLS,
+} from "../core/tools/index.js";
+describe("normalizeForDedupe", () => {
+  it("trims, lowercases, and collapses whitespace", () => {
+    expect(normalizeForDedupe("  Hello   World  ")).toBe("hello world");
+    expect(normalizeForDedupe("HELLO\n\tWORLD")).toBe("hello world");
+  });
+  it("strips emoji so prose-with-emoji matches messaging-tool-text", () => {
+    expect(normalizeForDedupe("Got it 👍")).toBe("got it");
+    expect(normalizeForDedupe("Done ✅ and dusted")).toBe("done and dusted");
+  });
+  it("returns empty string for whitespace-only input", () => {
+    expect(normalizeForDedupe("   \n\t  ")).toBe("");
+  });
+});
+describe("isDuplicateOfDelivered", () => {
+  it("returns false when nothing has been delivered yet", () => {
+    expect(isDuplicateOfDelivered("hello there", [])).toBe(false);
+  });
+  it("returns false for very short candidates (below dedup threshold)", () => {
+    // Below MIN_DEDUP_LENGTH (10) — short replies like "ok" / "sure" should
+    // never be deduped, even if they happened to coincide with a longer
+    // delivered text containing them.
+    expect(isDuplicateOfDelivered("ok", ["ok thanks pal"])).toBe(false);
+  });
+  it("matches when normalized candidate is a substring of delivered", () => {
+    const delivered = [normalizeForDedupe("Got it sur, pushing now")];
+    expect(isDuplicateOfDelivered("Got it sur, pushing now", delivered)).toBe(
+      true,
+    );
+  });
+  it("matches when normalized delivered is a substring of candidate", () => {
+    // Model called end_turn(text="Pushing now") then wrote prose
+    // "I'm pushing now and back in a sec." — fuzzy match catches this.
+    const delivered = [normalizeForDedupe("Pushing now")];
+    expect(
+      isDuplicateOfDelivered("I'm pushing now and back in a sec.", delivered),
+    ).toBe(true);
+  });
+  it("does not match unrelated content", () => {
+    const delivered = [normalizeForDedupe("PR #106 merged")];
+    expect(
+      isDuplicateOfDelivered("Got it, I'll look at the docker logs", delivered),
+    ).toBe(false);
+  });
+  it("ignores emoji differences when comparing", () => {
+    // Model wrote "Done 🎉" as prose, also called end_turn(text="Done")
+    const delivered = [normalizeForDedupe("Done")];
+    expect(isDuplicateOfDelivered("Done 🎉", delivered)).toBe(false);
+    // Above is false because "done" (4 chars) < MIN_DEDUP_LENGTH (10).
+    // For a longer match:
+    const longDelivered = [normalizeForDedupe("All set, pushing now")];
+    expect(
+      isDuplicateOfDelivered("All set, pushing now 🚀", longDelivered),
+    ).toBe(true);
+  });
+});
+describe("createStreamState", () => {
+  it("initializes lastTrailingText and deliveredTextNorms", () => {
+    const state = createStreamState();
+    expect(state.lastTrailingText).toBe("");
+    expect(state.deliveredTextNorms).toEqual([]);
+  });
+  it("initializes turnTerminated to false", () => {
+    const state = createStreamState();
+    expect(state.turnTerminated).toBe(false);
+  });
+});
+describe("turn-terminator declaration", () => {
+  it("end_turn is declared with endsTurn: true", () => {
+    const endTurn = messagingTools.find((t) => t.name === "end_turn");
+    expect(endTurn?.endsTurn).toBe(true);
+  });
+  it("send is NOT declared as a turn terminator", () => {
+    // `send` is for mid-turn rich content (photos, polls, scheduled messages,
+    // etc.) — calling it does NOT mean the model is done. Only end_turn
+    // declares the turn finished.
+    const send = messagingTools.find((t) => t.name === "send");
+    expect(send?.endsTurn).toBeFalsy();
+  });
+  it("isTurnTerminator returns true for end_turn", () => {
+    expect(isTurnTerminator("end_turn")).toBe(true);
+  });
+  it("isTurnTerminator returns false for non-terminator tools", () => {
+    expect(isTurnTerminator("send")).toBe(false);
+    expect(isTurnTerminator("react")).toBe(false);
+    expect(isTurnTerminator("fetch_url")).toBe(false);
+    expect(isTurnTerminator("nonexistent_tool")).toBe(false);
+  });
+  it("isTurnTerminator handles MCP-prefixed names", () => {
+    // Tools served through MCP arrive with a `mcp__<server>__` prefix.
+    // The check must normalize the prefix so the SDK's actual tool names
+    // match the registry. Without this, downstream branches gated on
+    // `state.turnTerminated` silently never fire — the flow-violation
+    // re-prompt skip and trailing-prose dedup both break.
+    expect(isTurnTerminator("mcp__telegram-tools__end_turn")).toBe(true);
+    expect(isTurnTerminator("mcp__teams-tools__end_turn")).toBe(true);
+    // Non-terminators with the same prefix shape still return false
+    expect(isTurnTerminator("mcp__telegram-tools__send")).toBe(false);
+    expect(isTurnTerminator("mcp__telegram-tools__react")).toBe(false);
+    // Multi-hyphen server name + tool name containing `_` must still split on the `__` boundary
+    expect(isTurnTerminator("mcp__some-server-name__end_turn")).toBe(true);
+  });
+  it("stripMcpPrefix strips the mcp__<server>__ prefix when present", () => {
+    expect(stripMcpPrefix("mcp__telegram-tools__end_turn")).toBe("end_turn");
+    expect(stripMcpPrefix("mcp__brave-search__brave_web_search")).toBe(
+      "brave_web_search",
+    );
+    // Non-greedy match takes the FIRST `__` after `mcp__` as the boundary
+    expect(stripMcpPrefix("mcp__a__b__c")).toBe("b__c");
+  });
+  it("stripMcpPrefix returns input unchanged when no prefix matches", () => {
+    expect(stripMcpPrefix("end_turn")).toBe("end_turn");
+    expect(stripMcpPrefix("send")).toBe("send");
+    expect(stripMcpPrefix("Read")).toBe("Read");
+    // Looks like a prefix but missing the trailing `__`
+    expect(stripMcpPrefix("mcp__incomplete")).toBe("mcp__incomplete");
+    // Different prefix shape
+    expect(stripMcpPrefix("not_mcp__server__tool")).toBe(
+      "not_mcp__server__tool",
+    );
+  });
+  it("only one turn terminator currently exists (end_turn)", () => {
+    // If a future change adds a second terminator, this test should fail
+    // and the author should document why a new terminator is necessary.
+    const terminators = ALL_TOOLS.filter((t) => t.endsTurn).map((t) => t.name);
+    expect(terminators).toEqual(["end_turn"]);
+  });
+});
+describe("end_turn tool definition", () => {
+  const endTurn = messagingTools.find((t) => t.name === "end_turn");
+  it("is registered in messagingTools", () => {
+    expect(endTurn).toBeDefined();
+    expect(endTurn?.tag).toBe("messaging");
+    expect(endTurn?.frontends).toEqual(["telegram", "teams"]);
+  });
+  it("has text, reply_to, and buttons schema fields", () => {
+    expect(endTurn?.schema).toBeDefined();
+    expect(endTurn?.schema.text).toBeDefined();
+    expect(endTurn?.schema.reply_to).toBeDefined();
+    expect(endTurn?.schema.buttons).toBeDefined();
+  });
+  it("dispatches plain text via send_message bridge", async () => {
+    const bridge = vi.fn(async () => ({ ok: true }));
+    await endTurn!.execute({ text: "Hello sur" }, bridge);
+    expect(bridge).toHaveBeenCalledWith("send_message", {
+      text: "Hello sur",
+      reply_to_message_id: undefined,
+    });
+  });
+  it("dispatches text + reply_to via send_message bridge", async () => {
+    const bridge = vi.fn(async () => ({ ok: true }));
+    await endTurn!.execute({ text: "Yep", reply_to: 12345 }, bridge);
+    expect(bridge).toHaveBeenCalledWith("send_message", {
+      text: "Yep",
+      reply_to_message_id: 12345,
+    });
+  });
+  it("dispatches text + buttons via send_message_with_buttons bridge", async () => {
+    const bridge = vi.fn(async () => ({ ok: true }));
+    const buttons = [[{ text: "Click", callback_data: "x" }]];
+    await endTurn!.execute({ text: "Pick", buttons }, bridge);
+    expect(bridge).toHaveBeenCalledWith("send_message_with_buttons", {
+      text: "Pick",
+      rows: buttons,
+      reply_to_message_id: undefined,
+    });
+  });
+  it("ends silently with no bridge call when text is omitted", async () => {
+    const bridge = vi.fn(async () => ({ ok: true }));
+    const result = await endTurn!.execute({}, bridge);
+    expect(bridge).not.toHaveBeenCalled();
+    expect(result).toEqual({ ok: true, silent: true });
+  });
+  it("ends silently with no bridge call when text is whitespace-only", async () => {
+    const bridge = vi.fn(async () => ({ ok: true }));
+    const result = await endTurn!.execute({ text: "   \n\t  " }, bridge);
+    expect(bridge).not.toHaveBeenCalled();
+    expect(result).toEqual({ ok: true, silent: true });
+  });
+});
+// ── Production wire-shape contract ──────────────────────────────────────────
+//
+// These tests pin the integration between the SDK's actual emitted tool
+// names (always MCP-prefixed when served via MCP) and the registry checks
+// the handler runs against them. They are the tests that would have caught
+// the bug fixed in this PR — strict-equality `isTurnTerminator("end_turn")`
+// passed in unit tests but the production code path called
+// `isTurnTerminator("mcp__telegram-tools__end_turn")` and silently failed.
+//
+// Auto-derived from ALL_TOOLS so adding a new endsTurn tool or a new MCP
+// frontend stays covered without manually adding cases.
+describe("turn-terminator integration with SDK production tool name shapes", () => {
+  // Built-in MCP server names that the SDK is known to wire Talon's tools
+  // through. Keep this list in sync with the actual MCP server registration
+  // in src/core/tools/mcp-server.ts and frontend wiring.
+  const KNOWN_MCP_SERVERS = ["telegram-tools", "teams-tools"];
+  for (const tool of ALL_TOOLS.filter((t) => t.endsTurn)) {
+    for (const server of KNOWN_MCP_SERVERS) {
+      const sdkName = `mcp__${server}__${tool.name}`;
+      it(`isTurnTerminator(${sdkName}) === true`, () => {
+        // The SDK never emits bare names for MCP-served tools — it always
+        // includes the `mcp__<server>__` prefix. Strict equality against the
+        // registry's bare name was the production bug.
+        expect(isTurnTerminator(sdkName)).toBe(true);
+      });
+      it(`processAssistantMessage + isTurnTerminator: ${sdkName} flips state.turnTerminated`, () => {
+        // End-to-end check of the exact two-step the handler does:
+        //   block.name -> tools[].name (via processAssistantMessage)
+        //   tools[].name -> isTurnTerminator
+        // If either step normalizes inconsistently, this breaks.
+        const state = createStreamState();
+        const msg = {
+          type: "assistant",
+          message: {
+            content: [
+              {
+                type: "tool_use",
+                id: "tool_1",
+                name: sdkName,
+                input: { text: "Hello sur" },
+              },
+            ],
+          },
+        } as unknown as SDKAssistantMessage;
+        const result = processAssistantMessage(msg, state);
+        expect(result.tools).toHaveLength(1);
+        expect(result.tools[0].name).toBe(sdkName);
+        // This is the exact line in handler.ts:
+        //     if (isTurnTerminator(tool.name)) state.turnTerminated = true;
+        if (isTurnTerminator(result.tools[0].name)) {
+          state.turnTerminated = true;
+        }
+        expect(state.turnTerminated).toBe(true);
+      });
+    }
+  }
+  it("non-terminator tools stay non-terminator under MCP prefixing", () => {
+    // Make sure prefix-stripping doesn't accidentally promote arbitrary
+    // tools to terminators.
+    const nonTerminators = ALL_TOOLS.filter((t) => !t.endsTurn);
+    expect(nonTerminators.length).toBeGreaterThan(0);
+    for (const tool of nonTerminators.slice(0, 5)) {
+      for (const server of KNOWN_MCP_SERVERS) {
+        expect(isTurnTerminator(`mcp__${server}__${tool.name}`)).toBe(false);
+      }
+    }
+  });
+});