talon-agent 1.10.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "talon-agent",
3
- "version": "1.10.0",
3
+ "version": "1.10.1",
4
4
  "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
5
5
  "author": "Dylan Neve",
6
6
  "license": "MIT",
@@ -41,13 +41,17 @@
41
41
  "setup": "tsx src/cli.ts setup",
42
42
  "dev": "tsx --watch src/index.ts",
43
43
  "test": "vitest run",
44
+ "test:ci": "vitest run --reporter=verbose --reporter=json --outputFile=test-results.json",
45
+ "test:functional": "vitest run --reporter=verbose --reporter=json --outputFile=functional-results.json src/__tests__/package.functional.test.ts src/__tests__/tool-functional.test.ts src/__tests__/mcp-launcher.test.ts src/__tests__/mcp-launcher-functional.test.ts src/__tests__/integration/sdk-stub.test.ts src/__tests__/integration/talon-functional.test.ts",
46
+ "build:stub-sea": "node src/__tests__/integration/stub-claude/build-sea.mjs",
44
47
  "test:watch": "vitest",
45
48
  "test:coverage": "vitest run --coverage",
46
49
  "typecheck": "tsc --noEmit",
47
50
  "lint": "oxlint src/",
48
51
  "knip": "knip",
49
52
  "format": "prettier --write src/ prompts/",
50
- "format:check": "prettier --check src/ prompts/"
53
+ "format:check": "prettier --check src/ prompts/",
54
+ "ci:protect": "node .github/scripts/enforce-ci-gate.mjs"
51
55
  },
52
56
  "dependencies": {
53
57
  "@anthropic-ai/claude-agent-sdk": "^0.2.108",
@@ -58,7 +62,7 @@
58
62
  "@grammyjs/transformer-throttler": "^1.2.1",
59
63
  "@modelcontextprotocol/sdk": "^1.29.0",
60
64
  "@opencode-ai/sdk": "^1.4.0",
61
- "@playwright/mcp": "^0.0.74",
65
+ "@playwright/mcp": "^0.0.75",
62
66
  "big-integer": "^1.6.52",
63
67
  "cheerio": "^1.2.0",
64
68
  "croner": "^10.0.1",
@@ -71,7 +75,7 @@
71
75
  "telegram": "^2.26.22",
72
76
  "tsx": "^4.21.0",
73
77
  "undici": "^8.0.2",
74
- "write-file-atomic": "^7.0.1",
78
+ "write-file-atomic": "^8.0.0",
75
79
  "zod": "^4.3.6"
76
80
  },
77
81
  "devDependencies": {
@@ -86,6 +90,7 @@
86
90
  "vitest": "^4.1.3"
87
91
  },
88
92
  "overrides": {
89
- "@anthropic-ai/sdk": "^0.95.0"
93
+ "@anthropic-ai/sdk": "^0.95.0",
94
+ "ip-address": "^10.1.1"
90
95
  }
91
96
  }
@@ -107,4 +107,99 @@ describe("buildSdkOptions", () => {
107
107
  expect(activeModel).toBe("claude-sonnet-4-6[1m]");
108
108
  expect(options.model).toBe("sonnet[1m]");
109
109
  });
110
+
111
+ describe("PostToolBatch turn-terminator hook", () => {
112
+ type HookCallback = (
113
+ input: unknown,
114
+ toolUseID?: string,
115
+ ctx?: { signal: AbortSignal },
116
+ ) => Promise<{ continue?: boolean; stopReason?: string }>;
117
+
118
+ const callHook = async (toolNames: string[]): Promise<unknown> => {
119
+ const { buildSdkOptions } =
120
+ await import("../backend/claude-sdk/options.js");
121
+ const { options } = buildSdkOptions("chat-hook-test");
122
+
123
+ const matchers = options.hooks?.PostToolBatch;
124
+ expect(matchers).toBeDefined();
125
+ expect(matchers!.length).toBe(1);
126
+ const hook = matchers![0]!.hooks[0] as unknown as HookCallback;
127
+
128
+ return hook(
129
+ {
130
+ hook_event_name: "PostToolBatch",
131
+ tool_calls: toolNames.map((name, i) => ({
132
+ tool_name: name,
133
+ tool_input: {},
134
+ tool_use_id: `tu_${i}`,
135
+ })),
136
+ },
137
+ undefined,
138
+ { signal: new AbortController().signal },
139
+ );
140
+ };
141
+
142
+ it("registers a PostToolBatch hook on the options object", async () => {
143
+ const { buildSdkOptions } =
144
+ await import("../backend/claude-sdk/options.js");
145
+ const { options } = buildSdkOptions("chat-hook-1");
146
+ expect(options.hooks?.PostToolBatch).toBeDefined();
147
+ expect(options.hooks!.PostToolBatch!.length).toBe(1);
148
+ expect(options.hooks!.PostToolBatch![0]!.hooks.length).toBe(1);
149
+ });
150
+
151
+ it("returns continue:false when an MCP-prefixed end_turn is in the batch", async () => {
152
+ const result = (await callHook([
153
+ "mcp__telegram-tools__send",
154
+ "mcp__telegram-tools__end_turn",
155
+ ])) as { continue: boolean; stopReason?: string };
156
+ expect(result.continue).toBe(false);
157
+ expect(result.stopReason).toMatch(/end_turn/i);
158
+ });
159
+
160
+ it("returns continue:false when a bare end_turn is in the batch", async () => {
161
+ const result = (await callHook(["end_turn"])) as {
162
+ continue: boolean;
163
+ };
164
+ expect(result.continue).toBe(false);
165
+ });
166
+
167
+ it("returns continue:true when no terminator is in the batch", async () => {
168
+ const result = (await callHook([
169
+ "mcp__telegram-tools__send",
170
+ "Read",
171
+ "Bash",
172
+ ])) as { continue: boolean };
173
+ expect(result.continue).toBe(true);
174
+ });
175
+
176
+ it("returns continue:true on an empty batch", async () => {
177
+ const result = (await callHook([])) as { continue: boolean };
178
+ expect(result.continue).toBe(true);
179
+ });
180
+
181
+ it("ignores non-PostToolBatch events defensively", async () => {
182
+ const { buildSdkOptions } =
183
+ await import("../backend/claude-sdk/options.js");
184
+ const { options } = buildSdkOptions("chat-hook-defensive");
185
+ const hook = options.hooks!.PostToolBatch![0]!.hooks[0] as unknown as (
186
+ input: unknown,
187
+ id?: string,
188
+ ctx?: { signal: AbortSignal },
189
+ ) => Promise<{ continue: boolean }>;
190
+
191
+ const result = await hook(
192
+ {
193
+ hook_event_name: "PostToolUse",
194
+ tool_name: "mcp__telegram-tools__end_turn",
195
+ tool_input: {},
196
+ tool_response: {},
197
+ tool_use_id: "tu_0",
198
+ },
199
+ undefined,
200
+ { signal: new AbortController().signal },
201
+ );
202
+ expect(result.continue).toBe(true);
203
+ });
204
+ });
110
205
  });
@@ -14,9 +14,15 @@ import {
14
14
  normalizeForDedupe,
15
15
  isDuplicateOfDelivered,
16
16
  createStreamState,
17
+ processAssistantMessage,
17
18
  } from "../backend/claude-sdk/stream.js";
19
+ import type { SDKAssistantMessage } from "@anthropic-ai/claude-agent-sdk";
18
20
  import { messagingTools } from "../core/tools/messaging.js";
19
- import { isTurnTerminator, ALL_TOOLS } from "../core/tools/index.js";
21
+ import {
22
+ isTurnTerminator,
23
+ stripMcpPrefix,
24
+ ALL_TOOLS,
25
+ } from "../core/tools/index.js";
20
26
 
21
27
  describe("normalizeForDedupe", () => {
22
28
  it("trims, lowercases, and collapses whitespace", () => {
@@ -120,6 +126,42 @@ describe("turn-terminator declaration", () => {
120
126
  expect(isTurnTerminator("nonexistent_tool")).toBe(false);
121
127
  });
122
128
 
129
+ it("isTurnTerminator handles MCP-prefixed names", () => {
130
+ // Tools served through MCP arrive with a `mcp__<server>__` prefix.
131
+ // The check must normalize the prefix so the SDK's actual tool names
132
+ // match the registry. Without this, downstream branches gated on
133
+ // `state.turnTerminated` silently never fire — the flow-violation
134
+ // re-prompt skip and trailing-prose dedup both break.
135
+ expect(isTurnTerminator("mcp__telegram-tools__end_turn")).toBe(true);
136
+ expect(isTurnTerminator("mcp__teams-tools__end_turn")).toBe(true);
137
+ // Non-terminators with the same prefix shape still return false
138
+ expect(isTurnTerminator("mcp__telegram-tools__send")).toBe(false);
139
+ expect(isTurnTerminator("mcp__telegram-tools__react")).toBe(false);
140
+ // Server name with multiple hyphens must still match the boundary
141
+ expect(isTurnTerminator("mcp__some-server-name__end_turn")).toBe(true);
142
+ });
143
+
144
+ it("stripMcpPrefix strips the mcp__<server>__ prefix when present", () => {
145
+ expect(stripMcpPrefix("mcp__telegram-tools__end_turn")).toBe("end_turn");
146
+ expect(stripMcpPrefix("mcp__brave-search__brave_web_search")).toBe(
147
+ "brave_web_search",
148
+ );
149
+ // Non-greedy match takes the FIRST `__` after `mcp__` as the boundary
150
+ expect(stripMcpPrefix("mcp__a__b__c")).toBe("b__c");
151
+ });
152
+
153
+ it("stripMcpPrefix returns input unchanged when no prefix matches", () => {
154
+ expect(stripMcpPrefix("end_turn")).toBe("end_turn");
155
+ expect(stripMcpPrefix("send")).toBe("send");
156
+ expect(stripMcpPrefix("Read")).toBe("Read");
157
+ // Looks like a prefix but missing the trailing `__`
158
+ expect(stripMcpPrefix("mcp__incomplete")).toBe("mcp__incomplete");
159
+ // Different prefix shape
160
+ expect(stripMcpPrefix("not_mcp__server__tool")).toBe(
161
+ "not_mcp__server__tool",
162
+ );
163
+ });
164
+
123
165
  it("only one turn terminator currently exists (end_turn)", () => {
124
166
  // If a future change adds a second terminator, this test should fail
125
167
  // and the author should document why a new terminator is necessary.
@@ -187,3 +229,79 @@ describe("end_turn tool definition", () => {
187
229
  expect(result).toEqual({ ok: true, silent: true });
188
230
  });
189
231
  });
232
+
233
+ // ── Production wire-shape contract ──────────────────────────────────────────
234
+ //
235
+ // These tests pin the integration between the SDK's actual emitted tool
236
+ // names (always MCP-prefixed when served via MCP) and the registry checks
237
+ // the handler runs against them. They are the tests that would have caught
238
+ // the bug fixed in this PR — strict-equality `isTurnTerminator("end_turn")`
239
+ // passed in unit tests but the production code path called
240
+ // `isTurnTerminator("mcp__telegram-tools__end_turn")` and silently failed.
241
+ //
242
+ // Auto-derived from ALL_TOOLS so adding a new endsTurn tool or a new MCP
243
+ // frontend stays covered without manually adding cases.
244
+
245
+ describe("turn-terminator integration with SDK production tool name shapes", () => {
246
+ // Built-in MCP server names that the SDK is known to wire Talon's tools
247
+ // through. Keep this list in sync with the actual MCP server registration
248
+ // in src/core/tools/mcp-server.ts and frontend wiring.
249
+ const KNOWN_MCP_SERVERS = ["telegram-tools", "teams-tools"];
250
+
251
+ for (const tool of ALL_TOOLS.filter((t) => t.endsTurn)) {
252
+ for (const server of KNOWN_MCP_SERVERS) {
253
+ const sdkName = `mcp__${server}__${tool.name}`;
254
+
255
+ it(`isTurnTerminator(${sdkName}) === true`, () => {
256
+ // The SDK never emits bare names for MCP-served tools — it always
257
+ // includes the `mcp__<server>__` prefix. Strict equality against the
258
+ // registry's bare name was the production bug.
259
+ expect(isTurnTerminator(sdkName)).toBe(true);
260
+ });
261
+
262
+ it(`processAssistantMessage + isTurnTerminator: ${sdkName} flips state.turnTerminated`, () => {
263
+ // End-to-end check of the exact two-step the handler does:
264
+ // block.name -> tools[].name (via processAssistantMessage)
265
+ // tools[].name -> isTurnTerminator
266
+ // If either step normalizes inconsistently, this breaks.
267
+ const state = createStreamState();
268
+ const msg = {
269
+ type: "assistant",
270
+ message: {
271
+ content: [
272
+ {
273
+ type: "tool_use",
274
+ id: "tool_1",
275
+ name: sdkName,
276
+ input: { text: "Hello sur" },
277
+ },
278
+ ],
279
+ },
280
+ } as unknown as SDKAssistantMessage;
281
+
282
+ const result = processAssistantMessage(msg, state);
283
+ expect(result.tools).toHaveLength(1);
284
+ expect(result.tools[0].name).toBe(sdkName);
285
+
286
+ // This is the exact line in handler.ts:
287
+ // if (isTurnTerminator(tool.name)) state.turnTerminated = true;
288
+ if (isTurnTerminator(result.tools[0].name)) {
289
+ state.turnTerminated = true;
290
+ }
291
+ expect(state.turnTerminated).toBe(true);
292
+ });
293
+ }
294
+ }
295
+
296
+ it("non-terminator tools stay non-terminator under MCP prefixing", () => {
297
+ // Make sure prefix-stripping doesn't accidentally promote arbitrary
298
+ // tools to terminators.
299
+ const nonTerminators = ALL_TOOLS.filter((t) => !t.endsTurn);
300
+ expect(nonTerminators.length).toBeGreaterThan(0);
301
+ for (const tool of nonTerminators.slice(0, 5)) {
302
+ for (const server of KNOWN_MCP_SERVERS) {
303
+ expect(isTurnTerminator(`mcp__${server}__${tool.name}`)).toBe(false);
304
+ }
305
+ }
306
+ });
307
+ });
@@ -1731,45 +1731,103 @@ describe("handleAnimationMessage — downloads and enqueues", () => {
1731
1731
  });
1732
1732
 
1733
1733
  describe("processAndReply — onToolUse callback triggers appendDailyLogResponse", () => {
1734
- it("calls appendDailyLogResponse when execute invokes onToolUse with send tool", async () => {
1735
- const { appendDailyLogResponse } = await import("../storage/daily-log.js");
1736
-
1737
- executeMock.mockImplementationOnce(
1738
- async (params: Record<string, unknown>) => {
1739
- // Simulate execute calling onToolUse with a "send" tool
1740
- const onToolUse = params.onToolUse as (
1741
- toolName: string,
1742
- input: Record<string, unknown>,
1743
- ) => void;
1744
- onToolUse?.("send", { type: "text", text: "Hello from Claude!" });
1745
- return {
1746
- text: "",
1747
- durationMs: 5,
1748
- inputTokens: 1,
1749
- outputTokens: 2,
1750
- cacheRead: 0,
1751
- cacheWrite: 0,
1752
- bridgeMessageCount: 1,
1753
- };
1754
- },
1755
- );
1756
-
1757
- const ctx = {
1758
- chat: { id: 96001, type: "private" },
1759
- message: { text: "hi", message_id: 950, reply_to_message: null },
1760
- me: { id: 999, username: "testbot" },
1761
- from: { id: 93, first_name: "Yuki" },
1762
- } as any;
1763
-
1764
- await handleTextMessage(ctx, mockBot, mockConfig);
1765
- await new Promise((r) => setTimeout(r, 700));
1734
+ // The SDK emits MCP-prefixed tool names in production
1735
+ // (`mcp__telegram-tools__send` etc.). Bare names like `send` are how the
1736
+ // tool is registered but NOT what the handler actually receives. This
1737
+ // test matrix exercises both shapes — the prefixed forms are what would
1738
+ // catch a regression where the strip-prefix logic gets dropped.
1739
+ const cases = [
1740
+ {
1741
+ label: "MCP-prefixed send (production shape)",
1742
+ toolName: "mcp__telegram-tools__send",
1743
+ input: { type: "text", text: "Production send text" },
1744
+ expectedText: "Production send text",
1745
+ shouldCapture: true,
1746
+ },
1747
+ {
1748
+ label: "MCP-prefixed end_turn (production shape)",
1749
+ toolName: "mcp__telegram-tools__end_turn",
1750
+ input: { text: "Production end_turn text" },
1751
+ expectedText: "Production end_turn text",
1752
+ shouldCapture: true,
1753
+ },
1754
+ {
1755
+ label: "bare send (defensive — registry-shape compat)",
1756
+ toolName: "send",
1757
+ input: { type: "text", text: "Bare send text" },
1758
+ expectedText: "Bare send text",
1759
+ shouldCapture: true,
1760
+ },
1761
+ {
1762
+ label: "bare end_turn (defensive — registry-shape compat)",
1763
+ toolName: "end_turn",
1764
+ input: { text: "Bare end_turn text" },
1765
+ expectedText: "Bare end_turn text",
1766
+ shouldCapture: true,
1767
+ },
1768
+ {
1769
+ label: "non-text send (photo) — should NOT capture",
1770
+ toolName: "mcp__telegram-tools__send",
1771
+ input: { type: "photo", file_path: "/x.jpg", caption: "ignored" },
1772
+ expectedText: "",
1773
+ shouldCapture: false,
1774
+ },
1775
+ {
1776
+ label: "react tool — should NOT capture",
1777
+ toolName: "mcp__telegram-tools__react",
1778
+ input: { message_id: 1, emoji: "👍" },
1779
+ expectedText: "",
1780
+ shouldCapture: false,
1781
+ },
1782
+ ];
1783
+
1784
+ for (const c of cases) {
1785
+ it(`${c.shouldCapture ? "captures" : "ignores"} ${c.label}`, async () => {
1786
+ const { appendDailyLogResponse } =
1787
+ await import("../storage/daily-log.js");
1788
+ const mockedFn = vi.mocked(appendDailyLogResponse);
1789
+ const before = mockedFn.mock.calls.length;
1790
+
1791
+ executeMock.mockImplementationOnce(
1792
+ async (params: Record<string, unknown>) => {
1793
+ const onToolUse = params.onToolUse as (
1794
+ toolName: string,
1795
+ input: Record<string, unknown>,
1796
+ ) => void;
1797
+ onToolUse?.(c.toolName, c.input);
1798
+ return {
1799
+ text: "",
1800
+ durationMs: 5,
1801
+ inputTokens: 1,
1802
+ outputTokens: 2,
1803
+ cacheRead: 0,
1804
+ cacheWrite: 0,
1805
+ bridgeMessageCount: 1,
1806
+ };
1807
+ },
1808
+ );
1766
1809
 
1767
- expect(appendDailyLogResponse).toHaveBeenCalledWith(
1768
- "Talon",
1769
- "Hello from Claude!",
1770
- expect.anything(),
1771
- );
1772
- }, 3000);
1810
+ const ctx = {
1811
+ chat: { id: 96001, type: "private" },
1812
+ message: { text: "hi", message_id: 950, reply_to_message: null },
1813
+ me: { id: 999, username: "testbot" },
1814
+ from: { id: 93, first_name: "Yuki" },
1815
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1816
+ } as any;
1817
+
1818
+ await handleTextMessage(ctx, mockBot, mockConfig);
1819
+ await new Promise((r) => setTimeout(r, 700));
1820
+
1821
+ const newCalls = mockedFn.mock.calls.slice(before);
1822
+ if (c.shouldCapture) {
1823
+ expect(newCalls).toHaveLength(1);
1824
+ expect(newCalls[0][0]).toBe("Talon");
1825
+ expect(newCalls[0][1]).toBe(c.expectedText);
1826
+ } else {
1827
+ expect(newCalls).toHaveLength(0);
1828
+ }
1829
+ }, 3000);
1830
+ }
1773
1831
  });
1774
1832
 
1775
1833
  describe("createStreamCallbacks — onTextBlock delivers message via sendHtml", () => {
@@ -0,0 +1,208 @@
1
+ /**
2
+ * End-to-end integration tests against the real Claude Agent SDK driving a
3
+ * stub `claude` binary (see `stub-claude/`).
4
+ *
5
+ * Unlike the unit tests in `src/__tests__/`, these don't `vi.mock` the SDK —
6
+ * the real `query()` runs, real subprocess spawn, real hook dispatcher, real
7
+ * stream-json protocol. The binary is replaced with a scripted stub so we
8
+ * can assert on what the SDK produces given known inputs.
9
+ *
10
+ * What this catches that unit tests can't:
11
+ * - Bugs in our integration with the SDK's protocol shapes (e.g. the
12
+ * MCP-prefix bug in `isTurnTerminator` would have surfaced here).
13
+ * - Hook wiring — confirms options.hooks actually gets invoked by the SDK.
14
+ * - Stream sequence handling against authentic message ordering.
15
+ */
16
+
17
+ import { describe, it, expect } from "vitest";
18
+ import {
19
+ runWithStub,
20
+ cleanup,
21
+ assistantText,
22
+ assistantToolUse,
23
+ successResult,
24
+ fireHook,
25
+ } from "./stub-claude/helpers.js";
26
+
27
+ // On Windows the stub binary is a SEA-compiled `.exe` (built via
28
+ // `npm run build:stub-sea`). On POSIX it's the `.mjs` source with shebang.
29
+ // If the .exe isn't present (build step skipped), we skip the suite cleanly
30
+ // rather than fail with `ENOENT`.
31
+ import { existsSync } from "node:fs";
32
+ import { resolve as resolvePath, dirname as dirnamePath } from "node:path";
33
+ import { fileURLToPath as fileUrl } from "node:url";
34
+ const __testDir = dirnamePath(fileUrl(import.meta.url));
35
+ const stubBinaryPath = resolvePath(
36
+ __testDir,
37
+ process.platform === "win32"
38
+ ? "stub-claude/fake-claude.exe"
39
+ : "stub-claude/fake-claude.mjs",
40
+ );
41
+ const stubReady = existsSync(stubBinaryPath);
42
+
43
+ describe.skipIf(!stubReady)("SDK integration (stub binary)", () => {
44
+ it("completes a simple text-only turn", async () => {
45
+ const result = await runWithStub({
46
+ prompt: "say hi",
47
+ script: {
48
+ turns: [
49
+ {
50
+ emit: [assistantText("hello from stub"), successResult("hello")],
51
+ },
52
+ ],
53
+ },
54
+ });
55
+
56
+ // Should have system init, assistant message, and result
57
+ const types = result.messages.map((m) => (m as { type: string }).type);
58
+ expect(types).toContain("system");
59
+ expect(types).toContain("assistant");
60
+ expect(types).toContain("result");
61
+
62
+ cleanup(result);
63
+ }, 15000);
64
+
65
+ it("yields assistant content blocks intact", async () => {
66
+ const result = await runWithStub({
67
+ prompt: "test",
68
+ script: {
69
+ turns: [
70
+ {
71
+ emit: [assistantText("specific text payload"), successResult()],
72
+ },
73
+ ],
74
+ },
75
+ });
76
+
77
+ const assistant = result.messages.find(
78
+ (m) => (m as { type: string }).type === "assistant",
79
+ ) as
80
+ | { message: { content: { type: string; text?: string }[] } }
81
+ | undefined;
82
+
83
+ expect(assistant).toBeDefined();
84
+ const text = assistant?.message.content.find((b) => b.type === "text");
85
+ expect(text?.text).toBe("specific text payload");
86
+
87
+ cleanup(result);
88
+ }, 15000);
89
+
90
+ it("invokes registered PostToolBatch hooks", async () => {
91
+ // The stub emits an assistant message with a tool_use, then synthesizes a
92
+ // PostToolBatch hook fire (via fireHook). The SDK should look up the
93
+ // callback id we registered during init and invoke our hook function.
94
+ const result = await runWithStub({
95
+ prompt: "trigger end_turn",
96
+ script: {
97
+ turns: [
98
+ {
99
+ emit: [
100
+ assistantToolUse("mcp__telegram-tools__end_turn", {
101
+ text: "delivered",
102
+ }),
103
+ fireHook("PostToolBatch", {
104
+ tool_calls: [
105
+ {
106
+ tool_name: "mcp__telegram-tools__end_turn",
107
+ tool_input: { text: "delivered" },
108
+ tool_use_id: "tu_1",
109
+ },
110
+ ],
111
+ }),
112
+ successResult("delivered"),
113
+ ],
114
+ },
115
+ ],
116
+ },
117
+ sdkOptions: {
118
+ hooks: {
119
+ PostToolBatch: [
120
+ {
121
+ hooks: [
122
+ async () => ({
123
+ continue: false,
124
+ stopReason: "test: end_turn fired",
125
+ }),
126
+ ],
127
+ },
128
+ ],
129
+ },
130
+ },
131
+ });
132
+
133
+ expect(result.hookFires.PostToolBatch ?? 0).toBeGreaterThanOrEqual(1);
134
+
135
+ const calls = result.hookInputs.PostToolBatch ?? [];
136
+ const firstInput = calls[0] as
137
+ | { tool_calls: { tool_name: string }[] }
138
+ | undefined;
139
+ expect(firstInput?.tool_calls.map((tc) => tc.tool_name)).toContain(
140
+ "mcp__telegram-tools__end_turn",
141
+ );
142
+
143
+ cleanup(result);
144
+ }, 15000);
145
+
146
+ it("runs the production turn-terminator hook (PostToolBatch + isTurnTerminator)", async () => {
147
+ // This test registers an inline PostToolBatch hook built around the real
148
+ // `isTurnTerminator` helper (imported below). It does not load the hook from
149
+ // `src/backend/claude-sdk/options.ts`, so drift in that module is not caught here.
150
+ const { isTurnTerminator } = await import("../../core/tools/index.js");
151
+
152
+ const result = await runWithStub({
153
+ prompt: "drive end_turn through the real isTurnTerminator helper",
154
+ script: {
155
+ turns: [
156
+ {
157
+ emit: [
158
+ assistantToolUse("mcp__telegram-tools__end_turn", {
159
+ text: "ok",
160
+ }),
161
+ fireHook("PostToolBatch", {
162
+ tool_calls: [
163
+ {
164
+ tool_name: "mcp__telegram-tools__end_turn",
165
+ tool_input: { text: "ok" },
166
+ tool_use_id: "tu_1",
167
+ },
168
+ ],
169
+ }),
170
+ successResult(),
171
+ ],
172
+ },
173
+ ],
174
+ },
175
+ sdkOptions: {
176
+ hooks: {
177
+ PostToolBatch: [
178
+ {
179
+ hooks: [
180
+ async (input) => {
181
+ type Batch = {
182
+ hook_event_name: string;
183
+ tool_calls: { tool_name: string }[];
184
+ };
185
+ if ((input as Batch).hook_event_name !== "PostToolBatch") {
186
+ return { continue: true };
187
+ }
188
+ const batch = input as Batch;
189
+ const ended = batch.tool_calls.some((tc) =>
190
+ isTurnTerminator(tc.tool_name),
191
+ );
192
+ return ended
193
+ ? { continue: false, stopReason: "turn terminated" }
194
+ : { continue: true };
195
+ },
196
+ ],
197
+ },
198
+ ],
199
+ },
200
+ },
201
+ });
202
+
203
+ // Hook should have fired exactly once (the MCP-prefixed name match is not asserted here).
204
+ expect(result.hookFires.PostToolBatch).toBe(1);
205
+
206
+ cleanup(result);
207
+ }, 15000);
208
+ });