talon-agent 1.10.0 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +10 -5
- package/src/__tests__/claude-sdk-options.test.ts +95 -0
- package/src/__tests__/end-turn.test.ts +119 -1
- package/src/__tests__/handlers.test.ts +96 -38
- package/src/__tests__/integration/sdk-stub.test.ts +208 -0
- package/src/__tests__/integration/stub-claude/build-sea.mjs +114 -0
- package/src/__tests__/integration/stub-claude/fake-claude.mjs +352 -0
- package/src/__tests__/integration/stub-claude/helpers.ts +263 -0
- package/src/__tests__/integration/stub-claude/protocol.ts +108 -0
- package/src/__tests__/integration/stub-claude/sea-config.json +7 -0
- package/src/__tests__/integration/talon-bootstrap.ts +206 -0
- package/src/__tests__/integration/talon-functional.test.ts +190 -0
- package/src/__tests__/package.functional.test.ts +178 -0
- package/src/backend/claude-sdk/handler.ts +26 -22
- package/src/backend/claude-sdk/options.ts +59 -1
- package/src/core/tools/index.ts +25 -2
- package/src/frontend/telegram/handlers.ts +12 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "talon-agent",
|
|
3
|
-
"version": "1.10.0",
|
|
3
|
+
"version": "1.10.1",
|
|
4
4
|
"description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
|
|
5
5
|
"author": "Dylan Neve",
|
|
6
6
|
"license": "MIT",
|
|
@@ -41,13 +41,17 @@
|
|
|
41
41
|
"setup": "tsx src/cli.ts setup",
|
|
42
42
|
"dev": "tsx --watch src/index.ts",
|
|
43
43
|
"test": "vitest run",
|
|
44
|
+
"test:ci": "vitest run --reporter=verbose --reporter=json --outputFile=test-results.json",
|
|
45
|
+
"test:functional": "vitest run --reporter=verbose --reporter=json --outputFile=functional-results.json src/__tests__/package.functional.test.ts src/__tests__/tool-functional.test.ts src/__tests__/mcp-launcher.test.ts src/__tests__/mcp-launcher-functional.test.ts src/__tests__/integration/sdk-stub.test.ts src/__tests__/integration/talon-functional.test.ts",
|
|
46
|
+
"build:stub-sea": "node src/__tests__/integration/stub-claude/build-sea.mjs",
|
|
44
47
|
"test:watch": "vitest",
|
|
45
48
|
"test:coverage": "vitest run --coverage",
|
|
46
49
|
"typecheck": "tsc --noEmit",
|
|
47
50
|
"lint": "oxlint src/",
|
|
48
51
|
"knip": "knip",
|
|
49
52
|
"format": "prettier --write src/ prompts/",
|
|
50
|
-
"format:check": "prettier --check src/ prompts/"
|
|
53
|
+
"format:check": "prettier --check src/ prompts/",
|
|
54
|
+
"ci:protect": "node .github/scripts/enforce-ci-gate.mjs"
|
|
51
55
|
},
|
|
52
56
|
"dependencies": {
|
|
53
57
|
"@anthropic-ai/claude-agent-sdk": "^0.2.108",
|
|
@@ -58,7 +62,7 @@
|
|
|
58
62
|
"@grammyjs/transformer-throttler": "^1.2.1",
|
|
59
63
|
"@modelcontextprotocol/sdk": "^1.29.0",
|
|
60
64
|
"@opencode-ai/sdk": "^1.4.0",
|
|
61
|
-
"@playwright/mcp": "^0.0.
|
|
65
|
+
"@playwright/mcp": "^0.0.75",
|
|
62
66
|
"big-integer": "^1.6.52",
|
|
63
67
|
"cheerio": "^1.2.0",
|
|
64
68
|
"croner": "^10.0.1",
|
|
@@ -71,7 +75,7 @@
|
|
|
71
75
|
"telegram": "^2.26.22",
|
|
72
76
|
"tsx": "^4.21.0",
|
|
73
77
|
"undici": "^8.0.2",
|
|
74
|
-
"write-file-atomic": "^
|
|
78
|
+
"write-file-atomic": "^8.0.0",
|
|
75
79
|
"zod": "^4.3.6"
|
|
76
80
|
},
|
|
77
81
|
"devDependencies": {
|
|
@@ -86,6 +90,7 @@
|
|
|
86
90
|
"vitest": "^4.1.3"
|
|
87
91
|
},
|
|
88
92
|
"overrides": {
|
|
89
|
-
"@anthropic-ai/sdk": "^0.95.0"
|
|
93
|
+
"@anthropic-ai/sdk": "^0.95.0",
|
|
94
|
+
"ip-address": "^10.1.1"
|
|
90
95
|
}
|
|
91
96
|
}
|
|
@@ -107,4 +107,99 @@ describe("buildSdkOptions", () => {
|
|
|
107
107
|
expect(activeModel).toBe("claude-sonnet-4-6[1m]");
|
|
108
108
|
expect(options.model).toBe("sonnet[1m]");
|
|
109
109
|
});
|
|
110
|
+
|
|
111
|
+
describe("PostToolBatch turn-terminator hook", () => {
|
|
112
|
+
type HookCallback = (
|
|
113
|
+
input: unknown,
|
|
114
|
+
toolUseID?: string,
|
|
115
|
+
ctx?: { signal: AbortSignal },
|
|
116
|
+
) => Promise<{ continue?: boolean; stopReason?: string }>;
|
|
117
|
+
|
|
118
|
+
const callHook = async (toolNames: string[]): Promise<unknown> => {
|
|
119
|
+
const { buildSdkOptions } =
|
|
120
|
+
await import("../backend/claude-sdk/options.js");
|
|
121
|
+
const { options } = buildSdkOptions("chat-hook-test");
|
|
122
|
+
|
|
123
|
+
const matchers = options.hooks?.PostToolBatch;
|
|
124
|
+
expect(matchers).toBeDefined();
|
|
125
|
+
expect(matchers!.length).toBe(1);
|
|
126
|
+
const hook = matchers![0]!.hooks[0] as unknown as HookCallback;
|
|
127
|
+
|
|
128
|
+
return hook(
|
|
129
|
+
{
|
|
130
|
+
hook_event_name: "PostToolBatch",
|
|
131
|
+
tool_calls: toolNames.map((name, i) => ({
|
|
132
|
+
tool_name: name,
|
|
133
|
+
tool_input: {},
|
|
134
|
+
tool_use_id: `tu_${i}`,
|
|
135
|
+
})),
|
|
136
|
+
},
|
|
137
|
+
undefined,
|
|
138
|
+
{ signal: new AbortController().signal },
|
|
139
|
+
);
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
it("registers a PostToolBatch hook on the options object", async () => {
|
|
143
|
+
const { buildSdkOptions } =
|
|
144
|
+
await import("../backend/claude-sdk/options.js");
|
|
145
|
+
const { options } = buildSdkOptions("chat-hook-1");
|
|
146
|
+
expect(options.hooks?.PostToolBatch).toBeDefined();
|
|
147
|
+
expect(options.hooks!.PostToolBatch!.length).toBe(1);
|
|
148
|
+
expect(options.hooks!.PostToolBatch![0]!.hooks.length).toBe(1);
|
|
149
|
+
});
|
|
150
|
+
|
|
151
|
+
it("returns continue:false when an MCP-prefixed end_turn is in the batch", async () => {
|
|
152
|
+
const result = (await callHook([
|
|
153
|
+
"mcp__telegram-tools__send",
|
|
154
|
+
"mcp__telegram-tools__end_turn",
|
|
155
|
+
])) as { continue: boolean; stopReason?: string };
|
|
156
|
+
expect(result.continue).toBe(false);
|
|
157
|
+
expect(result.stopReason).toMatch(/end_turn/i);
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
it("returns continue:false when a bare end_turn is in the batch", async () => {
|
|
161
|
+
const result = (await callHook(["end_turn"])) as {
|
|
162
|
+
continue: boolean;
|
|
163
|
+
};
|
|
164
|
+
expect(result.continue).toBe(false);
|
|
165
|
+
});
|
|
166
|
+
|
|
167
|
+
it("returns continue:true when no terminator is in the batch", async () => {
|
|
168
|
+
const result = (await callHook([
|
|
169
|
+
"mcp__telegram-tools__send",
|
|
170
|
+
"Read",
|
|
171
|
+
"Bash",
|
|
172
|
+
])) as { continue: boolean };
|
|
173
|
+
expect(result.continue).toBe(true);
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
it("returns continue:true on an empty batch", async () => {
|
|
177
|
+
const result = (await callHook([])) as { continue: boolean };
|
|
178
|
+
expect(result.continue).toBe(true);
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
it("ignores non-PostToolBatch events defensively", async () => {
|
|
182
|
+
const { buildSdkOptions } =
|
|
183
|
+
await import("../backend/claude-sdk/options.js");
|
|
184
|
+
const { options } = buildSdkOptions("chat-hook-defensive");
|
|
185
|
+
const hook = options.hooks!.PostToolBatch![0]!.hooks[0] as unknown as (
|
|
186
|
+
input: unknown,
|
|
187
|
+
id?: string,
|
|
188
|
+
ctx?: { signal: AbortSignal },
|
|
189
|
+
) => Promise<{ continue: boolean }>;
|
|
190
|
+
|
|
191
|
+
const result = await hook(
|
|
192
|
+
{
|
|
193
|
+
hook_event_name: "PostToolUse",
|
|
194
|
+
tool_name: "mcp__telegram-tools__end_turn",
|
|
195
|
+
tool_input: {},
|
|
196
|
+
tool_response: {},
|
|
197
|
+
tool_use_id: "tu_0",
|
|
198
|
+
},
|
|
199
|
+
undefined,
|
|
200
|
+
{ signal: new AbortController().signal },
|
|
201
|
+
);
|
|
202
|
+
expect(result.continue).toBe(true);
|
|
203
|
+
});
|
|
204
|
+
});
|
|
110
205
|
});
|
|
@@ -14,9 +14,15 @@ import {
|
|
|
14
14
|
normalizeForDedupe,
|
|
15
15
|
isDuplicateOfDelivered,
|
|
16
16
|
createStreamState,
|
|
17
|
+
processAssistantMessage,
|
|
17
18
|
} from "../backend/claude-sdk/stream.js";
|
|
19
|
+
import type { SDKAssistantMessage } from "@anthropic-ai/claude-agent-sdk";
|
|
18
20
|
import { messagingTools } from "../core/tools/messaging.js";
|
|
19
|
-
import {
|
|
21
|
+
import {
|
|
22
|
+
isTurnTerminator,
|
|
23
|
+
stripMcpPrefix,
|
|
24
|
+
ALL_TOOLS,
|
|
25
|
+
} from "../core/tools/index.js";
|
|
20
26
|
|
|
21
27
|
describe("normalizeForDedupe", () => {
|
|
22
28
|
it("trims, lowercases, and collapses whitespace", () => {
|
|
@@ -120,6 +126,42 @@ describe("turn-terminator declaration", () => {
|
|
|
120
126
|
expect(isTurnTerminator("nonexistent_tool")).toBe(false);
|
|
121
127
|
});
|
|
122
128
|
|
|
129
|
+
it("isTurnTerminator handles MCP-prefixed names", () => {
|
|
130
|
+
// Tools served through MCP arrive with a `mcp__<server>__` prefix.
|
|
131
|
+
// The check must normalize the prefix so the SDK's actual tool names
|
|
132
|
+
// match the registry. Without this, downstream branches gated on
|
|
133
|
+
// `state.turnTerminated` silently never fire — the flow-violation
|
|
134
|
+
// re-prompt skip and trailing-prose dedup both break.
|
|
135
|
+
expect(isTurnTerminator("mcp__telegram-tools__end_turn")).toBe(true);
|
|
136
|
+
expect(isTurnTerminator("mcp__teams-tools__end_turn")).toBe(true);
|
|
137
|
+
// Non-terminators with the same prefix shape still return false
|
|
138
|
+
expect(isTurnTerminator("mcp__telegram-tools__send")).toBe(false);
|
|
139
|
+
expect(isTurnTerminator("mcp__telegram-tools__react")).toBe(false);
|
|
140
|
+
// Server name with hyphen + underscore must still match the boundary
|
|
141
|
+
expect(isTurnTerminator("mcp__some-server-name__end_turn")).toBe(true);
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
it("stripMcpPrefix strips the mcp__<server>__ prefix when present", () => {
|
|
145
|
+
expect(stripMcpPrefix("mcp__telegram-tools__end_turn")).toBe("end_turn");
|
|
146
|
+
expect(stripMcpPrefix("mcp__brave-search__brave_web_search")).toBe(
|
|
147
|
+
"brave_web_search",
|
|
148
|
+
);
|
|
149
|
+
// Non-greedy match takes the FIRST `__` after `mcp__` as the boundary
|
|
150
|
+
expect(stripMcpPrefix("mcp__a__b__c")).toBe("b__c");
|
|
151
|
+
});
|
|
152
|
+
|
|
153
|
+
it("stripMcpPrefix returns input unchanged when no prefix matches", () => {
|
|
154
|
+
expect(stripMcpPrefix("end_turn")).toBe("end_turn");
|
|
155
|
+
expect(stripMcpPrefix("send")).toBe("send");
|
|
156
|
+
expect(stripMcpPrefix("Read")).toBe("Read");
|
|
157
|
+
// Looks like a prefix but missing the trailing `__`
|
|
158
|
+
expect(stripMcpPrefix("mcp__incomplete")).toBe("mcp__incomplete");
|
|
159
|
+
// Different prefix shape
|
|
160
|
+
expect(stripMcpPrefix("not_mcp__server__tool")).toBe(
|
|
161
|
+
"not_mcp__server__tool",
|
|
162
|
+
);
|
|
163
|
+
});
|
|
164
|
+
|
|
123
165
|
it("only one turn terminator currently exists (end_turn)", () => {
|
|
124
166
|
// If a future change adds a second terminator, this test should fail
|
|
125
167
|
// and the author should document why a new terminator is necessary.
|
|
@@ -187,3 +229,79 @@ describe("end_turn tool definition", () => {
|
|
|
187
229
|
expect(result).toEqual({ ok: true, silent: true });
|
|
188
230
|
});
|
|
189
231
|
});
|
|
232
|
+
|
|
233
|
+
// ── Production wire-shape contract ──────────────────────────────────────────
|
|
234
|
+
//
|
|
235
|
+
// These tests pin the integration between the SDK's actual emitted tool
|
|
236
|
+
// names (always MCP-prefixed when served via MCP) and the registry checks
|
|
237
|
+
// the handler runs against them. They are the tests that would have caught
|
|
238
|
+
// the bug fixed in this PR — strict-equality `isTurnTerminator("end_turn")`
|
|
239
|
+
// passed in unit tests but the production code path called
|
|
240
|
+
// `isTurnTerminator("mcp__telegram-tools__end_turn")` and silently failed.
|
|
241
|
+
//
|
|
242
|
+
// Auto-derived from ALL_TOOLS so adding a new endsTurn tool or a new MCP
|
|
243
|
+
// frontend stays covered without manually adding cases.
|
|
244
|
+
|
|
245
|
+
describe("turn-terminator integration with SDK production tool name shapes", () => {
|
|
246
|
+
// Built-in MCP server names that the SDK is known to wire Talon's tools
|
|
247
|
+
// through. Keep this list in sync with the actual MCP server registration
|
|
248
|
+
// in src/core/tools/mcp-server.ts and frontend wiring.
|
|
249
|
+
const KNOWN_MCP_SERVERS = ["telegram-tools", "teams-tools"];
|
|
250
|
+
|
|
251
|
+
for (const tool of ALL_TOOLS.filter((t) => t.endsTurn)) {
|
|
252
|
+
for (const server of KNOWN_MCP_SERVERS) {
|
|
253
|
+
const sdkName = `mcp__${server}__${tool.name}`;
|
|
254
|
+
|
|
255
|
+
it(`isTurnTerminator(${sdkName}) === true`, () => {
|
|
256
|
+
// The SDK never emits bare names for MCP-served tools — it always
|
|
257
|
+
// includes the `mcp__<server>__` prefix. Strict equality against the
|
|
258
|
+
// registry's bare name was the production bug.
|
|
259
|
+
expect(isTurnTerminator(sdkName)).toBe(true);
|
|
260
|
+
});
|
|
261
|
+
|
|
262
|
+
it(`processAssistantMessage + isTurnTerminator: ${sdkName} flips state.turnTerminated`, () => {
|
|
263
|
+
// End-to-end check of the exact two-step the handler does:
|
|
264
|
+
// block.name -> tools[].name (via processAssistantMessage)
|
|
265
|
+
// tools[].name -> isTurnTerminator
|
|
266
|
+
// If either step normalizes inconsistently, this breaks.
|
|
267
|
+
const state = createStreamState();
|
|
268
|
+
const msg = {
|
|
269
|
+
type: "assistant",
|
|
270
|
+
message: {
|
|
271
|
+
content: [
|
|
272
|
+
{
|
|
273
|
+
type: "tool_use",
|
|
274
|
+
id: "tool_1",
|
|
275
|
+
name: sdkName,
|
|
276
|
+
input: { text: "Hello sur" },
|
|
277
|
+
},
|
|
278
|
+
],
|
|
279
|
+
},
|
|
280
|
+
} as unknown as SDKAssistantMessage;
|
|
281
|
+
|
|
282
|
+
const result = processAssistantMessage(msg, state);
|
|
283
|
+
expect(result.tools).toHaveLength(1);
|
|
284
|
+
expect(result.tools[0].name).toBe(sdkName);
|
|
285
|
+
|
|
286
|
+
// This is the exact line in handler.ts:
|
|
287
|
+
// if (isTurnTerminator(tool.name)) state.turnTerminated = true;
|
|
288
|
+
if (isTurnTerminator(result.tools[0].name)) {
|
|
289
|
+
state.turnTerminated = true;
|
|
290
|
+
}
|
|
291
|
+
expect(state.turnTerminated).toBe(true);
|
|
292
|
+
});
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
it("non-terminator tools stay non-terminator under MCP prefixing", () => {
|
|
297
|
+
// Make sure prefix-stripping doesn't accidentally promote arbitrary
|
|
298
|
+
// tools to terminators.
|
|
299
|
+
const nonTerminators = ALL_TOOLS.filter((t) => !t.endsTurn);
|
|
300
|
+
expect(nonTerminators.length).toBeGreaterThan(0);
|
|
301
|
+
for (const tool of nonTerminators.slice(0, 5)) {
|
|
302
|
+
for (const server of KNOWN_MCP_SERVERS) {
|
|
303
|
+
expect(isTurnTerminator(`mcp__${server}__${tool.name}`)).toBe(false);
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
});
|
|
307
|
+
});
|
|
@@ -1731,45 +1731,103 @@ describe("handleAnimationMessage — downloads and enqueues", () => {
|
|
|
1731
1731
|
});
|
|
1732
1732
|
|
|
1733
1733
|
describe("processAndReply — onToolUse callback triggers appendDailyLogResponse", () => {
|
|
1734
|
-
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1751
|
-
|
|
1752
|
-
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1734
|
+
// The SDK emits MCP-prefixed tool names in production
|
|
1735
|
+
// (`mcp__telegram-tools__send` etc.). Bare names like `send` are how the
|
|
1736
|
+
// tool is registered but NOT what the handler actually receives. This
|
|
1737
|
+
// test matrix exercises both shapes — the prefixed forms are what would
|
|
1738
|
+
// catch a regression where the strip-prefix logic gets dropped.
|
|
1739
|
+
const cases = [
|
|
1740
|
+
{
|
|
1741
|
+
label: "MCP-prefixed send (production shape)",
|
|
1742
|
+
toolName: "mcp__telegram-tools__send",
|
|
1743
|
+
input: { type: "text", text: "Production send text" },
|
|
1744
|
+
expectedText: "Production send text",
|
|
1745
|
+
shouldCapture: true,
|
|
1746
|
+
},
|
|
1747
|
+
{
|
|
1748
|
+
label: "MCP-prefixed end_turn (production shape)",
|
|
1749
|
+
toolName: "mcp__telegram-tools__end_turn",
|
|
1750
|
+
input: { text: "Production end_turn text" },
|
|
1751
|
+
expectedText: "Production end_turn text",
|
|
1752
|
+
shouldCapture: true,
|
|
1753
|
+
},
|
|
1754
|
+
{
|
|
1755
|
+
label: "bare send (defensive — registry-shape compat)",
|
|
1756
|
+
toolName: "send",
|
|
1757
|
+
input: { type: "text", text: "Bare send text" },
|
|
1758
|
+
expectedText: "Bare send text",
|
|
1759
|
+
shouldCapture: true,
|
|
1760
|
+
},
|
|
1761
|
+
{
|
|
1762
|
+
label: "bare end_turn (defensive — registry-shape compat)",
|
|
1763
|
+
toolName: "end_turn",
|
|
1764
|
+
input: { text: "Bare end_turn text" },
|
|
1765
|
+
expectedText: "Bare end_turn text",
|
|
1766
|
+
shouldCapture: true,
|
|
1767
|
+
},
|
|
1768
|
+
{
|
|
1769
|
+
label: "non-text send (photo) — should NOT capture",
|
|
1770
|
+
toolName: "mcp__telegram-tools__send",
|
|
1771
|
+
input: { type: "photo", file_path: "/x.jpg", caption: "ignored" },
|
|
1772
|
+
expectedText: "",
|
|
1773
|
+
shouldCapture: false,
|
|
1774
|
+
},
|
|
1775
|
+
{
|
|
1776
|
+
label: "react tool — should NOT capture",
|
|
1777
|
+
toolName: "mcp__telegram-tools__react",
|
|
1778
|
+
input: { message_id: 1, emoji: "👍" },
|
|
1779
|
+
expectedText: "",
|
|
1780
|
+
shouldCapture: false,
|
|
1781
|
+
},
|
|
1782
|
+
];
|
|
1783
|
+
|
|
1784
|
+
for (const c of cases) {
|
|
1785
|
+
it(`${c.shouldCapture ? "captures" : "ignores"} ${c.label}`, async () => {
|
|
1786
|
+
const { appendDailyLogResponse } =
|
|
1787
|
+
await import("../storage/daily-log.js");
|
|
1788
|
+
const mockedFn = vi.mocked(appendDailyLogResponse);
|
|
1789
|
+
const before = mockedFn.mock.calls.length;
|
|
1790
|
+
|
|
1791
|
+
executeMock.mockImplementationOnce(
|
|
1792
|
+
async (params: Record<string, unknown>) => {
|
|
1793
|
+
const onToolUse = params.onToolUse as (
|
|
1794
|
+
toolName: string,
|
|
1795
|
+
input: Record<string, unknown>,
|
|
1796
|
+
) => void;
|
|
1797
|
+
onToolUse?.(c.toolName, c.input);
|
|
1798
|
+
return {
|
|
1799
|
+
text: "",
|
|
1800
|
+
durationMs: 5,
|
|
1801
|
+
inputTokens: 1,
|
|
1802
|
+
outputTokens: 2,
|
|
1803
|
+
cacheRead: 0,
|
|
1804
|
+
cacheWrite: 0,
|
|
1805
|
+
bridgeMessageCount: 1,
|
|
1806
|
+
};
|
|
1807
|
+
},
|
|
1808
|
+
);
|
|
1766
1809
|
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1810
|
+
const ctx = {
|
|
1811
|
+
chat: { id: 96001, type: "private" },
|
|
1812
|
+
message: { text: "hi", message_id: 950, reply_to_message: null },
|
|
1813
|
+
me: { id: 999, username: "testbot" },
|
|
1814
|
+
from: { id: 93, first_name: "Yuki" },
|
|
1815
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
1816
|
+
} as any;
|
|
1817
|
+
|
|
1818
|
+
await handleTextMessage(ctx, mockBot, mockConfig);
|
|
1819
|
+
await new Promise((r) => setTimeout(r, 700));
|
|
1820
|
+
|
|
1821
|
+
const newCalls = mockedFn.mock.calls.slice(before);
|
|
1822
|
+
if (c.shouldCapture) {
|
|
1823
|
+
expect(newCalls).toHaveLength(1);
|
|
1824
|
+
expect(newCalls[0][0]).toBe("Talon");
|
|
1825
|
+
expect(newCalls[0][1]).toBe(c.expectedText);
|
|
1826
|
+
} else {
|
|
1827
|
+
expect(newCalls).toHaveLength(0);
|
|
1828
|
+
}
|
|
1829
|
+
}, 3000);
|
|
1830
|
+
}
|
|
1773
1831
|
});
|
|
1774
1832
|
|
|
1775
1833
|
describe("createStreamCallbacks — onTextBlock delivers message via sendHtml", () => {
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* End-to-end integration tests against the real Claude Agent SDK driving a
|
|
3
|
+
* stub `claude` binary (see `stub-claude/`).
|
|
4
|
+
*
|
|
5
|
+
* Unlike the unit tests in `src/__tests__/`, these don't `vi.mock` the SDK —
|
|
6
|
+
* the real `query()` runs, real subprocess spawn, real hook dispatcher, real
|
|
7
|
+
* stream-json protocol. The binary is replaced with a scripted stub so we
|
|
8
|
+
* can assert on what the SDK produces given known inputs.
|
|
9
|
+
*
|
|
10
|
+
* What this catches that unit tests can't:
|
|
11
|
+
* - Bugs in our integration with the SDK's protocol shapes (e.g. the
|
|
12
|
+
* MCP-prefix bug in `isTurnTerminator` would have surfaced here).
|
|
13
|
+
* - Hook wiring — confirms options.hooks actually gets invoked by the SDK.
|
|
14
|
+
* - Stream sequence handling against authentic message ordering.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, it, expect } from "vitest";
|
|
18
|
+
import {
|
|
19
|
+
runWithStub,
|
|
20
|
+
cleanup,
|
|
21
|
+
assistantText,
|
|
22
|
+
assistantToolUse,
|
|
23
|
+
successResult,
|
|
24
|
+
fireHook,
|
|
25
|
+
} from "./stub-claude/helpers.js";
|
|
26
|
+
|
|
27
|
+
// On Windows the stub binary is a SEA-compiled `.exe` (built via
|
|
28
|
+
// `npm run build:stub-sea`). On POSIX it's the `.mjs` source with shebang.
|
|
29
|
+
// If the .exe isn't present (build step skipped), we skip the suite cleanly
|
|
30
|
+
// rather than fail with `ENOENT`.
|
|
31
|
+
import { existsSync } from "node:fs";
|
|
32
|
+
import { resolve as resolvePath, dirname as dirnamePath } from "node:path";
|
|
33
|
+
import { fileURLToPath as fileUrl } from "node:url";
|
|
34
|
+
const __testDir = dirnamePath(fileUrl(import.meta.url));
|
|
35
|
+
const stubBinaryPath = resolvePath(
|
|
36
|
+
__testDir,
|
|
37
|
+
process.platform === "win32"
|
|
38
|
+
? "stub-claude/fake-claude.exe"
|
|
39
|
+
: "stub-claude/fake-claude.mjs",
|
|
40
|
+
);
|
|
41
|
+
const stubReady = existsSync(stubBinaryPath);
|
|
42
|
+
|
|
43
|
+
describe.skipIf(!stubReady)("SDK integration (stub binary)", () => {
|
|
44
|
+
it("completes a simple text-only turn", async () => {
|
|
45
|
+
const result = await runWithStub({
|
|
46
|
+
prompt: "say hi",
|
|
47
|
+
script: {
|
|
48
|
+
turns: [
|
|
49
|
+
{
|
|
50
|
+
emit: [assistantText("hello from stub"), successResult("hello")],
|
|
51
|
+
},
|
|
52
|
+
],
|
|
53
|
+
},
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
// Should have system init, assistant message, and result
|
|
57
|
+
const types = result.messages.map((m) => (m as { type: string }).type);
|
|
58
|
+
expect(types).toContain("system");
|
|
59
|
+
expect(types).toContain("assistant");
|
|
60
|
+
expect(types).toContain("result");
|
|
61
|
+
|
|
62
|
+
cleanup(result);
|
|
63
|
+
}, 15000);
|
|
64
|
+
|
|
65
|
+
it("yields assistant content blocks intact", async () => {
|
|
66
|
+
const result = await runWithStub({
|
|
67
|
+
prompt: "test",
|
|
68
|
+
script: {
|
|
69
|
+
turns: [
|
|
70
|
+
{
|
|
71
|
+
emit: [assistantText("specific text payload"), successResult()],
|
|
72
|
+
},
|
|
73
|
+
],
|
|
74
|
+
},
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
const assistant = result.messages.find(
|
|
78
|
+
(m) => (m as { type: string }).type === "assistant",
|
|
79
|
+
) as
|
|
80
|
+
| { message: { content: { type: string; text?: string }[] } }
|
|
81
|
+
| undefined;
|
|
82
|
+
|
|
83
|
+
expect(assistant).toBeDefined();
|
|
84
|
+
const text = assistant?.message.content.find((b) => b.type === "text");
|
|
85
|
+
expect(text?.text).toBe("specific text payload");
|
|
86
|
+
|
|
87
|
+
cleanup(result);
|
|
88
|
+
}, 15000);
|
|
89
|
+
|
|
90
|
+
it("invokes registered PostToolBatch hooks", async () => {
|
|
91
|
+
// The stub emits an assistant message with a tool_use, then synthesizes a
|
|
92
|
+
// PostToolBatch hook fire (via fireHook). The SDK should look up the
|
|
93
|
+
// callback id we registered during init and invoke our hook function.
|
|
94
|
+
const result = await runWithStub({
|
|
95
|
+
prompt: "trigger end_turn",
|
|
96
|
+
script: {
|
|
97
|
+
turns: [
|
|
98
|
+
{
|
|
99
|
+
emit: [
|
|
100
|
+
assistantToolUse("mcp__telegram-tools__end_turn", {
|
|
101
|
+
text: "delivered",
|
|
102
|
+
}),
|
|
103
|
+
fireHook("PostToolBatch", {
|
|
104
|
+
tool_calls: [
|
|
105
|
+
{
|
|
106
|
+
tool_name: "mcp__telegram-tools__end_turn",
|
|
107
|
+
tool_input: { text: "delivered" },
|
|
108
|
+
tool_use_id: "tu_1",
|
|
109
|
+
},
|
|
110
|
+
],
|
|
111
|
+
}),
|
|
112
|
+
successResult("delivered"),
|
|
113
|
+
],
|
|
114
|
+
},
|
|
115
|
+
],
|
|
116
|
+
},
|
|
117
|
+
sdkOptions: {
|
|
118
|
+
hooks: {
|
|
119
|
+
PostToolBatch: [
|
|
120
|
+
{
|
|
121
|
+
hooks: [
|
|
122
|
+
async () => ({
|
|
123
|
+
continue: false,
|
|
124
|
+
stopReason: "test: end_turn fired",
|
|
125
|
+
}),
|
|
126
|
+
],
|
|
127
|
+
},
|
|
128
|
+
],
|
|
129
|
+
},
|
|
130
|
+
},
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
expect(result.hookFires.PostToolBatch ?? 0).toBeGreaterThanOrEqual(1);
|
|
134
|
+
|
|
135
|
+
const calls = result.hookInputs.PostToolBatch ?? [];
|
|
136
|
+
const firstInput = calls[0] as
|
|
137
|
+
| { tool_calls: { tool_name: string }[] }
|
|
138
|
+
| undefined;
|
|
139
|
+
expect(firstInput?.tool_calls.map((tc) => tc.tool_name)).toContain(
|
|
140
|
+
"mcp__telegram-tools__end_turn",
|
|
141
|
+
);
|
|
142
|
+
|
|
143
|
+
cleanup(result);
|
|
144
|
+
}, 15000);
|
|
145
|
+
|
|
146
|
+
it("runs the production turn-terminator hook (PostToolBatch + isTurnTerminator)", async () => {
|
|
147
|
+
// This test composes the actual hook from `src/backend/claude-sdk/options.ts`
|
|
148
|
+
// — we reach into the module to grab it. If the hook implementation drifts
|
|
149
|
+
// from what the SDK calls, this test fails.
|
|
150
|
+
const { isTurnTerminator } = await import("../../core/tools/index.js");
|
|
151
|
+
|
|
152
|
+
const result = await runWithStub({
|
|
153
|
+
prompt: "drive end_turn through the real isTurnTerminator helper",
|
|
154
|
+
script: {
|
|
155
|
+
turns: [
|
|
156
|
+
{
|
|
157
|
+
emit: [
|
|
158
|
+
assistantToolUse("mcp__telegram-tools__end_turn", {
|
|
159
|
+
text: "ok",
|
|
160
|
+
}),
|
|
161
|
+
fireHook("PostToolBatch", {
|
|
162
|
+
tool_calls: [
|
|
163
|
+
{
|
|
164
|
+
tool_name: "mcp__telegram-tools__end_turn",
|
|
165
|
+
tool_input: { text: "ok" },
|
|
166
|
+
tool_use_id: "tu_1",
|
|
167
|
+
},
|
|
168
|
+
],
|
|
169
|
+
}),
|
|
170
|
+
successResult(),
|
|
171
|
+
],
|
|
172
|
+
},
|
|
173
|
+
],
|
|
174
|
+
},
|
|
175
|
+
sdkOptions: {
|
|
176
|
+
hooks: {
|
|
177
|
+
PostToolBatch: [
|
|
178
|
+
{
|
|
179
|
+
hooks: [
|
|
180
|
+
async (input) => {
|
|
181
|
+
type Batch = {
|
|
182
|
+
hook_event_name: string;
|
|
183
|
+
tool_calls: { tool_name: string }[];
|
|
184
|
+
};
|
|
185
|
+
if ((input as Batch).hook_event_name !== "PostToolBatch") {
|
|
186
|
+
return { continue: true };
|
|
187
|
+
}
|
|
188
|
+
const batch = input as Batch;
|
|
189
|
+
const ended = batch.tool_calls.some((tc) =>
|
|
190
|
+
isTurnTerminator(tc.tool_name),
|
|
191
|
+
);
|
|
192
|
+
return ended
|
|
193
|
+
? { continue: false, stopReason: "turn terminated" }
|
|
194
|
+
: { continue: true };
|
|
195
|
+
},
|
|
196
|
+
],
|
|
197
|
+
},
|
|
198
|
+
],
|
|
199
|
+
},
|
|
200
|
+
},
|
|
201
|
+
});
|
|
202
|
+
|
|
203
|
+
// Hook should have fired exactly once and matched the MCP-prefixed name.
|
|
204
|
+
expect(result.hookFires.PostToolBatch).toBe(1);
|
|
205
|
+
|
|
206
|
+
cleanup(result);
|
|
207
|
+
}, 15000);
|
|
208
|
+
});
|