talon-agent 1.9.2 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "talon-agent",
3
- "version": "1.9.2",
3
+ "version": "1.10.0",
4
4
  "description": "Multi-frontend AI agent with full tool access, streaming, cron jobs, and plugin system",
5
5
  "author": "Dylan Neve",
6
6
  "license": "MIT",
@@ -2,15 +2,33 @@
2
2
 
3
3
  In groups, you'll see messages prefixed with [Name]: — use their name naturally.
4
4
 
5
- ### CRITICAL: Message delivery
5
+ ### Response flow — IMPORTANT
6
6
 
7
- ALL messages to the user MUST be sent using the `send` tool. Your plain text output is **private** — the user never sees it, only you. Think of it as an internal scratchpad: jot a brief note to yourself if useful (a sentence or two — what you did, what you noticed, a reminder), but keep it short since nobody reads it. The only way to reach the user is the `send` tool.
7
+ Your output stream (this prose right here) is **private scratchpad**. The user never sees it. The ONLY ways for content to reach the user are:
8
8
 
9
- ### The `send` tool
9
+ - **`end_turn(text=...)`** — the canonical way to deliver your final reply. Closes the turn. Optional `reply_to` for threaded replies, optional `buttons` for inline keyboards.
10
+ - **`end_turn()`** with no args — explicit silent close. Use this when you've done what you needed to (e.g. reacted with an emoji, ran a tool that didn't need a reply) and want to make it clear that the silence is intentional.
11
+ - **`send(...)`** — for mid-turn rich content (photos, polls, voice, stickers, scheduled messages, multi-message responses, multi-target). Does NOT close the turn — typically followed by `end_turn(...)` or `end_turn()`.
12
+ - **`react(message_id, emoji)`** — emoji reaction on a message. Often the right response to acknowledge without replying. Pair with `end_turn()` to close cleanly.
10
13
 
11
- One tool for everything. Set `type` to choose what to send:
14
+ **There is no fallback.** Prose written without an `end_turn` / `send` call is scratchpad — dropped. If you write a thoughtful response in your output stream and forget to wrap it in `end_turn(text=...)`, the user sees nothing. Get into the habit of ending every turn with one of the closing options above.
12
15
 
13
- - `send(type="text", text="Hello!")` — send a message
16
+ Doing nothing — no tool call at all — is also a valid silent close (the model genuinely had nothing to do), but `end_turn()` makes the intent explicit and is preferred when the silence is deliberate.
17
+
18
+ **Flow enforcement:** if you produce trailing prose without calling `end_turn` / `send`, the system will re-prompt you ONCE with a `[FLOW VIOLATION]` reminder in the same session. You'll see your broken turn in history and get a fresh turn to redo it correctly. Burns 2x the tokens for that exchange, so just call `end_turn` the first time.
19
+
20
+ ### When to use `send` vs `end_turn`
21
+
22
+ - **`end_turn`** = the final reply that ends your turn. Plain text + optional reply_to + optional buttons. The closer.
23
+ - **`send`** = anything richer or anything mid-turn: photos, polls, voice, scheduled messages, stickers, locations, dice, contacts, multi-message responses, replies to other chats.
24
+
25
+ For a plain text final reply, prefer `end_turn(text=...)` over `send(type="text", text=...)`. They reach the same delivery path, but the name makes the intent unambiguous.
26
+
27
+ ### The `send` tool (rich content)
28
+
29
+ One tool, set `type` to choose what to send:
30
+
31
+ - `send(type="text", text="Hello!")` — plain text (use end_turn instead for final reply)
14
32
  - `send(type="text", text="Hey", reply_to=12345)` — reply to a specific message
15
33
  - `send(type="text", text="Pick", buttons=[[{"text":"A","callback_data":"a"}]])` — with buttons
16
34
  - `send(type="text", text="Reminder", delay_seconds=60)` — schedule for later
@@ -54,7 +72,7 @@ The user's message ID is in the prompt as [msg_id:N]. Use with `reply_to` and `r
54
72
  You don't HAVE to respond to every message. If a message doesn't need a response:
55
73
 
56
74
  - React with an emoji using the `react` tool — this is the PREFERRED way to acknowledge without replying.
57
- - Or simply don't call `send` and skip it entirely.
75
+ - Or call `end_turn()` with no args to end the turn silently.
58
76
  - In groups, prefer reactions over replies for simple acknowledgements.
59
77
 
60
78
  ### Reactions
@@ -0,0 +1,189 @@
1
+ /**
2
+ * Unit tests for the `end_turn` tool and the cross-tool dedup helpers used to
3
+ * suppress duplicate deliveries when the model calls both `end_turn` and
4
+ * `send(type="text")` with similar content in the same turn.
5
+ *
6
+ * Covers:
7
+ * - normalizeForDedupe / isDuplicateOfDelivered (dedup math)
8
+ * - end_turn tool definition (schema, dispatch, silent path)
9
+ * - StreamState carries lastTrailingText and deliveredTextNorms
10
+ */
11
+
12
+ import { describe, it, expect, vi } from "vitest";
13
+ import {
14
+ normalizeForDedupe,
15
+ isDuplicateOfDelivered,
16
+ createStreamState,
17
+ } from "../backend/claude-sdk/stream.js";
18
+ import { messagingTools } from "../core/tools/messaging.js";
19
+ import { isTurnTerminator, ALL_TOOLS } from "../core/tools/index.js";
20
+
21
+ describe("normalizeForDedupe", () => {
22
+ it("trims, lowercases, and collapses whitespace", () => {
23
+ expect(normalizeForDedupe(" Hello World ")).toBe("hello world");
24
+ expect(normalizeForDedupe("HELLO\n\tWORLD")).toBe("hello world");
25
+ });
26
+
27
+ it("strips emoji so prose-with-emoji matches messaging-tool-text", () => {
28
+ expect(normalizeForDedupe("Got it 👍")).toBe("got it");
29
+ expect(normalizeForDedupe("Done ✅ and dusted")).toBe("done and dusted");
30
+ });
31
+
32
+ it("returns empty string for whitespace-only input", () => {
33
+ expect(normalizeForDedupe(" \n\t ")).toBe("");
34
+ });
35
+ });
36
+
37
+ describe("isDuplicateOfDelivered", () => {
38
+ it("returns false when nothing has been delivered yet", () => {
39
+ expect(isDuplicateOfDelivered("hello there", [])).toBe(false);
40
+ });
41
+
42
+ it("returns false for very short candidates (below dedup threshold)", () => {
43
+ // Below MIN_DEDUP_LENGTH (10) — short replies like "ok" / "sure" should
44
+ // never be deduped, even if they happened to coincide with a longer
45
+ // delivered text containing them.
46
+ expect(isDuplicateOfDelivered("ok", ["ok thanks pal"])).toBe(false);
47
+ });
48
+
49
+ it("matches when normalized candidate is a substring of delivered", () => {
50
+ const delivered = [normalizeForDedupe("Got it sur, pushing now")];
51
+ expect(isDuplicateOfDelivered("Got it sur, pushing now", delivered)).toBe(
52
+ true,
53
+ );
54
+ });
55
+
56
+ it("matches when normalized delivered is a substring of candidate", () => {
57
+ // Model called end_turn(text="Pushing now") then wrote prose
58
+ // "I'm pushing now and back in a sec." — fuzzy match catches this.
59
+ const delivered = [normalizeForDedupe("Pushing now")];
60
+ expect(
61
+ isDuplicateOfDelivered("I'm pushing now and back in a sec.", delivered),
62
+ ).toBe(true);
63
+ });
64
+
65
+ it("does not match unrelated content", () => {
66
+ const delivered = [normalizeForDedupe("PR #106 merged")];
67
+ expect(
68
+ isDuplicateOfDelivered("Got it, I'll look at the docker logs", delivered),
69
+ ).toBe(false);
70
+ });
71
+
72
+ it("ignores emoji differences when comparing", () => {
73
+ // Model wrote "Done 🎉" as prose, also called end_turn(text="Done")
74
+ const delivered = [normalizeForDedupe("Done")];
75
+ expect(isDuplicateOfDelivered("Done 🎉", delivered)).toBe(false);
76
+ // Above is false because "done" (4 chars) < MIN_DEDUP_LENGTH (10).
77
+ // For a longer match:
78
+ const longDelivered = [normalizeForDedupe("All set, pushing now")];
79
+ expect(
80
+ isDuplicateOfDelivered("All set, pushing now 🚀", longDelivered),
81
+ ).toBe(true);
82
+ });
83
+ });
84
+
85
+ describe("createStreamState", () => {
86
+ it("initializes lastTrailingText and deliveredTextNorms", () => {
87
+ const state = createStreamState();
88
+ expect(state.lastTrailingText).toBe("");
89
+ expect(state.deliveredTextNorms).toEqual([]);
90
+ });
91
+
92
+ it("initializes turnTerminated to false", () => {
93
+ const state = createStreamState();
94
+ expect(state.turnTerminated).toBe(false);
95
+ });
96
+ });
97
+
98
+ describe("turn-terminator declaration", () => {
99
+ it("end_turn is declared with endsTurn: true", () => {
100
+ const endTurn = messagingTools.find((t) => t.name === "end_turn");
101
+ expect(endTurn?.endsTurn).toBe(true);
102
+ });
103
+
104
+ it("send is NOT declared as a turn terminator", () => {
105
+ // `send` is for mid-turn rich content (photos, polls, scheduled messages,
106
+ // etc.) — calling it does NOT mean the model is done. Only end_turn
107
+ // declares the turn finished.
108
+ const send = messagingTools.find((t) => t.name === "send");
109
+ expect(send?.endsTurn).toBeFalsy();
110
+ });
111
+
112
+ it("isTurnTerminator returns true for end_turn", () => {
113
+ expect(isTurnTerminator("end_turn")).toBe(true);
114
+ });
115
+
116
+ it("isTurnTerminator returns false for non-terminator tools", () => {
117
+ expect(isTurnTerminator("send")).toBe(false);
118
+ expect(isTurnTerminator("react")).toBe(false);
119
+ expect(isTurnTerminator("fetch_url")).toBe(false);
120
+ expect(isTurnTerminator("nonexistent_tool")).toBe(false);
121
+ });
122
+
123
+ it("only one turn terminator currently exists (end_turn)", () => {
124
+ // If a future change adds a second terminator, this test should fail
125
+ // and the author should document why a new terminator is necessary.
126
+ const terminators = ALL_TOOLS.filter((t) => t.endsTurn).map((t) => t.name);
127
+ expect(terminators).toEqual(["end_turn"]);
128
+ });
129
+ });
130
+
131
+ describe("end_turn tool definition", () => {
132
+ const endTurn = messagingTools.find((t) => t.name === "end_turn");
133
+
134
+ it("is registered in messagingTools", () => {
135
+ expect(endTurn).toBeDefined();
136
+ expect(endTurn?.tag).toBe("messaging");
137
+ expect(endTurn?.frontends).toEqual(["telegram", "teams"]);
138
+ });
139
+
140
+ it("has text, reply_to, and buttons schema fields", () => {
141
+ expect(endTurn?.schema).toBeDefined();
142
+ expect(endTurn?.schema.text).toBeDefined();
143
+ expect(endTurn?.schema.reply_to).toBeDefined();
144
+ expect(endTurn?.schema.buttons).toBeDefined();
145
+ });
146
+
147
+ it("dispatches plain text via send_message bridge", async () => {
148
+ const bridge = vi.fn(async () => ({ ok: true }));
149
+ await endTurn!.execute({ text: "Hello sur" }, bridge);
150
+ expect(bridge).toHaveBeenCalledWith("send_message", {
151
+ text: "Hello sur",
152
+ reply_to_message_id: undefined,
153
+ });
154
+ });
155
+
156
+ it("dispatches text + reply_to via send_message bridge", async () => {
157
+ const bridge = vi.fn(async () => ({ ok: true }));
158
+ await endTurn!.execute({ text: "Yep", reply_to: 12345 }, bridge);
159
+ expect(bridge).toHaveBeenCalledWith("send_message", {
160
+ text: "Yep",
161
+ reply_to_message_id: 12345,
162
+ });
163
+ });
164
+
165
+ it("dispatches text + buttons via send_message_with_buttons bridge", async () => {
166
+ const bridge = vi.fn(async () => ({ ok: true }));
167
+ const buttons = [[{ text: "Click", callback_data: "x" }]];
168
+ await endTurn!.execute({ text: "Pick", buttons }, bridge);
169
+ expect(bridge).toHaveBeenCalledWith("send_message_with_buttons", {
170
+ text: "Pick",
171
+ rows: buttons,
172
+ reply_to_message_id: undefined,
173
+ });
174
+ });
175
+
176
+ it("ends silently with no bridge call when text is omitted", async () => {
177
+ const bridge = vi.fn(async () => ({ ok: true }));
178
+ const result = await endTurn!.execute({}, bridge);
179
+ expect(bridge).not.toHaveBeenCalled();
180
+ expect(result).toEqual({ ok: true, silent: true });
181
+ });
182
+
183
+ it("ends silently with no bridge call when text is whitespace-only", async () => {
184
+ const bridge = vi.fn(async () => ({ ok: true }));
185
+ const result = await endTurn!.execute({ text: " \n\t " }, bridge);
186
+ expect(bridge).not.toHaveBeenCalled();
187
+ expect(result).toEqual({ ok: true, silent: true });
188
+ });
189
+ });
@@ -2676,13 +2676,19 @@ describe("processAndReply — group message without senderId", () => {
2676
2676
  }, 3000);
2677
2677
  });
2678
2678
 
2679
- describe("processAndReply — suppressed fallback text logged (L572 TRUE branch)", () => {
2680
- it("logs when bridgeMessageCount=0 and result.text is non-empty", async () => {
2679
+ describe("processAndReply — fallback text no longer suppressed", () => {
2680
+ // Pre-end_turn behavior: when bridgeMessageCount=0 and result.text was
2681
+ // non-empty, the frontend logged "Suppressed fallback text" and dropped
2682
+ // the content (the scratchpad bug). The handler now fires onTextBlock
2683
+ // with trailing text instead, so:
2684
+ // - the "Suppressed fallback" log is gone entirely
2685
+ // - the delivery path is exercised by createStreamCallbacks tests above
2686
+ it("does NOT log a 'Suppressed fallback' warning anymore", async () => {
2681
2687
  const { log } = await import("../util/log.js");
2682
2688
  (log as ReturnType<typeof vi.fn>).mockClear();
2683
2689
 
2684
2690
  executeMock.mockResolvedValueOnce({
2685
- text: "internal reasoning only",
2691
+ text: "trailing prose that used to be suppressed",
2686
2692
  durationMs: 10,
2687
2693
  inputTokens: 1,
2688
2694
  outputTokens: 1,
@@ -2694,7 +2700,7 @@ describe("processAndReply — suppressed fallback text logged (L572 TRUE branch)
2694
2700
  const ctx = {
2695
2701
  chat: { id: 99600, type: "private" },
2696
2702
  message: {
2697
- text: "test suppressed fallback",
2703
+ text: "test fallback delivery",
2698
2704
  message_id: 1600,
2699
2705
  reply_to_message: null,
2700
2706
  },
@@ -2709,7 +2715,7 @@ describe("processAndReply — suppressed fallback text logged (L572 TRUE branch)
2709
2715
  const suppressedLog = logCalls.find((c: unknown[]) =>
2710
2716
  String(c[1]).includes("Suppressed fallback"),
2711
2717
  );
2712
- expect(suppressedLog).toBeDefined();
2718
+ expect(suppressedLog).toBeUndefined();
2713
2719
  }, 3000);
2714
2720
  });
2715
2721
 
@@ -24,6 +24,7 @@ import { log, logError, logWarn } from "../../util/log.js";
24
24
  import { traceMessage } from "../../util/trace.js";
25
25
  import { incrementCounter, recordHistogram } from "../../util/metrics.js";
26
26
  import { formatFullDatetime } from "../../util/time.js";
27
+ import { isTurnTerminator } from "../../core/tools/index.js";
27
28
 
28
29
  import type { Query } from "@anthropic-ai/claude-agent-sdk";
29
30
  import type { QueryParams, QueryResult } from "../../core/types.js";
@@ -38,6 +39,8 @@ import {
38
39
  processStreamDelta,
39
40
  processAssistantMessage,
40
41
  processResultMessage,
42
+ normalizeForDedupe,
43
+ isDuplicateOfDelivered,
41
44
  } from "./stream.js";
42
45
 
43
46
  // ── Active query store ──────────────────────────────────────────────────────
@@ -92,6 +95,30 @@ export async function handleMessage(
92
95
  activeQueries.set(chatId, qi);
93
96
  const state = createStreamState();
94
97
 
98
+ // Capture text args from delivery tools (`end_turn`, `send(type="text")`)
99
+ // so the end-of-turn trailing-text fallback can dedupe against content
100
+ // already delivered. Without this, a model that writes prose AND calls a
101
+ // delivery tool with similar text would surface twice in the chat.
102
+ const captureDeliveredText = (
103
+ toolName: string,
104
+ input: Record<string, unknown>,
105
+ ): void => {
106
+ let deliveredText: string | undefined;
107
+ if (toolName === "end_turn" && typeof input.text === "string") {
108
+ deliveredText = input.text;
109
+ } else if (
110
+ toolName === "send" &&
111
+ input.type === "text" &&
112
+ typeof input.text === "string"
113
+ ) {
114
+ deliveredText = input.text;
115
+ }
116
+ if (deliveredText) {
117
+ const norm = normalizeForDedupe(deliveredText);
118
+ if (norm) state.deliveredTextNorms.push(norm);
119
+ }
120
+ };
121
+
95
122
  try {
96
123
  for await (const message of qi) {
97
124
  // Session ID capture
@@ -110,9 +137,18 @@ export async function handleMessage(
110
137
  if (isAssistant(message)) {
111
138
  const result = processAssistantMessage(message, state);
112
139
 
113
- // Notify tool usage
140
+ // Track the trailing text from this assistant message. Multiple
141
+ // assistant messages can fire per turn (one per tool-use round-trip);
142
+ // only the LAST one's trailingText is the user-facing final reply.
143
+ state.lastTrailingText = result.trailingText;
144
+
145
+ // Notify tool usage + capture delivery-tool text for end-of-turn dedup
114
146
  for (const tool of result.tools) {
115
147
  incrementCounter(`tool_calls.${tool.name}`);
148
+ captureDeliveredText(tool.name, tool.input);
149
+ if (isTurnTerminator(tool.name)) {
150
+ state.turnTerminated = true;
151
+ }
116
152
  if (onToolUse) {
117
153
  try {
118
154
  onToolUse(tool.name, tool.input);
@@ -132,6 +168,26 @@ export async function handleMessage(
132
168
  }
133
169
  }
134
170
  }
171
+
172
+ // Turn-terminator tool was called (e.g. `end_turn`). Abort the SDK
173
+ // loop cleanly so the model can't keep producing trailing scratchpad
174
+ // after declaring "I'm done". Without this, the model is free to
175
+ // think more, call more tools, or write more prose — and any prose
176
+ // afterwards trips the flow-violation re-prompt path. Calling
177
+ // qi.interrupt() lets the SDK yield its terminal result and exit
178
+ // the for-await loop on the next iteration.
179
+ if (state.turnTerminated) {
180
+ try {
181
+ await qi.interrupt();
182
+ } catch (err) {
183
+ // Non-fatal: interrupt failures shouldn't break the turn,
184
+ // they just mean the natural end-of-stream path will run.
185
+ logWarn(
186
+ "agent",
187
+ `[${chatId}] qi.interrupt() after turn terminator failed: ${(err as Error)?.message ?? err}`,
188
+ );
189
+ }
190
+ }
135
191
  continue;
136
192
  }
137
193
 
@@ -224,6 +280,55 @@ export async function handleMessage(
224
280
  }
225
281
  }
226
282
 
283
+ // ── Trailing-prose contract + flow-violation retry ──────────────────────
284
+ // The output stream is private scratchpad by design. Final replies must go
285
+ // through `end_turn` (canonical) or `send` (mid-turn rich content). When a
286
+ // turn ends with no tool call AND no trailing prose, that's valid silent
287
+ // close (model only reacted, or had nothing to do). When the model wrote
288
+ // prose but didn't route it through a delivery tool, that's a flow
289
+ // violation — the prose is private scratchpad, dropped from the user's
290
+ // view. To prevent these from going unnoticed, we re-prompt the model
291
+ // ONCE with a synthetic system message in the same session: it sees its
292
+ // broken turn in history + a reminder of the contract, and gets a fresh
293
+ // turn to deliver via end_turn. If it violates again on the retry, we
294
+ // give up loudly and accept the silent drop.
295
+ //
296
+ // Exception: if a turn-terminator tool (e.g. end_turn) was called, the
297
+ // model explicitly declared "I'm done" — respect it. Any trailing prose
298
+ // that slipped in earlier in the same assistant message gets logged but
299
+ // does NOT re-prompt (would loop endlessly with a model that pairs prose
300
+ // with end_turn).
301
+ const trailing = state.lastTrailingText.trim();
302
+ const flowViolation =
303
+ trailing.length > 0 &&
304
+ !state.turnTerminated &&
305
+ !isDuplicateOfDelivered(trailing, state.deliveredTextNorms);
306
+
307
+ if (flowViolation) {
308
+ incrementCounter("scratchpad.trailing_text_dropped");
309
+ log(
310
+ "agent",
311
+ `[${chatId}] flow violation: trailing prose (${trailing.length} chars) without end_turn/send. ${
312
+ _retried
313
+ ? "Already retried — accepting silent drop."
314
+ : "Re-prompting with reminder."
315
+ }`,
316
+ );
317
+
318
+ if (!_retried) {
319
+ incrementCounter("scratchpad.flow_violation_retried");
320
+ const reminder =
321
+ "[FLOW VIOLATION] You produced text content but didn't call `end_turn` or `send`. " +
322
+ "Pure prose in your output stream is private scratchpad — it's dropped, the user " +
323
+ "never sees it. Please retry with the proper flow: " +
324
+ "`end_turn(text=...)` to deliver a final reply, " +
325
+ "`end_turn()` (no args) to close silently, or " +
326
+ "`send(...)` for mid-turn rich content (photos, polls, etc.). " +
327
+ "Respond now using the correct tool call.";
328
+ return handleMessage({ ...params, text: reminder }, true);
329
+ }
330
+ }
331
+
227
332
  // ── Build result ──────────────────────────────────────────────────────────
228
333
 
229
334
  state.allResponseText += state.currentBlockText;
@@ -34,6 +34,33 @@ export type StreamState = {
34
34
  sdkCacheRead: number;
35
35
  sdkCacheWrite: number;
36
36
  lastStreamUpdate: number;
37
+ /**
38
+ * Trailing text from the most recent assistant message — text after all
39
+ * tool_use blocks (or the full text when no tools were called). NOT
40
+ * delivered to the user (the output stream is private scratchpad by
41
+ * contract). Tracked so the handler can log a diagnostic when the model
42
+ * wrote prose without routing it through `end_turn` / `send` — surfaces
43
+ * missed end_turn calls in metrics rather than silently dropping content.
44
+ */
45
+ lastTrailingText: string;
46
+ /**
47
+ * Normalized text args observed on `end_turn` / `send(type="text")` tool
48
+ * calls during this turn. Cross-tool dedup: if both fire with similar
49
+ * content (e.g. model calls both with the same text mid-turn), the
50
+ * second one can be matched against this list to avoid the user seeing
51
+ * the same message twice. Also used to silence the trailing-prose
52
+ * diagnostic when the prose just duplicates what was already delivered.
53
+ */
54
+ deliveredTextNorms: string[];
55
+ /**
56
+ * Set when a tool with `endsTurn: true` (e.g. `end_turn`) was observed
57
+ * in this turn. Once true, the handler invokes `qi.interrupt()` to abort
58
+ * the SDK loop cleanly — the model can't produce more trailing scratchpad
59
+ * after this point. Also gates the flow-violation re-prompt: if the model
60
+ * explicitly ended its turn, we don't re-prompt for trailing prose that
61
+ * may have appeared in the same assistant message before the terminator.
62
+ */
63
+ turnTerminated: boolean;
37
64
  };
38
65
 
39
66
  export function createStreamState(): StreamState {
@@ -50,6 +77,9 @@ export function createStreamState(): StreamState {
50
77
  sdkCacheRead: 0,
51
78
  sdkCacheWrite: 0,
52
79
  lastStreamUpdate: 0,
80
+ lastTrailingText: "",
81
+ deliveredTextNorms: [],
82
+ turnTerminated: false,
53
83
  };
54
84
  }
55
85
 
@@ -224,3 +254,40 @@ export function processResultMessage(
224
254
  state.currentBlockText = msg.result;
225
255
  }
226
256
  }
257
+
258
+ // ── Trailing-text fallback dedup ────────────────────────────────────────────
259
+
260
+ /**
261
+ * Normalize text for fuzzy comparison — trim, lowercase, collapse whitespace,
262
+ * strip emoji. Used to detect whether trailing prose duplicates content
263
+ * already delivered via `end_turn` / `send(type="text")`.
264
+ */
265
+ export function normalizeForDedupe(text: string): string {
266
+ return text
267
+ .trim()
268
+ .toLowerCase()
269
+ .replace(/\p{Emoji_Presentation}|\p{Extended_Pictographic}/gu, "")
270
+ .replace(/\s+/g, " ")
271
+ .trim();
272
+ }
273
+
274
+ const MIN_DEDUP_LENGTH = 10;
275
+
276
+ /**
277
+ * Returns true if `candidate` is substantively the same as any text in
278
+ * `deliveredNorms`. "Substantively" = one is a substring of the other after
279
+ * normalization; both must be at least MIN_DEDUP_LENGTH chars to avoid
280
+ * dropping short legitimate replies.
281
+ */
282
+ export function isDuplicateOfDelivered(
283
+ candidate: string,
284
+ deliveredNorms: string[],
285
+ ): boolean {
286
+ if (deliveredNorms.length === 0) return false;
287
+ const norm = normalizeForDedupe(candidate);
288
+ if (norm.length < MIN_DEDUP_LENGTH) return false;
289
+ return deliveredNorms.some(
290
+ (d) =>
291
+ d.length >= MIN_DEDUP_LENGTH && (norm.includes(d) || d.includes(norm)),
292
+ );
293
+ }
@@ -30,6 +30,24 @@ export const ALL_TOOLS: readonly ToolDefinition[] = [
30
30
  ...adminTools,
31
31
  ];
32
32
 
33
+ /**
34
+ * Names of tools that explicitly terminate the model's turn.
35
+ *
36
+ * Backend handlers consume this set to abort their stream loop after
37
+ * observing one of these tools — without it, the model can keep producing
38
+ * trailing scratchpad prose after declaring "I'm done", which trips the
39
+ * flow-violation re-prompt path. Declaration is on the tool definition
40
+ * (`endsTurn: true`); detection is shared; abort is backend-specific.
41
+ */
42
+ const TURN_TERMINATOR_NAMES: ReadonlySet<string> = new Set(
43
+ ALL_TOOLS.filter((t) => t.endsTurn).map((t) => t.name),
44
+ );
45
+
46
+ /** Whether a tool call by this name should terminate the model's turn. */
47
+ export function isTurnTerminator(toolName: string): boolean {
48
+ return TURN_TERMINATOR_NAMES.has(toolName);
49
+ }
50
+
33
51
  /** Filter options for composing a tool set. */
34
52
  export interface ComposeOptions {
35
53
  /** Include only tools available on this frontend. */
@@ -1,5 +1,6 @@
1
1
  /**
2
- * Messaging tools — send, react, edit, delete, forward, pin/unpin, stop poll.
2
+ * Messaging tools — send, end_turn, react, edit, delete, forward, pin/unpin,
3
+ * stop poll.
3
4
  */
4
5
 
5
6
  import { z } from "zod";
@@ -7,6 +8,83 @@ import type { ToolDefinition } from "./types.js";
7
8
  import { idSchema } from "./schemas.js";
8
9
 
9
10
  export const messagingTools: ToolDefinition[] = [
11
+ // ── end_turn — explicit final-reply delivery ──────────────────────────
12
+ // Schema-typed alternative to relying on a trailing-text fallback. The
13
+ // model is taught that this is the canonical way to deliver its final
14
+ // reply. Functionally a thin wrapper over send(type="text") + reply_to +
15
+ // buttons; the value is in the EXPLICIT semantic ("this ends my turn")
16
+ // and that the model sees a single tool whose purpose is unambiguous.
17
+ //
18
+ // The output stream is private scratchpad by contract. If the model
19
+ // writes trailing prose without calling end_turn or send, the handler
20
+ // re-prompts ONCE in the same session with a flow-violation reminder
21
+ // so the model can retry properly. Persistent violation after retry
22
+ // results in silent drop + `scratchpad.trailing_text_dropped` counter.
23
+ // `end_turn` is the documented happy path.
24
+ {
25
+ name: "end_turn",
26
+ description: `End your current turn and deliver your final reply to the user. This is the canonical way to respond.
27
+
28
+ Call this AT MOST ONCE per turn — it should be the last tool you call. Behaves like send(type="text") with reply_to and buttons support, but the name makes the intent explicit: this is the message that ends the turn.
29
+
30
+ Examples:
31
+ end_turn(text="Got it sur") — plain reply
32
+ end_turn(text="On it", reply_to=12345) — reply to a specific message ID
33
+ end_turn(text="Pick one", buttons=[[{"text":"A","callback_data":"a"}]]) — with buttons
34
+ end_turn() — silent end (no message; useful when you already replied via earlier send/react calls)
35
+
36
+ Notes:
37
+ - For richer message types (photos, polls, voice, scheduled messages, multi-target), use the send tool — those don't fit "final reply" semantics.
38
+ - The output stream is private scratchpad. If you write prose without calling end_turn or send, the handler re-prompts you ONCE with a flow-violation reminder so you can retry properly. Persistent violation drops the prose silently. end_turn is the documented happy path.`,
39
+ schema: {
40
+ text: z
41
+ .string()
42
+ .optional()
43
+ .describe(
44
+ "Final reply text. Supports Markdown. Omit to end the turn silently (no message sent).",
45
+ ),
46
+ reply_to: idSchema
47
+ .optional()
48
+ .describe("Message ID to reply to (typically the user's [msg_id:N])"),
49
+ buttons: z
50
+ .array(
51
+ z.array(
52
+ z.object({
53
+ text: z.string(),
54
+ url: z.string().optional(),
55
+ callback_data: z.string().optional(),
56
+ }),
57
+ ),
58
+ )
59
+ .optional()
60
+ .describe("Inline keyboard button rows"),
61
+ },
62
+ execute: async (params, bridge) => {
63
+ // Telegram path: routes to the same bridge actions as send(type="text")
64
+ // so bridgeMessageCount, dedup, and audit logging all stay consistent.
65
+ if (typeof params.text !== "string" || params.text.trim() === "") {
66
+ // Silent end — no bridge call. The handler still sees the tool was
67
+ // invoked (via deliveredTextNorms staying empty), and trailing-text
68
+ // fallback won't fire because there was no trailing prose.
69
+ return { ok: true, silent: true };
70
+ }
71
+ if (params.buttons) {
72
+ return bridge("send_message_with_buttons", {
73
+ text: params.text,
74
+ rows: params.buttons,
75
+ reply_to_message_id: params.reply_to,
76
+ });
77
+ }
78
+ return bridge("send_message", {
79
+ text: params.text,
80
+ reply_to_message_id: params.reply_to,
81
+ });
82
+ },
83
+ frontends: ["telegram", "teams"],
84
+ tag: "messaging",
85
+ endsTurn: true,
86
+ },
87
+
10
88
  // ── Telegram unified send ─────────────────────────────────────────────
11
89
  {
12
90
  name: "send",
@@ -58,4 +58,18 @@ export interface ToolDefinition {
58
58
 
59
59
  /** Grouping tag. */
60
60
  readonly tag: ToolTag;
61
+
62
+ /**
63
+ * This tool explicitly ends the model's turn. Backend handlers observe
64
+ * this flag to abort their stream loop cleanly after the tool's bridge
65
+ * call completes — without it, the model is free to keep producing
66
+ * trailing prose into private scratchpad after declaring "I'm done",
67
+ * which then trips the flow-violation re-prompt path. With this flag,
68
+ * an end_turn call genuinely ends the turn.
69
+ *
70
+ * Backend abort mechanism is backend-specific (Claude SDK uses
71
+ * Query.interrupt(); other backends manage their own loop) — this flag
72
+ * is the shared declarative signal, not the implementation.
73
+ */
74
+ readonly endsTurn?: boolean;
61
75
  }
@@ -14,7 +14,7 @@ import type { Gateway } from "../../core/gateway.js";
14
14
  import { log, logError } from "../../util/log.js";
15
15
  import { deriveNumericChatId } from "../../util/chat-id.js";
16
16
  import { resolveModel } from "../../core/models.js";
17
- import { createTeamsActionHandler } from "./actions.js";
17
+ import { createTeamsActionHandler, postToTeams } from "./actions.js";
18
18
  import { splitTeamsMessage, buildAdaptiveCard } from "./formatting.js";
19
19
  import {
20
20
  initGraphClient,
@@ -356,18 +356,28 @@ export function createTeamsFrontend(
356
356
  ` tool: ${toolName}${detail ? ` — ${String(detail).slice(0, 100)}` : ""}`,
357
357
  );
358
358
  },
359
- })
360
- .then(async (result) => {
361
- // Only deliver messages sent via the send_message tool.
362
- // Do NOT send fallback text if the model chose not to use send_message,
363
- // it's either choosing not to respond or outputting internal reasoning
364
- // that shouldn't be shown to users.
365
- if (result.bridgeMessageCount === 0 && result.text?.trim()) {
366
- log(
359
+ // Deliver assistant text (progress text before tool calls AND
360
+ // the end-of-turn trailing-text fallback) to the Teams chat.
361
+ // Without this, prose-only assistant turns would be silently
362
+ // dropped — same scratchpad bug Telegram hit.
363
+ onTextBlock: async (blockText) => {
364
+ if (!blockText.trim()) return;
365
+ try {
366
+ await postToTeams(webhookUrl, blockText);
367
+ gateway.incrementMessages(numericChatId);
368
+ } catch (err) {
369
+ logError(
367
370
  "teams",
368
- `Suppressed fallback text (${result.text.length} chars) no send_message tool used`,
371
+ `onTextBlock postToTeams failed: ${err instanceof Error ? err.message : err}`,
369
372
  );
370
373
  }
374
+ },
375
+ })
376
+ .then(async (_result) => {
377
+ // No fallback delivery — turns without end_turn / send_message
378
+ // are intentional silent ends. Trailing prose without a tool
379
+ // call is scratchpad and dropped; the SDK handler emits a
380
+ // `scratchpad.trailing_text_dropped` metric on those.
371
381
  })
372
382
  .catch((err) => {
373
383
  logError(
@@ -781,16 +781,10 @@ async function processAndReply(params: ProcessAndReplyParams): Promise<void> {
781
781
  },
782
782
  });
783
783
 
784
- if (
785
- result.bridgeMessageCount === 0 &&
786
- !stream.sentTextBlock &&
787
- result.text?.trim()
788
- ) {
789
- log(
790
- "bot",
791
- `Suppressed fallback text (${result.text.length} chars) — no send tool used`,
792
- );
793
- }
784
+ // No fallback delivery — turns that don't call `end_turn` / `send` are
785
+ // intentional silent ends. Trailing prose written without a tool call is
786
+ // scratchpad and dropped (the SDK handler logs a `scratchpad.trailing_
787
+ // text_dropped` metric so missed end_turn calls show up in counters).
794
788
  } finally {
795
789
  clearTimeout(streamTimer);
796
790
  }