talon-agent 1.9.2 → 1.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/prompts/telegram.md +24 -6
- package/src/__tests__/end-turn.test.ts +189 -0
- package/src/__tests__/handlers.test.ts +11 -5
- package/src/backend/claude-sdk/handler.ts +106 -1
- package/src/backend/claude-sdk/stream.ts +67 -0
- package/src/core/tools/index.ts +18 -0
- package/src/core/tools/messaging.ts +79 -1
- package/src/core/tools/types.ts +14 -0
- package/src/frontend/teams/index.ts +20 -10
- package/src/frontend/telegram/handlers.ts +4 -10
package/package.json
CHANGED
package/prompts/telegram.md
CHANGED
|
@@ -2,15 +2,33 @@
|
|
|
2
2
|
|
|
3
3
|
In groups, you'll see messages prefixed with [Name]: — use their name naturally.
|
|
4
4
|
|
|
5
|
-
###
|
|
5
|
+
### Response flow — IMPORTANT
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
Your output stream (this prose right here) is **private scratchpad**. The user never sees it. The ONLY ways for content to reach the user are:
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
- **`end_turn(text=...)`** — the canonical way to deliver your final reply. Closes the turn. Optional `reply_to` for threaded replies, optional `buttons` for inline keyboards.
|
|
10
|
+
- **`end_turn()`** with no args — explicit silent close. Use this when you've done what you needed to (e.g. reacted with an emoji, ran a tool that didn't need a reply) and want to make it clear that the silence is intentional.
|
|
11
|
+
- **`send(...)`** — for mid-turn rich content (photos, polls, voice, stickers, scheduled messages, multi-message responses, multi-target). Does NOT close the turn — typically followed by `end_turn(...)` or `end_turn()`.
|
|
12
|
+
- **`react(message_id, emoji)`** — emoji reaction on a message. Often the right response to acknowledge without replying. Pair with `end_turn()` to close cleanly.
|
|
10
13
|
|
|
11
|
-
|
|
14
|
+
**There is no fallback.** Prose written without an `end_turn` / `send` call is scratchpad — dropped. If you write a thoughtful response in your output stream and forget to wrap it in `end_turn(text=...)`, the user sees nothing. Get into the habit of ending every turn with one of the closing options above.
|
|
12
15
|
|
|
13
|
-
|
|
16
|
+
Doing nothing — no tool call at all — is also a valid silent close (you genuinely had nothing to do), but `end_turn()` makes the intent explicit and is preferred when the silence is deliberate.
|
|
17
|
+
|
|
18
|
+
**Flow enforcement:** if you produce trailing prose without calling `end_turn` / `send`, the system will re-prompt you ONCE with a `[FLOW VIOLATION]` reminder in the same session. You'll see your broken turn in history and get a fresh turn to redo it correctly. Burns 2x the tokens for that exchange, so just call `end_turn` the first time.
|
|
19
|
+
|
|
20
|
+
### When to use `send` vs `end_turn`
|
|
21
|
+
|
|
22
|
+
- **`end_turn`** = the final reply that ends your turn. Plain text + optional reply_to + optional buttons. The closer.
|
|
23
|
+
- **`send`** = anything richer or anything mid-turn: photos, polls, voice, scheduled messages, stickers, locations, dice, contacts, multi-message responses, replies to other chats.
|
|
24
|
+
|
|
25
|
+
For a plain text final reply, prefer `end_turn(text=...)` over `send(type="text", text=...)`. They reach the same delivery path, but the name makes the intent unambiguous.
|
|
26
|
+
|
|
27
|
+
### The `send` tool (rich content)
|
|
28
|
+
|
|
29
|
+
One tool, set `type` to choose what to send:
|
|
30
|
+
|
|
31
|
+
- `send(type="text", text="Hello!")` — plain text (use end_turn instead for final reply)
|
|
14
32
|
- `send(type="text", text="Hey", reply_to=12345)` — reply to a specific message
|
|
15
33
|
- `send(type="text", text="Pick", buttons=[[{"text":"A","callback_data":"a"}]])` — with buttons
|
|
16
34
|
- `send(type="text", text="Reminder", delay_seconds=60)` — schedule for later
|
|
@@ -54,7 +72,7 @@ The user's message ID is in the prompt as [msg_id:N]. Use with `reply_to` and `r
|
|
|
54
72
|
You don't HAVE to respond to every message. If a message doesn't need a response:
|
|
55
73
|
|
|
56
74
|
- React with an emoji using the `react` tool — this is the PREFERRED way to acknowledge without replying.
|
|
57
|
-
- Or
|
|
75
|
+
- Or call `end_turn()` with no args to end the turn silently.
|
|
58
76
|
- In groups, prefer reactions over replies for simple acknowledgements.
|
|
59
77
|
|
|
60
78
|
### Reactions
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the `end_turn` tool and the cross-tool dedup helpers used to
|
|
3
|
+
* suppress duplicate deliveries when the model calls both `end_turn` and
|
|
4
|
+
* `send(type="text")` with similar content in the same turn.
|
|
5
|
+
*
|
|
6
|
+
* Covers:
|
|
7
|
+
* - normalizeForDedupe / isDuplicateOfDelivered (dedup math)
|
|
8
|
+
* - end_turn tool definition (schema, dispatch, silent path)
|
|
9
|
+
* - StreamState carries lastTrailingText and deliveredTextNorms
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { describe, it, expect, vi } from "vitest";
|
|
13
|
+
import {
|
|
14
|
+
normalizeForDedupe,
|
|
15
|
+
isDuplicateOfDelivered,
|
|
16
|
+
createStreamState,
|
|
17
|
+
} from "../backend/claude-sdk/stream.js";
|
|
18
|
+
import { messagingTools } from "../core/tools/messaging.js";
|
|
19
|
+
import { isTurnTerminator, ALL_TOOLS } from "../core/tools/index.js";
|
|
20
|
+
|
|
21
|
+
describe("normalizeForDedupe", () => {
|
|
22
|
+
it("trims, lowercases, and collapses whitespace", () => {
|
|
23
|
+
expect(normalizeForDedupe(" Hello World ")).toBe("hello world");
|
|
24
|
+
expect(normalizeForDedupe("HELLO\n\tWORLD")).toBe("hello world");
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
it("strips emoji so prose-with-emoji matches messaging-tool-text", () => {
|
|
28
|
+
expect(normalizeForDedupe("Got it 👍")).toBe("got it");
|
|
29
|
+
expect(normalizeForDedupe("Done ✅ and dusted")).toBe("done and dusted");
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
it("returns empty string for whitespace-only input", () => {
|
|
33
|
+
expect(normalizeForDedupe(" \n\t ")).toBe("");
|
|
34
|
+
});
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
describe("isDuplicateOfDelivered", () => {
|
|
38
|
+
it("returns false when nothing has been delivered yet", () => {
|
|
39
|
+
expect(isDuplicateOfDelivered("hello there", [])).toBe(false);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it("returns false for very short candidates (below dedup threshold)", () => {
|
|
43
|
+
// Below MIN_DEDUP_LENGTH (10) — short replies like "ok" / "sure" should
|
|
44
|
+
// never be deduped, even if they happened to coincide with a longer
|
|
45
|
+
// delivered text containing them.
|
|
46
|
+
expect(isDuplicateOfDelivered("ok", ["ok thanks pal"])).toBe(false);
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
it("matches when normalized candidate is a substring of delivered", () => {
|
|
50
|
+
const delivered = [normalizeForDedupe("Got it sur, pushing now")];
|
|
51
|
+
expect(isDuplicateOfDelivered("Got it sur, pushing now", delivered)).toBe(
|
|
52
|
+
true,
|
|
53
|
+
);
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
it("matches when normalized delivered is a substring of candidate", () => {
|
|
57
|
+
// Model called end_turn(text="Pushing now") then wrote prose
|
|
58
|
+
// "I'm pushing now and back in a sec." — fuzzy match catches this.
|
|
59
|
+
const delivered = [normalizeForDedupe("Pushing now")];
|
|
60
|
+
expect(
|
|
61
|
+
isDuplicateOfDelivered("I'm pushing now and back in a sec.", delivered),
|
|
62
|
+
).toBe(true);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
it("does not match unrelated content", () => {
|
|
66
|
+
const delivered = [normalizeForDedupe("PR #106 merged")];
|
|
67
|
+
expect(
|
|
68
|
+
isDuplicateOfDelivered("Got it, I'll look at the docker logs", delivered),
|
|
69
|
+
).toBe(false);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
it("ignores emoji differences when comparing", () => {
|
|
73
|
+
// Model wrote "Done 🎉" as prose, also called end_turn(text="Done")
|
|
74
|
+
const delivered = [normalizeForDedupe("Done")];
|
|
75
|
+
expect(isDuplicateOfDelivered("Done 🎉", delivered)).toBe(false);
|
|
76
|
+
// Above is false because "done" (4 chars) < MIN_DEDUP_LENGTH (10).
|
|
77
|
+
// For a longer match:
|
|
78
|
+
const longDelivered = [normalizeForDedupe("All set, pushing now")];
|
|
79
|
+
expect(
|
|
80
|
+
isDuplicateOfDelivered("All set, pushing now 🚀", longDelivered),
|
|
81
|
+
).toBe(true);
|
|
82
|
+
});
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
describe("createStreamState", () => {
|
|
86
|
+
it("initializes lastTrailingText and deliveredTextNorms", () => {
|
|
87
|
+
const state = createStreamState();
|
|
88
|
+
expect(state.lastTrailingText).toBe("");
|
|
89
|
+
expect(state.deliveredTextNorms).toEqual([]);
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
it("initializes turnTerminated to false", () => {
|
|
93
|
+
const state = createStreamState();
|
|
94
|
+
expect(state.turnTerminated).toBe(false);
|
|
95
|
+
});
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
describe("turn-terminator declaration", () => {
|
|
99
|
+
it("end_turn is declared with endsTurn: true", () => {
|
|
100
|
+
const endTurn = messagingTools.find((t) => t.name === "end_turn");
|
|
101
|
+
expect(endTurn?.endsTurn).toBe(true);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it("send is NOT declared as a turn terminator", () => {
|
|
105
|
+
// `send` is for mid-turn rich content (photos, polls, scheduled messages,
|
|
106
|
+
// etc.) — calling it does NOT mean the model is done. Only end_turn
|
|
107
|
+
// declares the turn finished.
|
|
108
|
+
const send = messagingTools.find((t) => t.name === "send");
|
|
109
|
+
expect(send?.endsTurn).toBeFalsy();
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
it("isTurnTerminator returns true for end_turn", () => {
|
|
113
|
+
expect(isTurnTerminator("end_turn")).toBe(true);
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it("isTurnTerminator returns false for non-terminator tools", () => {
|
|
117
|
+
expect(isTurnTerminator("send")).toBe(false);
|
|
118
|
+
expect(isTurnTerminator("react")).toBe(false);
|
|
119
|
+
expect(isTurnTerminator("fetch_url")).toBe(false);
|
|
120
|
+
expect(isTurnTerminator("nonexistent_tool")).toBe(false);
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
it("only one turn terminator currently exists (end_turn)", () => {
|
|
124
|
+
// If a future change adds a second terminator, this test should fail
|
|
125
|
+
// and the author should document why a new terminator is necessary.
|
|
126
|
+
const terminators = ALL_TOOLS.filter((t) => t.endsTurn).map((t) => t.name);
|
|
127
|
+
expect(terminators).toEqual(["end_turn"]);
|
|
128
|
+
});
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
describe("end_turn tool definition", () => {
|
|
132
|
+
const endTurn = messagingTools.find((t) => t.name === "end_turn");
|
|
133
|
+
|
|
134
|
+
it("is registered in messagingTools", () => {
|
|
135
|
+
expect(endTurn).toBeDefined();
|
|
136
|
+
expect(endTurn?.tag).toBe("messaging");
|
|
137
|
+
expect(endTurn?.frontends).toEqual(["telegram", "teams"]);
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
it("has text, reply_to, and buttons schema fields", () => {
|
|
141
|
+
expect(endTurn?.schema).toBeDefined();
|
|
142
|
+
expect(endTurn?.schema.text).toBeDefined();
|
|
143
|
+
expect(endTurn?.schema.reply_to).toBeDefined();
|
|
144
|
+
expect(endTurn?.schema.buttons).toBeDefined();
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
it("dispatches plain text via send_message bridge", async () => {
|
|
148
|
+
const bridge = vi.fn(async () => ({ ok: true }));
|
|
149
|
+
await endTurn!.execute({ text: "Hello sur" }, bridge);
|
|
150
|
+
expect(bridge).toHaveBeenCalledWith("send_message", {
|
|
151
|
+
text: "Hello sur",
|
|
152
|
+
reply_to_message_id: undefined,
|
|
153
|
+
});
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
it("dispatches text + reply_to via send_message bridge", async () => {
|
|
157
|
+
const bridge = vi.fn(async () => ({ ok: true }));
|
|
158
|
+
await endTurn!.execute({ text: "Yep", reply_to: 12345 }, bridge);
|
|
159
|
+
expect(bridge).toHaveBeenCalledWith("send_message", {
|
|
160
|
+
text: "Yep",
|
|
161
|
+
reply_to_message_id: 12345,
|
|
162
|
+
});
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
it("dispatches text + buttons via send_message_with_buttons bridge", async () => {
|
|
166
|
+
const bridge = vi.fn(async () => ({ ok: true }));
|
|
167
|
+
const buttons = [[{ text: "Click", callback_data: "x" }]];
|
|
168
|
+
await endTurn!.execute({ text: "Pick", buttons }, bridge);
|
|
169
|
+
expect(bridge).toHaveBeenCalledWith("send_message_with_buttons", {
|
|
170
|
+
text: "Pick",
|
|
171
|
+
rows: buttons,
|
|
172
|
+
reply_to_message_id: undefined,
|
|
173
|
+
});
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
it("ends silently with no bridge call when text is omitted", async () => {
|
|
177
|
+
const bridge = vi.fn(async () => ({ ok: true }));
|
|
178
|
+
const result = await endTurn!.execute({}, bridge);
|
|
179
|
+
expect(bridge).not.toHaveBeenCalled();
|
|
180
|
+
expect(result).toEqual({ ok: true, silent: true });
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
it("ends silently with no bridge call when text is whitespace-only", async () => {
|
|
184
|
+
const bridge = vi.fn(async () => ({ ok: true }));
|
|
185
|
+
const result = await endTurn!.execute({ text: " \n\t " }, bridge);
|
|
186
|
+
expect(bridge).not.toHaveBeenCalled();
|
|
187
|
+
expect(result).toEqual({ ok: true, silent: true });
|
|
188
|
+
});
|
|
189
|
+
});
|
|
@@ -2676,13 +2676,19 @@ describe("processAndReply — group message without senderId", () => {
|
|
|
2676
2676
|
}, 3000);
|
|
2677
2677
|
});
|
|
2678
2678
|
|
|
2679
|
-
describe("processAndReply —
|
|
2680
|
-
|
|
2679
|
+
describe("processAndReply — fallback text no longer suppressed", () => {
|
|
2680
|
+
// Pre-end_turn behavior: when bridgeMessageCount=0 and result.text was
|
|
2681
|
+
// non-empty, the frontend logged "Suppressed fallback text" and dropped
|
|
2682
|
+
// the content (the scratchpad bug). The handler now fires onTextBlock
|
|
2683
|
+
// with trailing text instead, so:
|
|
2684
|
+
// - the "Suppressed fallback" log is gone entirely
|
|
2685
|
+
// - the delivery path is exercised by createStreamCallbacks tests above
|
|
2686
|
+
it("does NOT log a 'Suppressed fallback' warning anymore", async () => {
|
|
2681
2687
|
const { log } = await import("../util/log.js");
|
|
2682
2688
|
(log as ReturnType<typeof vi.fn>).mockClear();
|
|
2683
2689
|
|
|
2684
2690
|
executeMock.mockResolvedValueOnce({
|
|
2685
|
-
text: "
|
|
2691
|
+
text: "trailing prose that used to be suppressed",
|
|
2686
2692
|
durationMs: 10,
|
|
2687
2693
|
inputTokens: 1,
|
|
2688
2694
|
outputTokens: 1,
|
|
@@ -2694,7 +2700,7 @@ describe("processAndReply — suppressed fallback text logged (L572 TRUE branch)
|
|
|
2694
2700
|
const ctx = {
|
|
2695
2701
|
chat: { id: 99600, type: "private" },
|
|
2696
2702
|
message: {
|
|
2697
|
-
text: "test
|
|
2703
|
+
text: "test fallback delivery",
|
|
2698
2704
|
message_id: 1600,
|
|
2699
2705
|
reply_to_message: null,
|
|
2700
2706
|
},
|
|
@@ -2709,7 +2715,7 @@ describe("processAndReply — suppressed fallback text logged (L572 TRUE branch)
|
|
|
2709
2715
|
const suppressedLog = logCalls.find((c: unknown[]) =>
|
|
2710
2716
|
String(c[1]).includes("Suppressed fallback"),
|
|
2711
2717
|
);
|
|
2712
|
-
expect(suppressedLog).
|
|
2718
|
+
expect(suppressedLog).toBeUndefined();
|
|
2713
2719
|
}, 3000);
|
|
2714
2720
|
});
|
|
2715
2721
|
|
|
@@ -24,6 +24,7 @@ import { log, logError, logWarn } from "../../util/log.js";
|
|
|
24
24
|
import { traceMessage } from "../../util/trace.js";
|
|
25
25
|
import { incrementCounter, recordHistogram } from "../../util/metrics.js";
|
|
26
26
|
import { formatFullDatetime } from "../../util/time.js";
|
|
27
|
+
import { isTurnTerminator } from "../../core/tools/index.js";
|
|
27
28
|
|
|
28
29
|
import type { Query } from "@anthropic-ai/claude-agent-sdk";
|
|
29
30
|
import type { QueryParams, QueryResult } from "../../core/types.js";
|
|
@@ -38,6 +39,8 @@ import {
|
|
|
38
39
|
processStreamDelta,
|
|
39
40
|
processAssistantMessage,
|
|
40
41
|
processResultMessage,
|
|
42
|
+
normalizeForDedupe,
|
|
43
|
+
isDuplicateOfDelivered,
|
|
41
44
|
} from "./stream.js";
|
|
42
45
|
|
|
43
46
|
// ── Active query store ──────────────────────────────────────────────────────
|
|
@@ -92,6 +95,30 @@ export async function handleMessage(
|
|
|
92
95
|
activeQueries.set(chatId, qi);
|
|
93
96
|
const state = createStreamState();
|
|
94
97
|
|
|
98
|
+
// Remember the text argument of every text-delivery tool call seen in this
// turn (`end_turn`, or `send` with type "text"). The normalized form is
// appended to state.deliveredTextNorms, which the end-of-turn trailing-prose
// check consults so that prose merely repeating a delivered message is not
// treated as undelivered content.
const captureDeliveredText = (
  toolName: string,
  input: Record<string, unknown>,
): void => {
  const isTextDelivery =
    toolName === "end_turn" || (toolName === "send" && input.type === "text");
  if (!isTextDelivery) return;

  // Non-string or empty text means nothing was delivered (e.g. a silent
  // end_turn) — there is nothing to record.
  const text = input.text;
  if (typeof text !== "string" || text === "") return;

  // Text that normalizes to nothing (whitespace/emoji only) is skipped too.
  const normalized = normalizeForDedupe(text);
  if (normalized) state.deliveredTextNorms.push(normalized);
};
|
|
121
|
+
|
|
95
122
|
try {
|
|
96
123
|
for await (const message of qi) {
|
|
97
124
|
// Session ID capture
|
|
@@ -110,9 +137,18 @@ export async function handleMessage(
|
|
|
110
137
|
if (isAssistant(message)) {
|
|
111
138
|
const result = processAssistantMessage(message, state);
|
|
112
139
|
|
|
113
|
-
//
|
|
140
|
+
// Track the trailing text from this assistant message. Multiple
|
|
141
|
+
// assistant messages can fire per turn (one per tool-use round-trip);
|
|
142
|
+
// only the LAST one's trailingText is the user-facing final reply.
|
|
143
|
+
state.lastTrailingText = result.trailingText;
|
|
144
|
+
|
|
145
|
+
// Notify tool usage + capture delivery-tool text for end-of-turn dedup
|
|
114
146
|
for (const tool of result.tools) {
|
|
115
147
|
incrementCounter(`tool_calls.${tool.name}`);
|
|
148
|
+
captureDeliveredText(tool.name, tool.input);
|
|
149
|
+
if (isTurnTerminator(tool.name)) {
|
|
150
|
+
state.turnTerminated = true;
|
|
151
|
+
}
|
|
116
152
|
if (onToolUse) {
|
|
117
153
|
try {
|
|
118
154
|
onToolUse(tool.name, tool.input);
|
|
@@ -132,6 +168,26 @@ export async function handleMessage(
|
|
|
132
168
|
}
|
|
133
169
|
}
|
|
134
170
|
}
|
|
171
|
+
|
|
172
|
+
// Turn-terminator tool was called (e.g. `end_turn`). Abort the SDK
|
|
173
|
+
// loop cleanly so the model can't keep producing trailing scratchpad
|
|
174
|
+
// after declaring "I'm done". Without this, the model is free to
|
|
175
|
+
// think more, call more tools, or write more prose — and any prose
|
|
176
|
+
// afterwards trips the flow-violation re-prompt path. Calling
|
|
177
|
+
// qi.interrupt() lets the SDK yield its terminal result and exit
|
|
178
|
+
// the for-await loop on the next iteration.
|
|
179
|
+
if (state.turnTerminated) {
|
|
180
|
+
try {
|
|
181
|
+
await qi.interrupt();
|
|
182
|
+
} catch (err) {
|
|
183
|
+
// Non-fatal: interrupt failures shouldn't break the turn,
|
|
184
|
+
// they just mean the natural end-of-stream path will run.
|
|
185
|
+
logWarn(
|
|
186
|
+
"agent",
|
|
187
|
+
`[${chatId}] qi.interrupt() after turn terminator failed: ${(err as Error)?.message ?? err}`,
|
|
188
|
+
);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
135
191
|
continue;
|
|
136
192
|
}
|
|
137
193
|
|
|
@@ -224,6 +280,55 @@ export async function handleMessage(
|
|
|
224
280
|
}
|
|
225
281
|
}
|
|
226
282
|
|
|
283
|
+
// ── Trailing-prose contract + flow-violation retry ──────────────────────
|
|
284
|
+
// The output stream is private scratchpad by design. Final replies must go
|
|
285
|
+
// through `end_turn` (canonical) or `send` (mid-turn rich content). When a
|
|
286
|
+
// turn ends with no tool call AND no trailing prose, that's valid silent
|
|
287
|
+
// close (model only reacted, or had nothing to do). When the model wrote
|
|
288
|
+
// prose but didn't route it through a delivery tool, that's a flow
|
|
289
|
+
// violation — the prose is private scratchpad, dropped from the user's
|
|
290
|
+
// view. To prevent these from going unnoticed, we re-prompt the model
|
|
291
|
+
// ONCE with a synthetic system message in the same session: it sees its
|
|
292
|
+
// broken turn in history + a reminder of the contract, and gets a fresh
|
|
293
|
+
// turn to deliver via end_turn. If it violates again on the retry, we
|
|
294
|
+
// give up loudly and accept the silent drop.
|
|
295
|
+
//
|
|
296
|
+
// Exception: if a turn-terminator tool (e.g. end_turn) was called, the
|
|
297
|
+
// model explicitly declared "I'm done" — respect it. Any trailing prose
|
|
298
|
+
// that slipped in earlier in the same assistant message gets logged but
|
|
299
|
+
// does NOT re-prompt (would loop endlessly with a model that pairs prose
|
|
300
|
+
// with end_turn).
|
|
301
|
+
const trailing = state.lastTrailingText.trim();
|
|
302
|
+
const flowViolation =
|
|
303
|
+
trailing.length > 0 &&
|
|
304
|
+
!state.turnTerminated &&
|
|
305
|
+
!isDuplicateOfDelivered(trailing, state.deliveredTextNorms);
|
|
306
|
+
|
|
307
|
+
if (flowViolation) {
|
|
308
|
+
incrementCounter("scratchpad.trailing_text_dropped");
|
|
309
|
+
log(
|
|
310
|
+
"agent",
|
|
311
|
+
`[${chatId}] flow violation: trailing prose (${trailing.length} chars) without end_turn/send. ${
|
|
312
|
+
_retried
|
|
313
|
+
? "Already retried — accepting silent drop."
|
|
314
|
+
: "Re-prompting with reminder."
|
|
315
|
+
}`,
|
|
316
|
+
);
|
|
317
|
+
|
|
318
|
+
if (!_retried) {
|
|
319
|
+
incrementCounter("scratchpad.flow_violation_retried");
|
|
320
|
+
const reminder =
|
|
321
|
+
"[FLOW VIOLATION] You produced text content but didn't call `end_turn` or `send`. " +
|
|
322
|
+
"Pure prose in your output stream is private scratchpad — it's dropped, the user " +
|
|
323
|
+
"never sees it. Please retry with the proper flow: " +
|
|
324
|
+
"`end_turn(text=...)` to deliver a final reply, " +
|
|
325
|
+
"`end_turn()` (no args) to close silently, or " +
|
|
326
|
+
"`send(...)` for mid-turn rich content (photos, polls, etc.). " +
|
|
327
|
+
"Respond now using the correct tool call.";
|
|
328
|
+
return handleMessage({ ...params, text: reminder }, true);
|
|
329
|
+
}
|
|
330
|
+
}
|
|
331
|
+
|
|
227
332
|
// ── Build result ──────────────────────────────────────────────────────────
|
|
228
333
|
|
|
229
334
|
state.allResponseText += state.currentBlockText;
|
|
@@ -34,6 +34,33 @@ export type StreamState = {
|
|
|
34
34
|
sdkCacheRead: number;
|
|
35
35
|
sdkCacheWrite: number;
|
|
36
36
|
lastStreamUpdate: number;
|
|
37
|
+
/**
|
|
38
|
+
* Trailing text from the most recent assistant message — text after all
|
|
39
|
+
* tool_use blocks (or the full text when no tools were called). NOT
|
|
40
|
+
* delivered to the user (the output stream is private scratchpad by
|
|
41
|
+
* contract). Tracked so the handler can log a diagnostic when the model
|
|
42
|
+
* wrote prose without routing it through `end_turn` / `send` — surfaces
|
|
43
|
+
* missed end_turn calls in metrics rather than silently dropping content.
|
|
44
|
+
*/
|
|
45
|
+
lastTrailingText: string;
|
|
46
|
+
/**
|
|
47
|
+
* Normalized text args observed on `end_turn` / `send(type="text")` tool
|
|
48
|
+
* calls during this turn. Cross-tool dedup: if both fire with similar
|
|
49
|
+
* content (e.g. model calls both with the same text mid-turn), the
|
|
50
|
+
* second one can be matched against this list to avoid the user seeing
|
|
51
|
+
* the same message twice. Also used to silence the trailing-prose
|
|
52
|
+
* diagnostic when the prose just duplicates what was already delivered.
|
|
53
|
+
*/
|
|
54
|
+
deliveredTextNorms: string[];
|
|
55
|
+
/**
|
|
56
|
+
* Set when a tool with `endsTurn: true` (e.g. `end_turn`) was observed
|
|
57
|
+
* in this turn. Once true, the handler invokes `qi.interrupt()` to abort
|
|
58
|
+
* the SDK loop cleanly — the model can't produce more trailing scratchpad
|
|
59
|
+
* after this point. Also gates the flow-violation re-prompt: if the model
|
|
60
|
+
* explicitly ended its turn, we don't re-prompt for trailing prose that
|
|
61
|
+
* may have appeared in the same assistant message before the terminator.
|
|
62
|
+
*/
|
|
63
|
+
turnTerminated: boolean;
|
|
37
64
|
};
|
|
38
65
|
|
|
39
66
|
export function createStreamState(): StreamState {
|
|
@@ -50,6 +77,9 @@ export function createStreamState(): StreamState {
|
|
|
50
77
|
sdkCacheRead: 0,
|
|
51
78
|
sdkCacheWrite: 0,
|
|
52
79
|
lastStreamUpdate: 0,
|
|
80
|
+
lastTrailingText: "",
|
|
81
|
+
deliveredTextNorms: [],
|
|
82
|
+
turnTerminated: false,
|
|
53
83
|
};
|
|
54
84
|
}
|
|
55
85
|
|
|
@@ -224,3 +254,40 @@ export function processResultMessage(
|
|
|
224
254
|
state.currentBlockText = msg.result;
|
|
225
255
|
}
|
|
226
256
|
}
|
|
257
|
+
|
|
258
|
+
// ── Trailing-text fallback dedup ────────────────────────────────────────────
|
|
259
|
+
|
|
260
|
+
/**
 * Normalize text for fuzzy comparison: trim, lowercase, strip emoji, and
 * collapse every whitespace run to a single space. Used to detect whether
 * trailing prose duplicates content already delivered via `end_turn` /
 * `send(type="text")`.
 *
 * @param text Raw text (prose or a tool's `text` argument).
 * @returns The normalized form; an empty string when nothing but whitespace
 *   and emoji was present.
 */
export function normalizeForDedupe(text: string): string {
  return (
    text
      .trim()
      .toLowerCase()
      // Drop the emoji code points themselves.
      .replace(/\p{Emoji_Presentation}|\p{Extended_Pictographic}/gu, "")
      // Drop the invisible companions that emoji sequences leave behind:
      // zero-width joiner (U+200D), variation selectors (U+FE0E / U+FE0F)
      // and the combining keycap (U+20E3). They are not whitespace, so
      // without this step "❤️" would leave a stray U+FE0F in the output
      // and defeat the comparison.
      .replace(/[\u200D\uFE0E\uFE0F\u20E3]/g, "")
      .replace(/\s+/g, " ")
      .trim()
  );
}
|
|
273
|
+
|
|
274
|
+
/** Shortest normalized length at which a text takes part in dedup matching. */
const MIN_DEDUP_LENGTH = 10;

/**
 * Decide whether `candidate` substantively repeats something that was already
 * delivered this turn.
 *
 * The candidate is normalized and then compared against each entry of
 * `deliveredNorms`; a match means one string contains the other. Texts
 * shorter than MIN_DEDUP_LENGTH — on either side — never match, so brief
 * legitimate replies are not mistaken for duplicates.
 *
 * @param candidate Raw text to test (normalized internally).
 * @param deliveredNorms Already-normalized texts delivered earlier in the turn.
 * @returns true when a sufficiently long containment match exists.
 */
export function isDuplicateOfDelivered(
  candidate: string,
  deliveredNorms: string[],
): boolean {
  if (deliveredNorms.length === 0) return false;

  const needle = normalizeForDedupe(candidate);
  if (needle.length < MIN_DEDUP_LENGTH) return false;

  for (const delivered of deliveredNorms) {
    // Too-short delivered entries are ignored rather than matched.
    if (delivered.length < MIN_DEDUP_LENGTH) continue;
    if (needle.includes(delivered) || delivered.includes(needle)) return true;
  }
  return false;
}
|
package/src/core/tools/index.ts
CHANGED
|
@@ -30,6 +30,24 @@ export const ALL_TOOLS: readonly ToolDefinition[] = [
|
|
|
30
30
|
...adminTools,
|
|
31
31
|
];
|
|
32
32
|
|
|
33
|
+
/**
 * Names of every tool whose definition declares `endsTurn: true`, collected
 * once from ALL_TOOLS when this module loads.
 *
 * Backend handlers look tool names up here to decide when to abort their
 * stream loop; the declaration lives on the tool definition, the lookup is
 * shared, and the abort mechanism itself is left to each backend.
 */
const TURN_TERMINATOR_NAMES: ReadonlySet<string> = new Set(
  ALL_TOOLS.flatMap((tool) => (tool.endsTurn ? [tool.name] : [])),
);

/**
 * Report whether calling the tool with this name ends the model's turn.
 *
 * @param toolName Name of the tool that was invoked.
 * @returns true only for names of tools declared with `endsTurn: true`;
 *   unknown names yield false.
 */
export function isTurnTerminator(toolName: string): boolean {
  return TURN_TERMINATOR_NAMES.has(toolName);
}
|
|
50
|
+
|
|
33
51
|
/** Filter options for composing a tool set. */
|
|
34
52
|
export interface ComposeOptions {
|
|
35
53
|
/** Include only tools available on this frontend. */
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Messaging tools — send, react, edit, delete, forward, pin/unpin,
|
|
2
|
+
* Messaging tools — send, end_turn, react, edit, delete, forward, pin/unpin,
|
|
3
|
+
* stop poll.
|
|
3
4
|
*/
|
|
4
5
|
|
|
5
6
|
import { z } from "zod";
|
|
@@ -7,6 +8,83 @@ import type { ToolDefinition } from "./types.js";
|
|
|
7
8
|
import { idSchema } from "./schemas.js";
|
|
8
9
|
|
|
9
10
|
export const messagingTools: ToolDefinition[] = [
|
|
11
|
+
// ── end_turn — explicit final-reply delivery ──────────────────────────
|
|
12
|
+
// Schema-typed alternative to relying on a trailing-text fallback. The
|
|
13
|
+
// model is taught that this is the canonical way to deliver its final
|
|
14
|
+
// reply. Functionally a thin wrapper over send(type="text") + reply_to +
|
|
15
|
+
// buttons; the value is in the EXPLICIT semantic ("this ends my turn")
|
|
16
|
+
// and that the model sees a single tool whose purpose is unambiguous.
|
|
17
|
+
//
|
|
18
|
+
// The output stream is private scratchpad by contract. If the model
|
|
19
|
+
// writes trailing prose without calling end_turn or send, the handler
|
|
20
|
+
// re-prompts ONCE in the same session with a flow-violation reminder
|
|
21
|
+
// so the model can retry properly. Persistent violation after retry
|
|
22
|
+
// results in silent drop + `scratchpad.trailing_text_dropped` counter.
|
|
23
|
+
// `end_turn` is the documented happy path.
|
|
24
|
+
{
|
|
25
|
+
name: "end_turn",
|
|
26
|
+
description: `End your current turn and deliver your final reply to the user. This is the canonical way to respond.
|
|
27
|
+
|
|
28
|
+
Call this AT MOST ONCE per turn — it should be the last tool you call. Behaves like send(type="text") with reply_to and buttons support, but the name makes the intent explicit: this is the message that ends the turn.
|
|
29
|
+
|
|
30
|
+
Examples:
|
|
31
|
+
end_turn(text="Got it sur") — plain reply
|
|
32
|
+
end_turn(text="On it", reply_to=12345) — reply to a specific message ID
|
|
33
|
+
end_turn(text="Pick one", buttons=[[{"text":"A","callback_data":"a"}]]) — with buttons
|
|
34
|
+
end_turn() — silent end (no message; useful when you already replied via earlier send/react calls)
|
|
35
|
+
|
|
36
|
+
Notes:
|
|
37
|
+
- For richer message types (photos, polls, voice, scheduled messages, multi-target), use the send tool — those don't fit "final reply" semantics.
|
|
38
|
+
- The output stream is private scratchpad. If you write prose without calling end_turn or send, the handler re-prompts you ONCE with a flow-violation reminder so you can retry properly. Persistent violation drops the prose silently. end_turn is the documented happy path.`,
|
|
39
|
+
schema: {
|
|
40
|
+
text: z
|
|
41
|
+
.string()
|
|
42
|
+
.optional()
|
|
43
|
+
.describe(
|
|
44
|
+
"Final reply text. Supports Markdown. Omit to end the turn silently (no message sent).",
|
|
45
|
+
),
|
|
46
|
+
reply_to: idSchema
|
|
47
|
+
.optional()
|
|
48
|
+
.describe("Message ID to reply to (typically the user's [msg_id:N])"),
|
|
49
|
+
buttons: z
|
|
50
|
+
.array(
|
|
51
|
+
z.array(
|
|
52
|
+
z.object({
|
|
53
|
+
text: z.string(),
|
|
54
|
+
url: z.string().optional(),
|
|
55
|
+
callback_data: z.string().optional(),
|
|
56
|
+
}),
|
|
57
|
+
),
|
|
58
|
+
)
|
|
59
|
+
.optional()
|
|
60
|
+
.describe("Inline keyboard button rows"),
|
|
61
|
+
},
|
|
62
|
+
execute: async (params, bridge) => {
|
|
63
|
+
// Telegram path: routes to the same bridge actions as send(type="text")
|
|
64
|
+
// so bridgeMessageCount, dedup, and audit logging all stay consistent.
|
|
65
|
+
if (typeof params.text !== "string" || params.text.trim() === "") {
|
|
66
|
+
// Silent end — no bridge call. The handler still sees the tool was
|
|
67
|
+
// invoked (via deliveredTextNorms staying empty), and trailing-text
|
|
68
|
+
// fallback won't fire because there was no trailing prose.
|
|
69
|
+
return { ok: true, silent: true };
|
|
70
|
+
}
|
|
71
|
+
if (params.buttons) {
|
|
72
|
+
return bridge("send_message_with_buttons", {
|
|
73
|
+
text: params.text,
|
|
74
|
+
rows: params.buttons,
|
|
75
|
+
reply_to_message_id: params.reply_to,
|
|
76
|
+
});
|
|
77
|
+
}
|
|
78
|
+
return bridge("send_message", {
|
|
79
|
+
text: params.text,
|
|
80
|
+
reply_to_message_id: params.reply_to,
|
|
81
|
+
});
|
|
82
|
+
},
|
|
83
|
+
frontends: ["telegram", "teams"],
|
|
84
|
+
tag: "messaging",
|
|
85
|
+
endsTurn: true,
|
|
86
|
+
},
|
|
87
|
+
|
|
10
88
|
// ── Telegram unified send ─────────────────────────────────────────────
|
|
11
89
|
{
|
|
12
90
|
name: "send",
|
package/src/core/tools/types.ts
CHANGED
|
@@ -58,4 +58,18 @@ export interface ToolDefinition {
|
|
|
58
58
|
|
|
59
59
|
/** Grouping tag. */
|
|
60
60
|
readonly tag: ToolTag;
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* This tool explicitly ends the model's turn. Backend handlers observe
|
|
64
|
+
* this flag to abort their stream loop cleanly after the tool's bridge
|
|
65
|
+
* call completes — without it, the model is free to keep producing
|
|
66
|
+
* trailing prose into private scratchpad after declaring "I'm done",
|
|
67
|
+
* which then trips the flow-violation re-prompt path. With this flag,
|
|
68
|
+
* an end_turn call genuinely ends the turn.
|
|
69
|
+
*
|
|
70
|
+
* Backend abort mechanism is backend-specific (Claude SDK uses
|
|
71
|
+
* Query.interrupt(); other backends manage their own loop) — this flag
|
|
72
|
+
* is the shared declarative signal, not the implementation.
|
|
73
|
+
*/
|
|
74
|
+
readonly endsTurn?: boolean;
|
|
61
75
|
}
|
|
@@ -14,7 +14,7 @@ import type { Gateway } from "../../core/gateway.js";
|
|
|
14
14
|
import { log, logError } from "../../util/log.js";
|
|
15
15
|
import { deriveNumericChatId } from "../../util/chat-id.js";
|
|
16
16
|
import { resolveModel } from "../../core/models.js";
|
|
17
|
-
import { createTeamsActionHandler } from "./actions.js";
|
|
17
|
+
import { createTeamsActionHandler, postToTeams } from "./actions.js";
|
|
18
18
|
import { splitTeamsMessage, buildAdaptiveCard } from "./formatting.js";
|
|
19
19
|
import {
|
|
20
20
|
initGraphClient,
|
|
@@ -356,18 +356,28 @@ export function createTeamsFrontend(
|
|
|
356
356
|
` tool: ${toolName}${detail ? ` — ${String(detail).slice(0, 100)}` : ""}`,
|
|
357
357
|
);
|
|
358
358
|
},
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
359
|
+
// Deliver assistant text (progress text before tool calls AND
|
|
360
|
+
// the end-of-turn trailing-text fallback) to the Teams chat.
|
|
361
|
+
// Without this, prose-only assistant turns would be silently
|
|
362
|
+
// dropped — same scratchpad bug Telegram hit.
|
|
363
|
+
onTextBlock: async (blockText) => {
|
|
364
|
+
if (!blockText.trim()) return;
|
|
365
|
+
try {
|
|
366
|
+
await postToTeams(webhookUrl, blockText);
|
|
367
|
+
gateway.incrementMessages(numericChatId);
|
|
368
|
+
} catch (err) {
|
|
369
|
+
logError(
|
|
367
370
|
"teams",
|
|
368
|
-
`
|
|
371
|
+
`onTextBlock postToTeams failed: ${err instanceof Error ? err.message : err}`,
|
|
369
372
|
);
|
|
370
373
|
}
|
|
374
|
+
},
|
|
375
|
+
})
|
|
376
|
+
.then(async (_result) => {
|
|
377
|
+
// No fallback delivery — turns without end_turn / send_message
|
|
378
|
+
// are intentional silent ends. Trailing prose without a tool
|
|
379
|
+
// call is scratchpad and dropped; the SDK handler emits a
|
|
380
|
+
// `scratchpad.trailing_text_dropped` metric on those.
|
|
371
381
|
})
|
|
372
382
|
.catch((err) => {
|
|
373
383
|
logError(
|
|
@@ -781,16 +781,10 @@ async function processAndReply(params: ProcessAndReplyParams): Promise<void> {
|
|
|
781
781
|
},
|
|
782
782
|
});
|
|
783
783
|
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
) {
|
|
789
|
-
log(
|
|
790
|
-
"bot",
|
|
791
|
-
`Suppressed fallback text (${result.text.length} chars) — no send tool used`,
|
|
792
|
-
);
|
|
793
|
-
}
|
|
784
|
+
// No fallback delivery — turns that don't call `end_turn` / `send` are
|
|
785
|
+
// intentional silent ends. Trailing prose written without a tool call is
|
|
786
|
+
// scratchpad and dropped; the SDK handler logs the
|
|
787
|
+
// `scratchpad.trailing_text_dropped` metric so missed end_turn calls are counted.
|
|
794
788
|
} finally {
|
|
795
789
|
clearTimeout(streamTimer);
|
|
796
790
|
}
|