switchroom 0.13.56 → 0.13.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -53,6 +53,7 @@ import { OutboundDedupCache } from '../recent-outbound-dedup.js'
53
53
  import { createInboundCoalescer, inboundCoalesceKey } from './inbound-coalesce.js'
54
54
  import { StatusReactionController } from '../status-reactions.js'
55
55
  import { isTelegramReplyTool, isTelegramSurfaceTool } from '../tool-names.js'
56
+ import { deriveIntentSurface } from '../tool-intent-surface.js'
56
57
  import { toolLabel } from '../tool-labels.js'
57
58
  import { createTypingWrapper } from '../typing-wrap.js'
58
59
  import { type DraftStreamHandle } from '../draft-stream.js'
@@ -1291,6 +1292,15 @@ type CurrentTurn = {
1291
1292
  // Phase 1 of #332: count of tool_use events in the current turn, for
1292
1293
  // the tool_call_count column in the turns registry.
1293
1294
  toolCallCount: number
1295
+ // Tool-intent surface (the human-feel UX follow-up to #1921's
1296
+ // PreToolUse gate). When the model emits its first non-reply tool_use
1297
+ // of a turn AND no outbound has happened yet, the gateway lifts the
1298
+ // tool's already-formed intent (name + input → `toolLabel()`) into a
1299
+ // user-visible "<i>running:</i> ls -la /var/log" message. One-shot
1300
+ // per turn — subsequent tool_use events stay quiet so a multi-tool
1301
+ // turn doesn't spam. The model never has to call reply just to ack;
1302
+ // its own intent stream IS the ack source.
1303
+ intentSurfaceFired: boolean
1294
1304
  // Issue #195 — answer-lane streaming. Lazily created on the first text
1295
1305
  // event of a turn (once enough text has accumulated, the stream itself
1296
1306
  // gates on minInitialChars). Materialized and cleared at turn_end.
@@ -6832,6 +6842,7 @@ function handleSessionEvent(ev: SessionEvent): void {
6832
6842
  lastAssistantMsgId: null,
6833
6843
  lastAssistantDone: false,
6834
6844
  toolCallCount: 0,
6845
+ intentSurfaceFired: false,
6835
6846
  answerStream: null,
6836
6847
  isDm: isDmChatId(ev.chatId),
6837
6848
  }
@@ -6949,6 +6960,66 @@ function handleSessionEvent(ev: SessionEvent): void {
6949
6960
  turn.orphanedReplyTimeoutId = null
6950
6961
  }
6951
6962
  }
6963
+ // Tool-intent surface — companion to the PreToolUse ack-first gate
6964
+ // (#1921). On the FIRST non-reply tool_use of a turn AND only when
6965
+ // no outbound has happened yet, the gateway lifts the model's tool
6966
+ // intent (name + input → `toolLabel()`) into a brief framework-voice
6967
+ // status: `<i>running:</i> ls -la /var/log`. The model never has to
6968
+ // call reply just to ack — its own intent stream IS the ack. The
6969
+ // gate continues to fire IN PARALLEL: if it produces a model-voice
6970
+ // ack first (`replyCalled=true`), the surface stays quiet by the
6971
+ // condition below. One-shot per turn.
6972
+ if (
6973
+ !turn.replyCalled
6974
+ && !turn.intentSurfaceFired
6975
+ && !isTelegramSurfaceTool(name)
6976
+ ) {
6977
+ turn.intentSurfaceFired = true
6978
+ // `ev.input` is the canonical SessionEvent property
6979
+ // (`telegram-plugin/session-tail.ts:95`). All other tool_use
6980
+ // sites in this file use `ev.input` — keep that consistent.
6981
+ const surface = deriveIntentSurface(name, ev.input, ev.precomputedLabel)
6982
+ if (surface.text != null) {
6983
+ // Mark the ack-flag synchronously BEFORE the async send so a
6984
+ // PreToolUse ack-first hook (#1921) firing concurrently for this
6985
+ // same tool call sees the flag already present and allows the
6986
+ // tool through. The Telegram send is fire-and-forget; failure
6987
+ // is logged but does not block the model.
6988
+ try {
6989
+ markAckSent()
6990
+ } catch (err) {
6991
+ process.stderr.write(`telegram gateway: intent-surface markAckSent failed: ${err}\n`)
6992
+ }
6993
+ const surfaceChat = turn.sessionChatId
6994
+ const surfaceThread = turn.sessionThreadId
6995
+ const surfaceText = surface.text
6996
+ void (async () => {
6997
+ try {
6998
+ await robustApiCall(
6999
+ () => bot.api.sendMessage(surfaceChat, surfaceText, {
7000
+ ...(surfaceThread != null ? { message_thread_id: surfaceThread } : {}),
7001
+ parse_mode: 'HTML',
7002
+ // Framework-narrating beat — silent, ambient, not a
7003
+ // device buzz. The user is meant to glance and know
7004
+ // the model is alive + on-task.
7005
+ disable_notification: true,
7006
+ }),
7007
+ { chat_id: surfaceChat, ...(surfaceThread != null ? { threadId: surfaceThread } : {}), verb: 'intent-surface' },
7008
+ )
7009
+ // Deliberately NOT calling signalTracker.noteOutbound /
7010
+ // silencePoke.noteOutbound here — framework-owned
7011
+ // ambient messages are not model-author outbounds, so
7012
+ // they should not reset the TTFO clock or short-circuit
7013
+ // the silence-poke ladder. Mirrors the sibling
7014
+ // `onAwarenessPing` handler (silence-poke.ts:169
7015
+ // contract: "Caller must NOT call back into noteOutbound
7016
+ // for this — it's a framework-sourced message").
7017
+ } catch (err) {
7018
+ process.stderr.write(`telegram gateway: intent-surface send failed: ${err}\n`)
7019
+ }
7020
+ })()
7021
+ }
7022
+ }
6952
7023
  if (!ctrl) return
6953
7024
  if (isTelegramSurfaceTool(name)) return
6954
7025
  ctrl.setTool(name)
@@ -0,0 +1,128 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { deriveIntentSurface } from "../tool-intent-surface.js";
3
+
4
describe("deriveIntentSurface — gateway lifts model's tool intent into framework-voice status", () => {
  // Every case below only inspects the rendered HTML, so unwrap `.text` once.
  const render = (
    tool: string,
    input: Record<string, unknown>,
    precomputed?: string,
  ): string | null => deriveIntentSurface(tool, input, precomputed).text;

  describe("tool-class verb mapping", () => {
    it("Bash → running", () => {
      const text = render("Bash", { command: "ls -la /var/log" });
      expect(text).toContain("<i>running:</i>");
      expect(text).toContain("ls -la /var/log");
    });

    it("WebSearch → searching", () => {
      const text = render("WebSearch", { query: "Victoria drink driving" });
      expect(text).toContain("<i>searching:</i>");
      expect(text).toContain("Victoria drink driving");
    });

    it("WebFetch → fetching (hostname extracted)", () => {
      const text = render("WebFetch", { url: "https://example.com/a/b" });
      expect(text).toContain("<i>fetching:</i>");
      expect(text).toContain("example.com");
    });

    it("Read → reading (basename only)", () => {
      const text = render("Read", { file_path: "/etc/os-release" });
      expect(text).toContain("<i>reading:</i>");
      expect(text).toContain("os-release");
      // The directory part must not leak into the surfaced label.
      expect(text).not.toContain("/etc/");
    });

    it("Write → writing", () => {
      const text = render("Write", { file_path: "/tmp/hello.sh" });
      expect(text).toContain("<i>writing:</i>");
      expect(text).toContain("hello.sh");
    });

    it("Edit / MultiEdit / NotebookEdit → editing", () => {
      ["Edit", "MultiEdit", "NotebookEdit"].forEach((editTool) => {
        expect(render(editTool, { file_path: "/a/foo.ts" })).toContain("<i>editing:</i>");
      });
    });

    it("Grep / Glob → searching", () => {
      const grepText = render("Grep", { pattern: "TODO", path: "src/" });
      expect(grepText).toContain("<i>searching:</i>");
      const globText = render("Glob", { pattern: "**/*.ts" });
      expect(globText).toContain("<i>searching:</i>");
    });

    it("Task / Agent → dispatching", () => {
      const text = render("Task", { description: "review the auth code" });
      expect(text).toContain("<i>dispatching:</i>");
    });
  });

  describe("user-facing tools stay quiet (never re-surfaced)", () => {
    // These tools ARE the chat surface, so no status line may be derived.
    [
      "mcp__switchroom-telegram__reply",
      "mcp__switchroom-telegram__stream_reply",
      "mcp__switchroom-telegram__edit_message",
      "mcp__switchroom-telegram__react",
      "mcp__switchroom-telegram__send_typing",
      "mcp__switchroom-telegram__progress_update",
    ].forEach((tool) => {
      it(`returns null for ${tool}`, () => {
        expect(render(tool, { text: "hi", chat_id: "1" })).toBeNull();
      });
    });
  });

  describe("unknown MCP tools", () => {
    it("uses 'using <tool>' for unknown MCP tool servers", () => {
      const text = render("mcp__google-workspace__list_drive_files", { folderId: "abc" });
      expect(text).toMatch(/<i>using list[ _]drive[ _]files:?<\/i>/);
    });

    it("falls back gracefully when input has no recognisable label field", () => {
      // Without a resolvable label only the italic verb is emitted (no colon).
      const text = render("Bash", { weird: "no-command-here" });
      expect(text).toBe("<i>running</i>");
    });
  });

  describe("privacy / safety", () => {
    it("escapes HTML in the label so a malicious input can't inject markup", () => {
      const text = render("Bash", { command: "echo '<script>alert(1)</script>'" });
      expect(text).not.toContain("<script>");
      expect(text).toContain("&lt;script&gt;");
    });

    it("truncates long labels to keep the surface message tight", () => {
      const hugeCommand = "echo " + "x".repeat(500);
      // Whatever truncation applies upstream, the rendered message stays short.
      const rendered = render("Bash", { command: hugeCommand }) ?? "";
      expect(rendered.length).toBeLessThan(200);
    });

    it("returns null when toolName is empty (defensive)", () => {
      expect(render("", { command: "x" })).toBeNull();
    });
  });

  describe("precomputed label precedence", () => {
    it("uses precomputed label when present (matches toolLabel's contract)", () => {
      const text = render("Bash", { command: "ls" }, "checking the logs");
      expect(text).toContain("<i>running:</i>");
      expect(text).toContain("checking the logs");
    });
  });
});
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Tool-intent surface — lifts the model's already-formed `tool_use`
3
+ * intent (tool name + input) into a brief user-visible Telegram
4
+ * message when the model goes to work without first calling reply.
5
+ *
6
+ * Companion to the PreToolUse ack-first gate (#1921). The gate forces
7
+ * the model to author a brief acknowledgement via the reply tool
8
+ * before any other tool runs. THIS surface is the lower-overhead
9
+ * sibling: when the model's own `tool_use` stream already carries the
10
+ * intent (e.g. `Bash {command: "ls -la /var/log"}`), the gateway can
11
+ * pass that intent through as the user-visible "we're alive and this
12
+ * is what we're doing" beat, without the model having to call any
13
+ * extra tool.
14
+ *
15
+ * Why both. The gate produces MODEL-VOICE acks ("on it — checking the
16
+ * logs") — warmer, persona-driven. The surface produces FRAMEWORK-
17
+ * VOICE pass-throughs ("_running:_ ls -la /var/log") — honest and
18
+ * cheaper. They compose: if the gate fires, the model authors an ack
19
+ * which lands first; the surface stays quiet (already-acked). If the
20
+ * gate fails (kill-switched / regression / hook spawn failure), the
21
+ * surface still lands — defence in depth.
22
+ *
23
+ * Output format: italicised framework verb + colon + the model's own
24
+ * `toolLabel()` output. Italics are the conventional "framework
25
+ * narrating, not the model speaking" marker; the verb signals which
26
+ * lane the work is in. The label part is capped at `MAX_LABEL_LEN`
27
+ * (140) characters, on top of any truncation `toolLabel()` applies.
28
+ *
29
+ * Privacy posture. The model's `tool_use.input` may contain user-
30
+ * provided strings (web search queries, file paths the user named).
31
+ * Those are already going to land in chat history one way or another
32
+ * (e.g. via the model's reply describing what it did), so surfacing
33
+ * a brief label here doesn't expand the leakage surface materially.
34
+ * `toolLabel()` already truncates its output, and the label is
35
+ * HTML-escaped here before it is sent to Telegram.
36
+ */
37
+
38
+ import { toolLabel } from "./tool-labels.js";
39
+
40
+ const MAX_LABEL_LEN = 140; // cap, in characters, applied to the label part of the surface text
41
+
42
/**
 * Map a tool name to the short "framework verb" shown to the user.
 *
 * The verb names the action class ("running" for Bash, "searching" for
 * WebSearch / Grep / Glob, and so on). A name of the form
 * `mcp__<server>__<tool>` is matched on its `<tool>` part,
 * case-insensitively. Tools without a dedicated verb fall back to
 * `using <tool>` for MCP tools (underscores shown as spaces) or
 * `using <toolName>` for everything else.
 *
 * @param toolName - Tool name as it appears in the `tool_use` stream.
 * @returns The verb phrase, unescaped and without a trailing colon.
 */
function frameworkVerbFor(toolName: string): string {
  // Strip the "mcp__<server>__" prefix. The server segment is matched
  // lazily up to the first "__" separator, so server names containing
  // hyphens or single underscores (e.g. `my_server`) are recognised. A
  // `[^_]+` server pattern would reject any name with an underscore and
  // leave the raw `mcp__…` string in the user-visible text.
  const m = /^mcp__(.+?)__(.+)$/.exec(toolName);
  const bareName = m ? m[2] : toolName;

  switch (bareName.toLowerCase()) {
    case "bash":
    case "bashoutput":
    case "killshell":
      return "running";
    case "websearch":
    case "grep":
    case "glob":
      return "searching";
    case "webfetch":
      return "fetching";
    case "read":
      return "reading";
    case "write":
      return "writing";
    case "edit":
    case "multiedit":
    case "notebookedit":
      return "editing";
    case "todowrite":
    case "todoread":
      return "noting";
    case "task":
    case "agent":
      return "dispatching";
    case "toolsearch":
      return "loading tools";
    default:
      // Unknown tool: a generic "using …" is more honest than guessing.
      // MCP tools show only their own name, with underscores as spaces.
      return m ? `using ${bareName.replace(/_/g, " ")}` : `using ${toolName}`;
  }
}
88
+
89
/**
 * Report whether a tool is itself part of the chat surface (reply,
 * stream_reply, react, ...). Such tools are never re-surfaced as an
 * intent message. The name may be given bare or with the
 * `mcp__switchroom-telegram__` prefix. Intended to mirror
 * `isTelegramSurfaceTool` in `tool-names.ts`.
 */
function isUserFacingTool(toolName: string): boolean {
  const chatSurfaceTools = [
    "reply",
    "stream_reply",
    "edit_message",
    "react",
    "send_typing",
    "pin_message",
    "delete_message",
    "forward_message",
    "download_attachment",
    "get_recent_messages",
    "progress_update",
  ];
  // Compare on the part after the server prefix; a name without the
  // prefix is compared as-is.
  const prefixed = /^mcp__switchroom-telegram__(.+)$/.exec(toolName);
  const bareName = prefixed ? prefixed[1] : toolName;
  return chatSurfaceTools.includes(bareName);
}
109
+
110
/** Outcome of the intent-surface decision for a single `tool_use` event. */
export interface SurfaceTextResult {
  /** Final HTML text the gateway sends to Telegram, or null when the
   *  surface should NOT fire (empty tool name or user-facing tool). */
  text: string | null;
}

/**
 * Pure decision: given a tool name, its input and an optional precomputed
 * label, build the HTML status line the gateway should send, or signal
 * that it should stay quiet.
 *
 * Exposed for unit tests; the gateway wires this into the `tool_use`
 * session-event handler.
 *
 * @param toolName - Tool name from the `tool_use` event.
 * @param toolInput - Raw tool input, forwarded to `toolLabel()`.
 * @param precomputedLabel - Optional ready-made label, forwarded to
 *   `toolLabel()`.
 * @returns `{ text: null }` for an empty tool name or a user-facing tool;
 *   `<i>verb</i>` when no label can be derived; otherwise
 *   `<i>verb:</i> label`, with the label capped at `MAX_LABEL_LEN`
 *   characters and HTML-escaped.
 */
export function deriveIntentSurface(
  toolName: string,
  toolInput: Record<string, unknown> | undefined,
  precomputedLabel?: string,
): SurfaceTextResult {
  if (!toolName || isUserFacingTool(toolName)) return { text: null };

  const verb = escapeHtml(frameworkVerbFor(toolName));
  const label = toolLabel(toolName, toolInput, undefined, precomputedLabel);
  if (!label || !label.trim()) {
    // No label for this tool/input shape: fall back to the bare verb so
    // the user still sees a beat rather than nothing.
    return { text: `<i>${verb}</i>` };
  }

  // Truncate the RAW label first, then escape. Slicing the escaped string
  // could cut an entity in half (`&lt;` -> `&l`) and would count entity
  // characters against the cap instead of what the user actually sees.
  // Backticks and quotes pass through; only `&`, `<`, `>` are escaped so
  // odd or malicious input cannot inject markup.
  const safeLabel = escapeHtml(label.slice(0, MAX_LABEL_LEN));
  return { text: `<i>${verb}:</i> ${safeLabel}` };
}
149
+
150
/**
 * Escape the three characters that are significant in Telegram's HTML
 * parse mode (`&`, `<`, `>`). The ampersand is handled first so the
 * entities produced by the later steps are not escaped a second time.
 */
function escapeHtml(s: string): string {
  const steps: ReadonlyArray<readonly [RegExp, string]> = [
    [/&/g, "&amp;"],
    [/</g, "&lt;"],
    [/>/g, "&gt;"],
  ];
  return steps.reduce((acc, [pattern, entity]) => acc.replace(pattern, entity), s);
}