switchroom 0.13.55 → 0.13.57

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,20 +26,27 @@ const ORIGINAL_KILL_SWITCH = process.env.SWITCHROOM_DISABLE_SILENCE_POKE
26
26
  interface TestFixtures {
27
27
  emitted: SilencePokeMetric[]
28
28
  fallbacks: FrameworkFallbackContext[]
29
+ awarenessPings: FrameworkFallbackContext[]
29
30
  }
30
31
 
31
32
  function setupDeps(opts?: { thresholds?: Partial<typeof DEFAULT_THRESHOLDS> }): TestFixtures {
32
- const fixtures: TestFixtures = { emitted: [], fallbacks: [] }
33
+ const fixtures: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
33
34
  __setDepsForTests({
34
35
  emitMetric: (e) => fixtures.emitted.push(e),
35
36
  onFrameworkFallback: (ctx) => { fixtures.fallbacks.push(ctx) },
37
+ onAwarenessPing: (ctx) => { fixtures.awarenessPings.push(ctx) },
36
38
  // The ack budget (a new poke that fires *earlier* than `soft`) is
37
39
  // disabled by default in this fixture so the soft/firm/fallback
38
40
  // ladder tests stay isolated from it. The 'ack budget' describe
39
41
  // block opts back in with a real value.
42
+ //
43
+ // The 60s awarenessPing is also disabled by default so the existing
44
+ // soft/firm/fallback ladder tests don't see the new sibling event;
45
+ // the 'awareness ping' describe block opts back in.
40
46
  thresholdsMs: {
41
47
  ...DEFAULT_THRESHOLDS,
42
48
  ack: Number.MAX_SAFE_INTEGER,
49
+ awarenessPing: Number.MAX_SAFE_INTEGER,
43
50
  ...(opts?.thresholds ?? {}),
44
51
  },
45
52
  })
@@ -733,13 +740,18 @@ describe('silence-poke — independence across turns', () => {
733
740
 
734
741
  describe('silence-poke — fallback handler errors do not break timer', () => {
735
742
  it('continues to function if onFrameworkFallback throws', () => {
736
- const fx: TestFixtures = { emitted: [], fallbacks: [] }
743
+ const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
737
744
  __setDepsForTests({
738
745
  emitMetric: (e) => fx.emitted.push(e),
739
746
  onFrameworkFallback: () => { throw new Error('oh no') },
740
- // ack budget out of the way — this test exercises the
747
+ onAwarenessPing: () => {},
748
+ // ack + awareness-ping out of the way — this test exercises the
741
749
  // soft/firm/fallback ladder under a throwing fallback handler.
742
- thresholdsMs: { ...DEFAULT_THRESHOLDS, ack: Number.MAX_SAFE_INTEGER },
750
+ thresholdsMs: {
751
+ ...DEFAULT_THRESHOLDS,
752
+ ack: Number.MAX_SAFE_INTEGER,
753
+ awarenessPing: Number.MAX_SAFE_INTEGER,
754
+ },
743
755
  })
744
756
  startTurn('k', 0)
745
757
  expect(() => {
@@ -752,12 +764,17 @@ describe('silence-poke — fallback handler errors do not break timer', () => {
752
764
  })
753
765
 
754
766
  it('continues to function if onFrameworkFallback returns a rejected promise', async () => {
755
- const fx: TestFixtures = { emitted: [], fallbacks: [] }
767
+ const fx: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
756
768
  __setDepsForTests({
757
769
  emitMetric: (e) => fx.emitted.push(e),
758
770
  onFrameworkFallback: () => Promise.reject(new Error('async fail')),
759
- // ack budget out of the way — see the throwing-handler test above.
760
- thresholdsMs: { ...DEFAULT_THRESHOLDS, ack: Number.MAX_SAFE_INTEGER },
771
+ onAwarenessPing: () => {},
772
+ // ack + awareness-ping out of the way — see the throwing-handler test above.
773
+ thresholdsMs: {
774
+ ...DEFAULT_THRESHOLDS,
775
+ ack: Number.MAX_SAFE_INTEGER,
776
+ awarenessPing: Number.MAX_SAFE_INTEGER,
777
+ },
761
778
  })
762
779
  startTurn('k', 0)
763
780
  __tickForTests(75_000)
@@ -860,3 +877,96 @@ describe('silence-poke — performance', () => {
860
877
  expect(elapsed).toBeLessThan(50)
861
878
  })
862
879
  })
880
+
881
describe('silence-poke — awareness ping (early framework-owned user-visible status)', () => {
  // setupDeps() parks the awareness ping at MAX_SAFE_INTEGER by default;
  // the cases below opt back in with a 60s threshold through this helper.
  const withAwarenessPing = (): TestFixtures =>
    setupDeps({ thresholds: { awarenessPing: 60_000 } })

  /** Kinds of every metric captured so far, in emission order. */
  const emittedKinds = (fixtures: TestFixtures) =>
    fixtures.emitted.map((metric) => metric.kind)

  it('fires once at 60s when no outbound has happened', () => {
    const fixtures = withAwarenessPing()
    startTurn('k', 0)

    // One second short of the threshold: nothing yet.
    __tickForTests(59_000)
    expect(fixtures.awarenessPings).toHaveLength(0)

    // At the threshold: exactly one ping, plus its telemetry event.
    __tickForTests(60_000)
    expect(fixtures.awarenessPings).toHaveLength(1)
    expect(fixtures.awarenessPings[0]!.silenceMs).toBeGreaterThanOrEqual(60_000)
    expect(emittedKinds(fixtures)).toContain('awareness_ping_sent')
  })

  it('is one-shot per turn — does not re-fire as silence continues', () => {
    const fixtures = withAwarenessPing()
    startTurn('k', 0)
    for (const now of [60_000, 120_000, 180_000]) {
      __tickForTests(now)
    }
    expect(fixtures.awarenessPings).toHaveLength(1)
  })

  it('is suppressed by an early outbound', () => {
    const fixtures = withAwarenessPing()
    startTurn('k', 0)
    noteOutbound('k', 30_000)
    __tickForTests(90_000)
    expect(fixtures.awarenessPings).toHaveLength(0)
  })

  it('is suppressed when subagentDispatchActive is true', () => {
    // A sub-agent dispatch already widens soft to 300s; the awareness
    // ping is expected to defer as well so it does not pre-empt the
    // sub-agent's own progress signal.
    const fixtures = withAwarenessPing()
    startTurn('k', 0)
    noteSubagentDispatch('k')
    __tickForTests(120_000)
    expect(fixtures.awarenessPings).toHaveLength(0)
  })

  it('does NOT advance the soft/firm/fallback ladder', () => {
    // The awareness ping is a sibling signal: soft/firm/fallback keep
    // escalating on their own schedule (as does the model-targeted
    // ack-poke).
    const fixtures = withAwarenessPing()
    startTurn('k', 0)
    __tickForTests(60_000) // awareness fires
    __tickForTests(75_000) // soft fires
    __tickForTests(180_000) // firm fires
    __tickForTests(300_000) // fallback fires

    expect(fixtures.awarenessPings).toHaveLength(1)
    expect(fixtures.fallbacks).toHaveLength(1)

    const pokeLevels = fixtures.emitted
      .filter((metric) => metric.kind === 'silence_poke_fired')
      .map((metric) => (metric as { level: string }).level)
    expect(pokeLevels).toEqual(['soft', 'firm'])
    expect(emittedKinds(fixtures)).toContain('silence_fallback_sent')
  })

  it('carries fallbackKind=thinking when a recent thinking event landed', () => {
    const fixtures = withAwarenessPing()
    startTurn('k', 0)
    noteThinking('k', 45_000)
    __tickForTests(60_000)
    expect(fixtures.awarenessPings).toHaveLength(1)
    expect(fixtures.awarenessPings[0]!.fallbackKind).toBe('thinking')
  })

  it('does not fire if turn ends before the threshold', () => {
    const fixtures = withAwarenessPing()
    startTurn('k', 0)
    endTurn('k')
    __tickForTests(120_000)
    expect(fixtures.awarenessPings).toHaveLength(0)
  })

  it('handler errors do not break the timer', () => {
    // Wired by hand (not via setupDeps) so the awareness handler can throw.
    const captured: TestFixtures = { emitted: [], fallbacks: [], awarenessPings: [] }
    __setDepsForTests({
      emitMetric: (metric) => captured.emitted.push(metric),
      onFrameworkFallback: () => {},
      onAwarenessPing: () => { throw new Error('awareness handler boom') },
      thresholdsMs: {
        ...DEFAULT_THRESHOLDS,
        ack: Number.MAX_SAFE_INTEGER,
        awarenessPing: 60_000,
      },
    })
    startTurn('k', 0)

    const tickAtThreshold = () => __tickForTests(60_000)
    expect(tickAtThreshold).not.toThrow()
    // Telemetry is still emitted even though the handler threw.
    expect(captured.emitted.map((metric) => metric.kind)).toContain('awareness_ping_sent')
  })
})
@@ -0,0 +1,128 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import { deriveIntentSurface } from "../tool-intent-surface.js";
3
+
4
describe("deriveIntentSurface — gateway lifts model's tool intent into framework-voice status", () => {
  /** Shorthand: run the decision and hand back only the rendered text. */
  const surfaceText = (...args: Parameters<typeof deriveIntentSurface>) =>
    deriveIntentSurface(...args).text;

  describe("tool-class verb mapping", () => {
    it("Bash → running", () => {
      const rendered = surfaceText("Bash", { command: "ls -la /var/log" });
      expect(rendered).toContain("<i>running:</i>");
      expect(rendered).toContain("ls -la /var/log");
    });

    it("WebSearch → searching", () => {
      const rendered = surfaceText("WebSearch", { query: "Victoria drink driving" });
      expect(rendered).toContain("<i>searching:</i>");
      expect(rendered).toContain("Victoria drink driving");
    });

    it("WebFetch → fetching (hostname extracted)", () => {
      const rendered = surfaceText("WebFetch", { url: "https://example.com/a/b" });
      expect(rendered).toContain("<i>fetching:</i>");
      expect(rendered).toContain("example.com");
    });

    it("Read → reading (basename only)", () => {
      const rendered = surfaceText("Read", { file_path: "/etc/os-release" });
      expect(rendered).toContain("<i>reading:</i>");
      expect(rendered).toContain("os-release");
      // The directory part must not leak into the surface text.
      expect(rendered).not.toContain("/etc/");
    });

    it("Write → writing", () => {
      const rendered = surfaceText("Write", { file_path: "/tmp/hello.sh" });
      expect(rendered).toContain("<i>writing:</i>");
      expect(rendered).toContain("hello.sh");
    });

    it("Edit / MultiEdit / NotebookEdit → editing", () => {
      ["Edit", "MultiEdit", "NotebookEdit"].forEach((editTool) => {
        expect(surfaceText(editTool, { file_path: "/a/foo.ts" })).toContain("<i>editing:</i>");
      });
    });

    it("Grep / Glob → searching", () => {
      const grepText = surfaceText("Grep", { pattern: "TODO", path: "src/" });
      const globText = surfaceText("Glob", { pattern: "**/*.ts" });
      expect(grepText).toContain("<i>searching:</i>");
      expect(globText).toContain("<i>searching:</i>");
    });

    it("Task / Agent → dispatching", () => {
      const taskText = surfaceText("Task", { description: "review the auth code" });
      expect(taskText).toContain("<i>dispatching:</i>");
    });
  });

  describe("user-facing tools stay quiet (never re-surfaced)", () => {
    const surfaceTools = [
      "mcp__switchroom-telegram__reply",
      "mcp__switchroom-telegram__stream_reply",
      "mcp__switchroom-telegram__edit_message",
      "mcp__switchroom-telegram__react",
      "mcp__switchroom-telegram__send_typing",
      "mcp__switchroom-telegram__progress_update",
    ];
    // One generated case per tool so a failure names the offending tool.
    surfaceTools.forEach((tool) => {
      it(`returns null for ${tool}`, () => {
        expect(surfaceText(tool, { text: "hi", chat_id: "1" })).toBeNull();
      });
    });
  });

  describe("unknown MCP tools", () => {
    it("uses 'using <tool>' for unknown MCP tool servers", () => {
      const rendered = surfaceText("mcp__google-workspace__list_drive_files", {
        folderId: "abc",
      });
      expect(rendered).toMatch(/<i>using list[ _]drive[ _]files:?<\/i>/);
    });

    it("falls back gracefully when input has no recognisable label field", () => {
      // No label can be resolved from this input → verb-only output.
      const rendered = surfaceText("Bash", { weird: "no-command-here" });
      expect(rendered).toBe("<i>running</i>");
    });
  });

  describe("privacy / safety", () => {
    it("escapes HTML in the label so a malicious input can't inject markup", () => {
      const rendered = surfaceText("Bash", {
        command: "echo '<script>alert(1)</script>'",
      });
      expect(rendered).not.toContain("<script>");
      expect(rendered).toContain("&lt;script&gt;");
    });

    it("truncates long labels to keep the surface message tight", () => {
      // toolLabel already truncates Bash to 40 chars; the safety cap then
      // bounds anything else to MAX_LABEL_LEN.
      const oversizedCommand = "echo " + "x".repeat(500);
      const rendered = surfaceText("Bash", { command: oversizedCommand }) ?? "";
      expect(rendered.length).toBeLessThan(200);
    });

    it("returns null when toolName is empty (defensive)", () => {
      expect(surfaceText("", { command: "x" })).toBeNull();
    });
  });

  describe("precomputed label precedence", () => {
    it("uses precomputed label when present (matches toolLabel's contract)", () => {
      const rendered = surfaceText("Bash", { command: "ls" }, "checking the logs");
      expect(rendered).toContain("<i>running:</i>");
      expect(rendered).toContain("checking the logs");
    });
  });
});
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Tool-intent surface — lifts the model's already-formed `tool_use`
3
+ * intent (tool name + input) into a brief user-visible Telegram
4
+ * message when the model goes to work without first calling reply.
5
+ *
6
+ * Companion to the PreToolUse ack-first gate (#1921). The gate forces
7
+ * the model to author a brief acknowledgement via the reply tool
8
+ * before any other tool runs. THIS surface is the lower-overhead
9
+ * sibling: when the model's own `tool_use` stream already carries the
10
+ * intent (e.g. `Bash {command: "ls -la /var/log"}`), the gateway can
11
+ * pass that intent through as the user-visible "we're alive and this
12
+ * is what we're doing" beat, without the model having to call any
13
+ * extra tool.
14
+ *
15
+ * Why both. The gate produces MODEL-VOICE acks ("on it — checking the
16
+ * logs") — warmer, persona-driven. The surface produces FRAMEWORK-
17
+ * VOICE pass-throughs ("_running:_ ls -la /var/log") — honest and
18
+ * cheaper. They compose: if the gate fires, the model authors an ack
19
+ * which lands first; the surface stays quiet (already-acked). If the
20
+ * gate fails (kill-switched / regression / hook spawn failure), the
21
+ * surface still lands — defence in depth.
22
+ *
23
+ * Output format: italicised framework verb + colon + the model's own
24
+ * `toolLabel()` output. Italics are the conventional "framework
25
+ * narrating, not the model speaking" marker; the verb signals which
26
+ * lane the work is in. `toolLabel()` already truncates its output;
27
+ * a MAX_LABEL_LEN (140-char) safety cap is applied on top.
28
+ *
29
+ * Privacy posture. The model's `tool_use.input` may contain user-
30
+ * provided strings (web search queries, file paths the user named).
31
+ * Those are already going to land in chat history one way or another
32
+ * (e.g. via the model's reply describing what it did), so surfacing
33
+ * a brief label here doesn't expand the leakage surface materially.
34
+ * `toolLabel()` already truncates its output, and `deriveIntentSurface()`
35
+ * HTML-escapes the label before it reaches Telegram.
36
+ */
37
+
38
+ import { toolLabel } from "./tool-labels.js";
39
+
40
+ const MAX_LABEL_LEN = 140;
41
+
42
/** Verbs for tools with a dedicated action class, keyed by lower-cased tool name. */
const FRAMEWORK_VERBS: ReadonlyMap<string, string> = new Map([
  ["bash", "running"],
  ["bashoutput", "running"],
  ["killshell", "running"],
  ["websearch", "searching"],
  ["grep", "searching"],
  ["glob", "searching"],
  ["webfetch", "fetching"],
  ["read", "reading"],
  ["write", "writing"],
  ["edit", "editing"],
  ["multiedit", "editing"],
  ["notebookedit", "editing"],
  ["todowrite", "noting"],
  ["todoread", "noting"],
  ["task", "dispatching"],
  ["agent", "dispatching"],
  ["toolsearch", "loading tools"],
]);

/**
 * Compute the user-facing "framework verb" for a tool. Verbs match the
 * action class, so the user reads "running" for Bash, "searching" for
 * WebSearch, and so on.
 *
 * MCP tools arrive as `mcp__<server>__<tool>`; the `mcp__<server>__`
 * prefix is stripped and the remaining tool name is matched
 * case-insensitively against the verb table.
 *
 * @param toolName Tool name as it appears in the `tool_use` stream.
 * @returns A known verb, or a generic fallback: `using <tool>` for MCP
 *   tools (underscores in the tool name rendered as spaces), otherwise
 *   `using <toolName>` with the name left untouched.
 */
function frameworkVerbFor(toolName: string): string {
  // The server segment is matched lazily rather than with `[^_]+`, so
  // server names that contain underscores (`mcp__my_server__tool`) are
  // stripped too. The first `__` after the server still ends the prefix,
  // so names the stricter pattern accepted resolve to the same suffix.
  const m = /^mcp__.+?__(.+)$/.exec(toolName);
  const mcpTool = m?.[1];
  const suffix = (mcpTool ?? toolName).toLowerCase();

  const verb = FRAMEWORK_VERBS.get(suffix);
  if (verb !== undefined) return verb;

  // Unknown tool: name it rather than guess at a verb. For MCP tools the
  // label is the tool suffix (not the server), with underscores as spaces.
  if (mcpTool !== undefined) return `using ${mcpTool.replace(/_/g, " ")}`;
  return `using ${toolName}`;
}
88
+
89
/** Tool suffixes treated as the chat surface itself. */
const USER_FACING_SUFFIXES: ReadonlySet<string> = new Set([
  "reply",
  "stream_reply",
  "edit_message",
  "react",
  "send_typing",
  "pin_message",
  "delete_message",
  "forward_message",
  "download_attachment",
  "get_recent_messages",
  "progress_update",
]);

/**
 * Whether a tool IS the user surface (reply / stream_reply / etc.), in
 * which case the gateway never re-surfaces it. Mirrors
 * `isTelegramSurfaceTool` in `tool-names.ts`.
 *
 * The `mcp__switchroom-telegram__` prefix is stripped when present; a
 * name without that prefix is compared as-is.
 */
function isUserFacingTool(toolName: string): boolean {
  const prefixed = /^mcp__switchroom-telegram__(.+)$/.exec(toolName);
  const candidate = prefixed ? prefixed[1] : toolName;
  return USER_FACING_SUFFIXES.has(candidate);
}
109
+
110
/** Outcome of the intent-surface decision. */
export interface SurfaceTextResult {
  /**
   * HTML text the gateway sends to Telegram. `null` means the surface
   * should NOT fire (for example, the tool is itself user-facing or the
   * tool name is empty).
   */
  text: null | string;
}
115
+
116
/**
 * Pure decision: given a tool name, its input, and an optional
 * precomputed label (from the existing PreToolUse label hook), return
 * the HTML the gateway should send, or null to stay quiet.
 *
 * Exposed for unit tests; the gateway wires this into the `tool_use`
 * session-event handler.
 *
 * @param toolName Tool name from the `tool_use` event. An empty name or
 *   a user-facing tool yields `{ text: null }`.
 * @param toolInput Raw tool input, forwarded to `toolLabel()`.
 * @param precomputedLabel Optional label forwarded to `toolLabel()`.
 * @returns `<i>verb:</i> label` when a label resolves, `<i>verb</i>` when
 *   it does not, or `{ text: null }` when the surface must not fire.
 */
export function deriveIntentSurface(
  toolName: string,
  toolInput: Record<string, unknown> | undefined,
  precomputedLabel?: string,
): SurfaceTextResult {
  if (!toolName || isUserFacingTool(toolName)) return { text: null };

  const label = toolLabel(toolName, toolInput, undefined, precomputedLabel);
  const verbHtml = escapeHtml(frameworkVerbFor(toolName));

  if (!label || !label.trim()) {
    // No label for this tool/input shape: fall back to the bare verb so
    // the user at least sees "_running_" rather than nothing.
    return { text: `<i>${verbHtml}</i>` };
  }

  // Backticks and quotes from `toolLabel()` pass through (Telegram HTML
  // accepts them); `&`, `<` and `>` are escaped so a malicious or odd
  // input can't inject markup.
  //
  // The length cap is applied to the escaped text, so the cut can land
  // inside an entity (`&lt;` -> `&l`) or between the two halves of a
  // surrogate pair. Either leaves malformed output, so drop the dangling
  // fragment. After escaping, every `&` starts a complete `&...;` entity,
  // so a trailing `&` plus letters without `;` can only be a cut entity.
  const safeLabel = escapeHtml(label)
    .slice(0, MAX_LABEL_LEN)
    .replace(/&[a-z]*$/, "")
    .replace(/[\uD800-\uDBFF]$/, "");

  return { text: `<i>${verbHtml}:</i> ${safeLabel}` };
}
149
+
150
/**
 * Replace `&`, `<` and `>` with their HTML entities. `&` is handled
 * first so the ampersands introduced by the later substitutions are not
 * escaped a second time.
 */
function escapeHtml(s: string): string {
  const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
    [/&/g, "&amp;"],
    [/</g, "&lt;"],
    [/>/g, "&gt;"],
  ];
  return substitutions.reduce(
    (escaped, [pattern, entity]) => escaped.replace(pattern, entity),
    s,
  );
}