switchroom 0.14.8 → 0.14.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,5 @@
1
1
  import { describe, it, expect } from "vitest";
2
2
  import {
3
- makeEmptyActivityState,
4
- register,
5
- formatSummary,
6
- registerAndRender,
7
- verbForTool,
8
3
  describeToolUse,
9
4
  appendActivityLine,
10
5
  appendActivityLabel,
@@ -77,217 +72,6 @@ describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () =>
77
72
  });
78
73
  });
79
74
 
80
- describe("verbForTool — tool name → past-tense verb", () => {
81
- it("maps standard CLI tools to readable verbs", () => {
82
- expect(verbForTool("Read")).toBe("read");
83
- expect(verbForTool("Write")).toBe("created");
84
- expect(verbForTool("Edit")).toBe("edited");
85
- expect(verbForTool("MultiEdit")).toBe("edited");
86
- expect(verbForTool("NotebookEdit")).toBe("edited");
87
- expect(verbForTool("Bash")).toBe("ran");
88
- expect(verbForTool("BashOutput")).toBe("ran");
89
- expect(verbForTool("WebSearch")).toBe("searched");
90
- expect(verbForTool("Grep")).toBe("searched");
91
- expect(verbForTool("Glob")).toBe("searched");
92
- expect(verbForTool("WebFetch")).toBe("fetched");
93
- expect(verbForTool("Task")).toBe("dispatched");
94
- expect(verbForTool("Agent")).toBe("dispatched");
95
- expect(verbForTool("TodoWrite")).toBe("noted");
96
- });
97
-
98
- it("skips user-facing switchroom-telegram tools (those ARE the surface)", () => {
99
- expect(verbForTool("mcp__switchroom-telegram__reply")).toBeNull();
100
- expect(verbForTool("mcp__switchroom-telegram__stream_reply")).toBeNull();
101
- expect(verbForTool("mcp__switchroom-telegram__edit_message")).toBeNull();
102
- expect(verbForTool("mcp__switchroom-telegram__react")).toBeNull();
103
- });
104
-
105
- it("maps recognised MCP tools (hindsight, google-workspace, notion) to specific verbs", () => {
106
- // hindsight: recall/reflect → searched, retain/update_memory → saved
107
- expect(verbForTool("mcp__hindsight__reflect")).toBe("searched");
108
- expect(verbForTool("mcp__hindsight__recall")).toBe("searched");
109
- expect(verbForTool("mcp__hindsight__retain")).toBe("saved");
110
- expect(verbForTool("mcp__hindsight__update_memory")).toBe("saved");
111
- // google-workspace / claude.ai variants: read-shaped → searched, write-shaped → edited
112
- expect(verbForTool("mcp__google-workspace__list_files")).toBe("searched");
113
- expect(verbForTool("mcp__claude_ai_Gmail__search_messages")).toBe("searched");
114
- expect(verbForTool("mcp__google-workspace__create_file")).toBe("edited");
115
- expect(verbForTool("mcp__claude_ai_Google_Drive__download_file_content")).toBe("searched");
116
- // notion: query/get → searched, create/update → edited
117
- expect(verbForTool("mcp__notion__query_database")).toBe("searched");
118
- expect(verbForTool("mcp__claude_ai_Notion__notion-search")).toBe("searched");
119
- expect(verbForTool("mcp__claude_ai_Notion__notion-update-page")).toBe("edited");
120
- });
121
-
122
- it("returns 'used' for genuinely unknown MCP / future tools (generic fallback)", () => {
123
- expect(verbForTool("mcp__random-third-party__do_thing")).toBe("used");
124
- expect(verbForTool("SomeFutureUnknownTool")).toBe("used");
125
- });
126
-
127
- it("returns null for empty toolName (defensive)", () => {
128
- expect(verbForTool("")).toBeNull();
129
- });
130
- });
131
-
132
- describe("register + formatSummary — Claude Code-style summary", () => {
133
- it("formats a single Read as 'Read a file'", () => {
134
- const s = makeEmptyActivityState();
135
- register(s, "Read");
136
- expect(formatSummary(s)).toBe("Read a file");
137
- });
138
-
139
- it("formats multiple Reads as 'Read N files'", () => {
140
- const s = makeEmptyActivityState();
141
- register(s, "Read");
142
- register(s, "Read");
143
- register(s, "Read");
144
- expect(formatSummary(s)).toBe("Read 3 files");
145
- });
146
-
147
- it("formats single Bash as 'Ran a command'", () => {
148
- const s = makeEmptyActivityState();
149
- register(s, "Bash");
150
- expect(formatSummary(s)).toBe("Ran a command");
151
- });
152
-
153
- it("formats multiple Bash as 'Ran N commands'", () => {
154
- const s = makeEmptyActivityState();
155
- for (let i = 0; i < 5; i++) register(s, "Bash");
156
- expect(formatSummary(s)).toBe("Ran 5 commands");
157
- });
158
-
159
- it("joins multiple verb-classes with commas (first-occurrence order)", () => {
160
- const s = makeEmptyActivityState();
161
- // Tools fire in this order: Read → Bash → Edit
162
- register(s, "Read");
163
- register(s, "Bash");
164
- register(s, "Edit");
165
- // The summary renders chronologically: read, ran, edited.
166
- expect(formatSummary(s)).toBe("Read a file, ran a command, edited a file");
167
- });
168
-
169
- it("matches the Claude Code screenshot examples", () => {
170
- // "Ran 5 commands, read a file"
171
- const s1 = makeEmptyActivityState();
172
- for (let i = 0; i < 5; i++) register(s1, "Bash");
173
- register(s1, "Read");
174
- expect(formatSummary(s1)).toBe("Ran 5 commands, read a file");
175
-
176
- // "Edited a file, read a file, ran a command"
177
- const s2 = makeEmptyActivityState();
178
- register(s2, "Edit");
179
- register(s2, "Read");
180
- register(s2, "Bash");
181
- expect(formatSummary(s2)).toBe("Edited a file, read a file, ran a command");
182
-
183
- // "Created a file, ran a command"
184
- const s3 = makeEmptyActivityState();
185
- register(s3, "Write");
186
- register(s3, "Bash");
187
- expect(formatSummary(s3)).toBe("Created a file, ran a command");
188
- });
189
-
190
- it("returns null when state is empty", () => {
191
- expect(formatSummary(makeEmptyActivityState())).toBeNull();
192
- });
193
-
194
- it("ignores user-facing tools (reply/stream_reply etc.)", () => {
195
- const s = makeEmptyActivityState();
196
- register(s, "mcp__switchroom-telegram__reply");
197
- register(s, "mcp__switchroom-telegram__stream_reply");
198
- expect(formatSummary(s)).toBeNull(); // nothing tracked
199
- });
200
-
201
- it("includes generic 'used' for genuinely-unknown MCP tools (fallback)", () => {
202
- const s = makeEmptyActivityState();
203
- register(s, "mcp__random-third-party__do_thing");
204
- expect(formatSummary(s)).toBe("Used a tool");
205
- register(s, "mcp__another-unknown-server__something_else");
206
- expect(formatSummary(s)).toBe("Used 2 tools");
207
- });
208
-
209
- it("maps recognised MCP tools to natural-language summaries (no generic 'Used N tools')", () => {
210
- // hindsight search shows up as 'searched' (memory)
211
- const s = makeEmptyActivityState();
212
- register(s, "mcp__hindsight__reflect");
213
- expect(formatSummary(s)).toBe("Ran a search");
214
- register(s, "mcp__hindsight__reflect");
215
- expect(formatSummary(s)).toBe("Ran 2 searches");
216
- // hindsight retain shows up as 'saved a memory'
217
- register(s, "mcp__hindsight__retain");
218
- expect(formatSummary(s)).toBe("Ran 2 searches, saved a memory");
219
- });
220
-
221
- it("tracks firstToolName for forensic / telemetry use", () => {
222
- const s = makeEmptyActivityState();
223
- register(s, "Read");
224
- register(s, "Bash");
225
- expect(s.firstToolName).toBe("Read");
226
- });
227
- });
228
-
229
- describe("parallel-tool-use coalescing — render only reflects accumulated state", () => {
230
- it("synchronous burst of N tool_uses produces the right summary at each step", () => {
231
- // Modern Claude emits parallel tool_uses in a tight sync loop. The
232
- // gateway calls register() N times before any async drain runs.
233
- // After N registers, the rendered string should reflect ALL of them
234
- // — so when the drain fires once with the latest pendingRender, the
235
- // sent text is correct and complete.
236
- const s = makeEmptyActivityState();
237
- register(s, "Read");
238
- register(s, "Read");
239
- register(s, "Read");
240
- register(s, "Bash");
241
- register(s, "Bash");
242
- expect(formatSummary(s)).toBe("Read 3 files, ran 2 commands");
243
- });
244
-
245
- it("ordering is preserved across a chronological burst", () => {
246
- const s = makeEmptyActivityState();
247
- // Simulates: Bash, then Read, then Bash, then Read, then Edit
248
- register(s, "Bash");
249
- register(s, "Read");
250
- register(s, "Bash");
251
- register(s, "Read");
252
- register(s, "Edit");
253
- // Bash was first, then Read, then Edit. Counts: bash 2, read 2, edit 1.
254
- expect(formatSummary(s)).toBe(
255
- "Ran 2 commands, read 2 files, edited a file",
256
- );
257
- });
258
-
259
- it("registerAndRender returns null on user-facing tools (no race contribution)", () => {
260
- const s = makeEmptyActivityState();
261
- register(s, "Read");
262
- // A reply tool fires concurrently — should not enter the activity state.
263
- expect(
264
- registerAndRender(s, "mcp__switchroom-telegram__reply"),
265
- ).toBeNull();
266
- // State still reflects only the Read.
267
- expect(formatSummary(s)).toBe("Read a file");
268
- });
269
- });
270
-
271
- describe("registerAndRender — ergonomic full-pipeline call", () => {
272
- it("returns the updated rendered text on a real tool (chronological)", () => {
273
- const s = makeEmptyActivityState();
274
- expect(registerAndRender(s, "Read")).toBe("Read a file");
275
- // Bash fires AFTER Read — chronological order shows read first.
276
- expect(registerAndRender(s, "Bash")).toBe(
277
- "Read a file, ran a command",
278
- );
279
- });
280
-
281
- it("returns null on a surface tool (no-op)", () => {
282
- const s = makeEmptyActivityState();
283
- expect(
284
- registerAndRender(s, "mcp__switchroom-telegram__reply"),
285
- ).toBeNull();
286
- // State unchanged
287
- expect(s.firstToolName).toBeNull();
288
- });
289
- });
290
-
291
75
  describe("appendActivityLine + renderActivityFeed — accumulating activity feed", () => {
292
76
  it("accumulates distinct actions chronologically (newest = current → bold, earlier = done ✓ italic)", () => {
293
77
  const lines: string[] = [];
@@ -1,205 +1,26 @@
1
1
  /**
2
- * Tool-activity summary — Claude Code-style natural-language progress
3
- * line that batches tool_use events for a turn into a single Telegram
4
- * message that updates in place.
2
+ * Tool-activity feed — a Claude-Code-style live list of what the agent
3
+ * is doing this turn, rendered into ONE Telegram message that edits in
4
+ * place and clears the moment the model's real reply lands.
5
5
  *
6
- * Replaces the per-tool intent surface (#1924). The screenshot from
7
- * Claude Code's own UI shows lines like:
6
+ * Each non-surface tool gets a human-friendly, present-tense line
7
+ * ("Reading CLAUDE.md", "Searching memory", "Running a command"); the
8
+ * feed renders them chronologically (oldest first, newest = the
9
+ * in-progress step), consecutive duplicates collapsed, capped to the
10
+ * most recent MIRROR_MAX_LINES with a "+N earlier" header.
8
11
  *
9
- * "Ran 5 commands, read a file"
10
- * "Edited a file, read a file, ran a command"
11
- *
12
- * Past tense, comma-joined, singular/plural-aware. One message per
13
- * "phase" (turn start → first reply), progressively edited as tools
14
- * accumulate. NOT raw tool calls — descriptions of what the agent has
15
- * been doing.
16
- *
17
- * Why this beats per-tool labels:
18
- * - One Telegram message per phase (low signal-to-noise vs N
19
- * messages on a heavy turn)
20
- * - The user sees ACCUMULATED work in a glanceable form, not a flood
21
- * - Plays nicely with the existing answer-lane stream that handles
22
- * the actual reply text
23
- *
24
- * Tracking shape: per-turn counters keyed by `verb` (the action class
25
- * derived from tool name). One counter per verb so the summary line
26
- * collapses neatly regardless of which specific Read/Bash/WebSearch
27
- * the model chose. `register()` increments the counter; `formatSummary()`
28
- * renders the current state.
12
+ * Two append entrypoints feed the same `lines: string[]` accumulator:
13
+ * - `appendActivityLabel` — for a pre-computed label from the
14
+ * real-time PreToolUse sidecar (`tool_label` event). This is the
15
+ * gateway's live driver: it fires at tool-call time regardless of
16
+ * when claude flushes the transcript, so it stays deterministic on
17
+ * fast/clustered-tool turns.
18
+ * - `appendActivityLine` — derives the label from a tool_use's name +
19
+ * input via `describeToolUse` (used where the raw tool_use is the
20
+ * only signal available).
29
21
  */
30
22
 
31
- const READ_VERBS = new Set(["read"]);
32
- const WRITE_VERBS = new Set(["wrote", "created", "edited"]);
33
-
34
- export type ActivityVerb =
35
- | "read"
36
- | "edited"
37
- | "created"
38
- | "ran"
39
- | "searched"
40
- | "fetched"
41
- | "dispatched"
42
- | "noted"
43
- | "saved" // memory-retain class (hindsight, etc.) — distinct from "noted" (TodoWrite)
44
- | "used"; // generic fallback
45
-
46
- /** Object form so `register()` can mutate; pure functions inside the
47
- * module work against this shape (easier to unit-test than a Map). */
48
- export interface ActivityState {
49
- counts: Partial<Record<ActivityVerb, number>>;
50
- /** Order verbs were first observed this turn. The summary renders in
51
- * this order so the line reads as a chronological natural-language
52
- * account: "edited a file, read a file, ran a command" matches the
53
- * agent's actual sequence of actions. Stable — once a verb is added
54
- * to this list, it never moves. */
55
- order: ActivityVerb[];
56
- /** First non-trivial tool name observed this turn (for telemetry / future
57
- * "what kicked this off" forensic). Not used in the rendered summary. */
58
- firstToolName: string | null;
59
- }
60
-
61
- export function makeEmptyActivityState(): ActivityState {
62
- return { counts: {}, order: [], firstToolName: null };
63
- }
64
-
65
- /** Map a tool name → verb. Mirrors the existing `tool-intent-surface.ts`
66
- * verb table but in past tense. Tools that don't map (or surface tools
67
- * like reply/stream_reply) return null — the caller skips them. */
68
- export function verbForTool(toolName: string): ActivityVerb | null {
69
- if (!toolName) return null;
70
- // Lazy match on the server segment so names containing underscores
71
- // (e.g. `mcp__claude_ai_Gmail__search`) parse as
72
- // server="claude_ai_Gmail", tool="search"
73
- // instead of the prior `[^_]+` which stopped at the first inner `_`.
74
- const mcpMatch = /^mcp__(.+?)__(.+)$/.exec(toolName);
75
- // Skip user-facing Telegram-plugin tools entirely — those ARE the
76
- // surface, never to be summarised.
77
- if (mcpMatch && mcpMatch[1] === "switchroom-telegram") return null;
78
-
79
- // MCP allowlist — map common MCP tools to specific verbs so the summary
80
- // reads as "Searched memory" or "Read 2 files" instead of the generic
81
- // fallback "Used 2 tools". Tools NOT on this list fall through to the
82
- // generic "used" verb, which is still better than nothing for one-offs
83
- // but hurts on heavy MCP turns. Mirrors the label table in
84
- // `telegram-plugin/hooks/tool-label-pretool.mjs` — keep them in sync.
85
- if (mcpMatch) {
86
- // Case-insensitive match — claude.ai prefixes use mixed-case
87
- // server names ("claude_ai_Gmail", "claude_ai_Google_Drive") so we
88
- // lowercase both sides before comparing.
89
- const server = mcpMatch[1].toLowerCase();
90
- const mcpTool = mcpMatch[2].toLowerCase();
91
- if (server === "hindsight") {
92
- if (mcpTool === "recall" || mcpTool === "reflect") return "searched";
93
- if (mcpTool === "retain" || mcpTool === "update_memory" || mcpTool === "sync_retain") return "saved";
94
- }
95
- if (server === "google-workspace" || server === "claude_ai_google_drive" || server === "claude_ai_gmail" || server === "claude_ai_google_calendar") {
96
- if (/^(search|list|query|read|get|fetch|download)/i.test(mcpTool)) return "searched";
97
- if (/^(create|update|write|send|move|copy|duplicate)/i.test(mcpTool)) return "edited";
98
- }
99
- if (server === "notion" || server === "claude_ai_notion") {
100
- // claude.ai Notion exposes tools as `notion-search`, `notion-update-page`,
101
- // etc. Strip the redundant `notion-` prefix before matching the verb.
102
- const action = mcpTool.replace(/^notion-/, "");
103
- if (/^(search|fetch|query|get|read)/i.test(action)) return "searched";
104
- if (/^(create|update|move|duplicate|comment)/i.test(action)) return "edited";
105
- }
106
- }
107
-
108
- const suffix = (mcpMatch ? mcpMatch[2] : toolName).toLowerCase();
109
- switch (suffix) {
110
- case "read":
111
- return "read";
112
- case "write":
113
- return "created";
114
- case "edit":
115
- case "multiedit":
116
- case "notebookedit":
117
- return "edited";
118
- case "bash":
119
- case "bashoutput":
120
- case "killshell":
121
- return "ran";
122
- case "websearch":
123
- case "grep":
124
- case "glob":
125
- return "searched";
126
- case "webfetch":
127
- return "fetched";
128
- case "task":
129
- case "agent":
130
- return "dispatched";
131
- case "todowrite":
132
- case "todoread":
133
- return "noted";
134
- default:
135
- return "used";
136
- }
137
- }
138
-
139
- /** Mutates `state` to record one tool_use of `toolName`. Returns true
140
- * iff the activity state changed (so the caller knows to refresh the
141
- * rendered summary). */
142
- export function register(state: ActivityState, toolName: string): boolean {
143
- const verb = verbForTool(toolName);
144
- if (!verb) return false;
145
- if (state.firstToolName == null) state.firstToolName = toolName;
146
- const prior = state.counts[verb] ?? 0;
147
- if (prior === 0) state.order.push(verb);
148
- state.counts[verb] = prior + 1;
149
- return true;
150
- }
151
-
152
- interface VerbPhrase {
153
- singular: string;
154
- plural: string;
155
- }
156
-
157
- const VERB_PHRASE: Record<ActivityVerb, VerbPhrase> = {
158
- read: { singular: "read a file", plural: "read $N files" },
159
- edited: { singular: "edited a file", plural: "edited $N files" },
160
- created: { singular: "created a file", plural: "created $N files" },
161
- ran: { singular: "ran a command", plural: "ran $N commands" },
162
- searched: { singular: "ran a search", plural: "ran $N searches" },
163
- fetched: { singular: "fetched a URL", plural: "fetched $N URLs" },
164
- dispatched: { singular: "dispatched a sub-agent", plural: "dispatched $N sub-agents" },
165
- noted: { singular: "updated the todo list", plural: "updated the todo list ($N edits)" },
166
- saved: { singular: "saved a memory", plural: "saved $N memories" },
167
- used: { singular: "used a tool", plural: "used $N tools" },
168
- };
169
-
170
- /** Render the activity state as a single natural-language line.
171
- * Verbs are rendered in `state.order` — first-occurrence order — so
172
- * the line reads chronologically ("edited a file, read a file, ran
173
- * a command" mirrors the agent's actual action sequence). Returns
174
- * null when the state is empty (nothing to show yet). */
175
- export function formatSummary(state: ActivityState): string | null {
176
- const phrases: string[] = [];
177
- for (const verb of state.order) {
178
- const n = state.counts[verb] ?? 0;
179
- if (n <= 0) continue;
180
- const p = VERB_PHRASE[verb];
181
- phrases.push(n === 1 ? p.singular : p.plural.replace("$N", String(n)));
182
- }
183
- if (phrases.length === 0) return null;
184
- // Capitalize first letter so the sentence reads as a statement.
185
- const sentence = phrases.join(", ");
186
- return sentence.charAt(0).toUpperCase() + sentence.slice(1);
187
- }
188
-
189
- /** Convenience: ergonomic full pipeline for callers that just want
190
- * "given the new tool name and prior state, give me the updated rendered
191
- * text or null if nothing changed". Returns null when the tool is a
192
- * surface tool / no-op (so the caller can skip the Telegram edit). */
193
- export function registerAndRender(
194
- state: ActivityState,
195
- toolName: string,
196
- ): string | null {
197
- const changed = register(state, toolName);
198
- if (!changed) return null;
199
- return formatSummary(state);
200
- }
201
-
202
- // ─── Friendly per-tool rendering (draft-mirror, RFC draft-mirror-preview) ───
23
+ // ─── Friendly per-tool rendering ────────────────────────────────────────────
203
24
  //
204
25
  // Claude Code's own UI reads human-friendly because the model AUTHORS the
205
26
  // descriptive text inside each tool_use.input — verified against a real