npm - switchroom - Versions diffs - 0.13.57 → 0.13.59 - Mend

switchroom 0.13.57 → 0.13.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/dist/cli/switchroom.js +451 -343
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +611 -759
package/telegram-plugin/gateway/gateway.ts +191 -92
package/telegram-plugin/tests/tool-activity-summary.test.ts +191 -0
package/telegram-plugin/tool-activity-summary.ts +164 -0
package/telegram-plugin/tests/tool-intent-surface.test.ts +0 -128
package/telegram-plugin/tool-intent-surface.ts +0 -155

package/telegram-plugin/tool-activity-summary.ts ADDED Viewed

@@ -0,0 +1,164 @@
+/**
+ * Tool-activity summary — Claude Code-style natural-language progress
+ * line that batches tool_use events for a turn into a single Telegram
+ * message that updates in place.
+ *
+ * Replaces the per-tool intent surface (#1924). The screenshot from
+ * Claude Code's own UI shows lines like:
+ *
+ *   "Ran 5 commands, read a file"
+ *   "Edited a file, read a file, ran a command"
+ *
+ * Past tense, comma-joined, singular/plural-aware. One message per
+ * "phase" (turn start → first reply), progressively edited as tools
+ * accumulate. NOT raw tool calls — descriptions of what the agent has
+ * been doing.
+ *
+ * Why this beats per-tool labels:
+ *   - One Telegram message per phase (far less noise than N separate
+ *     messages on a heavy turn)
+ *   - The user sees ACCUMULATED work in a glanceable form, not a flood
+ *   - Plays nicely with the existing answer-lane stream that handles
+ *     the actual reply text
+ *
+ * Tracking shape: per-turn counters keyed by `verb` (the action class
+ * derived from tool name). One counter per verb so the summary line
+ * collapses neatly regardless of which specific Read/Bash/WebSearch
+ * the model chose. `register()` increments the counter; `formatSummary()`
+ * renders the current state.
+ */
+/** Past-tense action class a tool call is counted under. The summary
+ *  keeps one counter per verb, so every tool that maps to the same
+ *  verb collapses into the same phrase. */
+export type ActivityVerb =
+  | "read"
+  | "edited"
+  | "created"
+  | "ran"
+  | "searched"
+  | "fetched"
+  | "dispatched"
+  | "noted"
+  | "used"; // generic fallback
+/** Verbs that describe read-only file access.
+ *  NOTE(review): not referenced anywhere else in this module. */
+const READ_VERBS: ReadonlySet<ActivityVerb> = new Set<ActivityVerb>(["read"]);
+/** Verbs that describe file mutation. Typed against `ActivityVerb` so a
+ *  label that is not a real verb (the former "wrote" entry) no longer
+ *  compiles.
+ *  NOTE(review): not referenced anywhere else in this module. */
+const WRITE_VERBS: ReadonlySet<ActivityVerb> = new Set<ActivityVerb>([
+  "created",
+  "edited",
+]);
+/** Mutable per-turn tally of tool activity. Kept as a plain object so
+ *  `register()` can mutate it in place and the pure functions in this
+ *  module can work against it (easier to unit-test than a Map). */
+export interface ActivityState {
+  /** Number of recorded tool calls per verb. A verb with no recorded
+   *  call has no key; `makeEmptyActivityState()` starts with `{}`. */
+  counts: Partial<Record<ActivityVerb, number>>;
+  /** Order verbs were first observed this turn. The summary renders in
+   *  this order so the line reads as a chronological natural-language
+   *  account: "edited a file, read a file, ran a command" matches the
+   *  agent's actual sequence of actions. Stable — once a verb is added
+   *  to this list, it never moves. */
+  order: ActivityVerb[];
+  /** Name of the first tool this turn that `register()` mapped to a
+   *  verb, or null before any such tool (kept for telemetry / future
+   *  "what kicked this off" forensics). Not used in the rendered summary. */
+  firstToolName: string | null;
+}
+/** Create a fresh state for a new turn: no counts, no verb order and
+ *  no first tool recorded. Every call returns new objects, so states
+ *  never share their `counts` map or `order` array. */
+export function makeEmptyActivityState(): ActivityState {
+  const blank: ActivityState = {
+    counts: {},
+    order: [],
+    firstToolName: null,
+  };
+  return blank;
+}
+/**
+ * Map a tool name to the past-tense verb its call is counted under.
+ *
+ * MCP tools are named `mcp__<server>__<tool>`; the `<tool>` part is
+ * matched case-insensitively against the same table as built-in tools.
+ * The server part may itself contain single underscores (for example
+ * `mcp__my_fs__read`), so the name is split at the first `__` that
+ * follows the server rather than rejecting any server with a `_`.
+ *
+ * @param toolName Raw tool name.
+ * @returns `null` for an empty name and for every tool on the
+ *   `switchroom-telegram` server (those tools ARE the user-facing
+ *   surface and are never summarised); otherwise the matching verb,
+ *   falling back to `"used"` for tools without a dedicated verb.
+ */
+export function verbForTool(toolName: string): ActivityVerb | null {
+  if (!toolName) return null;
+  // Lazy server group: stops at the first `__`, so server names that
+  // contain single underscores still parse.
+  const mcpMatch = /^mcp__(.+?)__(.+)$/.exec(toolName);
+  // Skip user-facing Telegram-plugin tools entirely — those ARE the
+  // surface, never to be summarised.
+  if (mcpMatch && mcpMatch[1] === "switchroom-telegram") return null;
+  const suffix = (mcpMatch ? mcpMatch[2] : toolName).toLowerCase();
+  switch (suffix) {
+    case "read":
+      return "read";
+    case "write":
+      return "created";
+    case "edit":
+    case "multiedit":
+    case "notebookedit":
+      return "edited";
+    case "bash":
+    case "bashoutput":
+    case "killshell":
+      return "ran";
+    case "websearch":
+    case "grep":
+    case "glob":
+      return "searched";
+    case "webfetch":
+      return "fetched";
+    case "task":
+    case "agent":
+      return "dispatched";
+    case "todowrite":
+    case "todoread":
+      return "noted";
+    default:
+      // Anything unrecognised still counts as activity.
+      return "used";
+  }
+}
+/**
+ * Record one tool call in `state` (mutated in place).
+ *
+ * @param state Per-turn activity state to update.
+ * @param toolName Raw tool name; mapped to a verb via `verbForTool()`.
+ * @returns `true` when the call was counted, `false` when the tool maps
+ *   to no verb and `state` was left untouched — the caller only needs
+ *   to refresh the rendered summary on `true`.
+ */
+export function register(state: ActivityState, toolName: string): boolean {
+  const verb = verbForTool(toolName);
+  if (verb === null) return false;
+
+  // Remember only the very first counted tool of the turn.
+  if (state.firstToolName == null) {
+    state.firstToolName = toolName;
+  }
+
+  const previousCount = state.counts[verb] ?? 0;
+  const isFirstSighting = previousCount === 0;
+  if (isFirstSighting) {
+    // First time this verb shows up: fix its position in the summary.
+    state.order.push(verb);
+  }
+  state.counts[verb] = previousCount + 1;
+  return true;
+}
+/** Wording for one verb: `formatSummary()` uses `singular` when the
+ *  count is exactly 1 and `plural` otherwise. */
+interface VerbPhrase {
+  singular: string;
+  /** Template containing the literal `$N`, which `formatSummary()`
+   *  replaces with the count. */
+  plural: string;
+}
+/** Phrase table read by `formatSummary()`. Typed as a full `Record`, so
+ *  every `ActivityVerb` must have an entry. Phrases start lower-case;
+ *  `formatSummary()` capitalises the first letter of the joined line. */
+const VERB_PHRASE: Record<ActivityVerb, VerbPhrase> = {
+  read: { singular: "read a file", plural: "read $N files" },
+  edited: { singular: "edited a file", plural: "edited $N files" },
+  created: { singular: "created a file", plural: "created $N files" },
+  ran: { singular: "ran a command", plural: "ran $N commands" },
+  searched: { singular: "ran a search", plural: "ran $N searches" },
+  fetched: { singular: "fetched a URL", plural: "fetched $N URLs" },
+  dispatched: { singular: "dispatched a sub-agent", plural: "dispatched $N sub-agents" },
+  noted: { singular: "updated the todo list", plural: "updated the todo list ($N edits)" },
+  used: { singular: "used a tool", plural: "used $N tools" },
+};
+/**
+ * Turn the accumulated activity into one natural-language line.
+ *
+ * Phrases follow `state.order` (the order each verb was first seen),
+ * are joined with ", ", and the first character is upper-cased, e.g.
+ * "Edited a file, read a file, ran a command".
+ *
+ * @param state Activity recorded so far this turn.
+ * @returns The summary line, or `null` when no verb has a positive
+ *   count (nothing to show yet).
+ */
+export function formatSummary(state: ActivityState): string | null {
+  const parts = state.order
+    .map((verb) => {
+      const count = state.counts[verb] ?? 0;
+      if (count <= 0) return null;
+      const { singular, plural } = VERB_PHRASE[verb];
+      return count === 1 ? singular : plural.replace("$N", String(count));
+    })
+    .filter((phrase): phrase is string => phrase !== null);
+
+  if (parts.length === 0) return null;
+
+  const line = parts.join(", ");
+  // Only the first character changes case; the rest is kept verbatim.
+  return line.charAt(0).toUpperCase() + line.slice(1);
+}
+/**
+ * One-call helper: record `toolName`, then return the refreshed summary.
+ *
+ * @param state Per-turn state; mutated via `register()`.
+ * @param toolName Raw tool name to record.
+ * @returns The updated summary line, or `null` when `register()`
+ *   ignored the tool (surface tool / no-op), so the caller can skip
+ *   the Telegram edit.
+ */
+export function registerAndRender(
+  state: ActivityState,
+  toolName: string,
+): string | null {
+  return register(state, toolName) ? formatSummary(state) : null;
+}

package/telegram-plugin/tests/tool-intent-surface.test.ts DELETED Viewed

@@ -1,128 +0,0 @@
-import { describe, it, expect } from "vitest";
-import { deriveIntentSurface } from "../tool-intent-surface.js";
-// Unit tests for deriveIntentSurface(): verb chosen per tool class,
-// silence for user-facing Telegram tools, fallback output, HTML
-// escaping / truncation, and precomputed-label handling.
-describe("deriveIntentSurface — gateway lifts model's tool intent into framework-voice status", () => {
-  // Each built-in tool class must render its own italic framework verb.
-  describe("tool-class verb mapping", () => {
-    it("Bash → running", () => {
-      const out = deriveIntentSurface("Bash", { command: "ls -la /var/log" });
-      expect(out.text).toContain("<i>running:</i>");
-      expect(out.text).toContain("ls -la /var/log");
-    });
-    it("WebSearch → searching", () => {
-      const out = deriveIntentSurface("WebSearch", { query: "Victoria drink driving" });
-      expect(out.text).toContain("<i>searching:</i>");
-      expect(out.text).toContain("Victoria drink driving");
-    });
-    it("WebFetch → fetching (hostname extracted)", () => {
-      const out = deriveIntentSurface("WebFetch", { url: "https://example.com/a/b" });
-      expect(out.text).toContain("<i>fetching:</i>");
-      expect(out.text).toContain("example.com");
-    });
-    it("Read → reading (basename only)", () => {
-      const out = deriveIntentSurface("Read", { file_path: "/etc/os-release" });
-      expect(out.text).toContain("<i>reading:</i>");
-      expect(out.text).toContain("os-release");
-      // The directory part of the path must not appear in the output.
-      expect(out.text).not.toContain("/etc/");
-    });
-    it("Write → writing", () => {
-      const out = deriveIntentSurface("Write", { file_path: "/tmp/hello.sh" });
-      expect(out.text).toContain("<i>writing:</i>");
-      expect(out.text).toContain("hello.sh");
-    });
-    it("Edit / MultiEdit / NotebookEdit → editing", () => {
-      for (const t of ["Edit", "MultiEdit", "NotebookEdit"]) {
-        expect(
-          deriveIntentSurface(t, { file_path: "/a/foo.ts" }).text,
-        ).toContain("<i>editing:</i>");
-      }
-    });
-    it("Grep / Glob → searching", () => {
-      expect(
-        deriveIntentSurface("Grep", { pattern: "TODO", path: "src/" }).text,
-      ).toContain("<i>searching:</i>");
-      expect(
-        deriveIntentSurface("Glob", { pattern: "**/*.ts" }).text,
-      ).toContain("<i>searching:</i>");
-    });
-    // NOTE(review): the title names Agent as well, but only "Task" is
-    // actually asserted here.
-    it("Task / Agent → dispatching", () => {
-      expect(
-        deriveIntentSurface("Task", { description: "review the auth code" }).text,
-      ).toContain("<i>dispatching:</i>");
-    });
-  });
-  // Tools on the switchroom-telegram server are the chat surface itself,
-  // so the result text must be null for each of them.
-  describe("user-facing tools stay quiet (never re-surfaced)", () => {
-    const surfaceTools = [
-      "mcp__switchroom-telegram__reply",
-      "mcp__switchroom-telegram__stream_reply",
-      "mcp__switchroom-telegram__edit_message",
-      "mcp__switchroom-telegram__react",
-      "mcp__switchroom-telegram__send_typing",
-      "mcp__switchroom-telegram__progress_update",
-    ];
-    for (const tool of surfaceTools) {
-      it(`returns null for ${tool}`, () => {
-        expect(
-          deriveIntentSurface(tool, { text: "hi", chat_id: "1" }).text,
-        ).toBeNull();
-      });
-    }
-  });
-  describe("unknown MCP tools", () => {
-    it("uses 'using <tool>' for unknown MCP tool servers", () => {
-      const out = deriveIntentSurface(
-        "mcp__google-workspace__list_drive_files",
-        { folderId: "abc" },
-      );
-      // Accepts the tool name with underscores or spaces, and with or
-      // without a trailing colon.
-      expect(out.text).toMatch(/<i>using list[ _]drive[ _]files:?<\/i>/);
-    });
-    it("falls back gracefully when input has no recognisable label field", () => {
-      const out = deriveIntentSurface("Bash", { weird: "no-command-here" });
-      // No label resolved → verb-only output
-      expect(out.text).toBe("<i>running</i>");
-    });
-  });
-  describe("privacy / safety", () => {
-    it("escapes HTML in the label so a malicious input can't inject markup", () => {
-      const out = deriveIntentSurface("Bash", {
-        command: "echo '<script>alert(1)</script>'",
-      });
-      expect(out.text).not.toContain("<script>");
-      expect(out.text).toContain("&lt;script&gt;");
-    });
-    it("truncates long labels to keep the surface message tight", () => {
-      const longCmd = "echo " + "x".repeat(500);
-      const out = deriveIntentSurface("Bash", { command: longCmd });
-      // toolLabel already truncates Bash to 40 chars; safety cap then
-      // bounds anything else to MAX_LABEL_LEN.
-      expect((out.text ?? "").length).toBeLessThan(200);
-    });
-    it("returns null when toolName is empty (defensive)", () => {
-      expect(deriveIntentSurface("", { command: "x" }).text).toBeNull();
-    });
-  });
-  // A label passed as the third argument must appear in the output in
-  // place of one derived from the tool input.
-  describe("precomputed label precedence", () => {
-    it("uses precomputed label when present (matches toolLabel's contract)", () => {
-      const out = deriveIntentSurface(
-        "Bash",
-        { command: "ls" },
-        "checking the logs",
-      );
-      expect(out.text).toContain("<i>running:</i>");
-      expect(out.text).toContain("checking the logs");
-    });
-  });
-});

package/telegram-plugin/tool-intent-surface.ts DELETED Viewed

@@ -1,155 +0,0 @@
-/**
- * Tool-intent surface — lifts the model's already-formed `tool_use`
- * intent (tool name + input) into a brief user-visible Telegram
- * message when the model goes to work without first calling reply.
- *
- * Companion to the PreToolUse ack-first gate (#1921). The gate forces
- * the model to author a brief acknowledgement via the reply tool
- * before any other tool runs. THIS surface is the lower-overhead
- * sibling: when the model's own `tool_use` stream already carries the
- * intent (e.g. `Bash {command: "ls -la /var/log"}`), the gateway can
- * pass that intent through as the user-visible "we're alive and this
- * is what we're doing" beat, without the model having to call any
- * extra tool.
- *
- * Why both. The gate produces MODEL-VOICE acks ("on it — checking the
- * logs") — warmer, persona-driven. The surface produces FRAMEWORK-
- * VOICE pass-throughs ("_running:_ ls -la /var/log") — honest and
- * cheaper. They compose: if the gate fires, the model authors an ack
- * which lands first; the surface stays quiet (already-acked). If the
- * gate fails (kill-switched / regression / hook spawn failure), the
- * surface still lands — defence in depth.
- *
- * Output format: italicised framework verb + colon + the model's own
- * `toolLabel()` output. Italics are the conventional "framework
- * narrating, not the model speaking" marker; the verb signals which
- * lane the work is in. `toolLabel()` already shortens its output;
- * `deriveIntentSurface()` then caps the escaped label at
- * `MAX_LABEL_LEN` (140) characters as a final safety bound.
- *
- * Privacy posture. The model's `tool_use.input` may contain user-
- * provided strings (web search queries, file paths the user named).
- * Those are already going to land in chat history one way or another
- * (e.g. via the model's reply describing what it did), so surfacing
- * a brief label here doesn't expand the leakage surface materially.
- * `toolLabel()` already truncates and HTML-escapes its output via
- * the renderer.
- */
-import { toolLabel } from "./tool-labels.js";
-/** Upper bound, in characters, that `deriveIntentSurface()` applies to
- *  the HTML-escaped label. */
-const MAX_LABEL_LEN = 140;
-/**
- * Compute the user-facing "framework verb" for a tool. Verbs match
- * the action class so the user reads "running" for Bash, "searching"
- * for WebSearch, etc. Tools without a friendly verb fall back to
- * `using <tool>` — better than blanking out.
- *
- * @param toolName Raw tool name, optionally `mcp__<server>__<tool>`.
- * @returns The present-participle verb for known tools; otherwise
- *   `using <tool>`, where an MCP tool contributes its suffix with
- *   underscores turned into spaces and any other tool its full name.
- */
-function frameworkVerbFor(toolName: string): string {
-  // Strip "mcp__<server>__" prefix to match suffixes consistently.
-  // Most MCP tools surface as `mcp__<server>__<tool>` in the stream.
-  // NOTE(review): `[^_]+` does not match a server name that contains an
-  // underscore; such names fall through to the non-MCP path below.
-  const m = /^mcp__[^_]+__(.+)$/.exec(toolName);
-  const suffix = (m ? m[1] : toolName).toLowerCase();
-  switch (suffix) {
-    case "bash":
-    case "bashoutput":
-    case "killshell":
-      return "running";
-    case "websearch":
-    case "grep":
-    case "glob":
-      return "searching";
-    case "webfetch":
-      return "fetching";
-    case "read":
-      return "reading";
-    case "write":
-      return "writing";
-    case "edit":
-    case "multiedit":
-    case "notebookedit":
-      return "editing";
-    case "todowrite":
-    case "todoread":
-      return "noting";
-    case "task":
-      case "agent":
-      return "dispatching";
-    case "toolsearch":
-      return "loading tools";
-    default:
-      // For unknown / MCP tools, prefer a short generic built from the
-      // tool's own name (e.g. "using list drive files") — more honest
-      // than guessing a verb.
-      if (m) return `using ${m[1].replace(/_/g, " ")}`;
-      return `using ${toolName}`;
-  }
-}
-/** A tool that surfaces in the chat itself (reply / stream_reply / etc.)
- *  — these tools ARE the user surface, so the gateway never re-surfaces
- *  them. Mirrors `isTelegramSurfaceTool` in `tool-names.ts`.
- *
- *  The `mcp__switchroom-telegram__` prefix is optional: when it is
- *  absent the whole name is compared, so a bare `reply` also counts.
- *  The comparison is case-sensitive.
- *
- *  @param toolName Raw tool name.
- *  @returns `true` when the name (minus the optional prefix) is one of
- *    the listed surface tools. */
-function isUserFacingTool(toolName: string): boolean {
-  const m = /^mcp__switchroom-telegram__(.+)$/.exec(toolName);
-  const suffix = m ? m[1] : toolName;
-  return (
-    suffix === "reply" ||
-    suffix === "stream_reply" ||
-    suffix === "edit_message" ||
-    suffix === "react" ||
-    suffix === "send_typing" ||
-    suffix === "pin_message" ||
-    suffix === "delete_message" ||
-    suffix === "forward_message" ||
-    suffix === "download_attachment" ||
-    suffix === "get_recent_messages" ||
-    suffix === "progress_update"
-  );
-}
-/** Return shape of `deriveIntentSurface()`. */
-export interface SurfaceTextResult {
-  /** Final HTML text the gateway sends to Telegram, or null when the
-   *  surface should NOT fire (tool name is empty or the tool is
-   *  user-facing). */
-  text: string | null;
-}
-/**
- * Pure decision: given a tool name + input + optional precomputed label
- * (from the existing PreToolUse label hook), return the HTML the
- * gateway should send, or null to stay quiet.
- *
- * Exposed for unit tests; the gateway wires this into the `tool_use`
- * session-event handler.
- *
- * @param toolName Raw tool name; an empty name yields `{ text: null }`.
- * @param toolInput The tool's input object, forwarded to `toolLabel()`.
- * @param precomputedLabel Optional label forwarded to `toolLabel()`.
- * @returns `{ text: null }` for empty or user-facing tools,
- *   `<i>verb</i>` when no label could be resolved, and
- *   `<i>verb:</i> label` otherwise.
- */
-export function deriveIntentSurface(
-  toolName: string,
-  toolInput: Record<string, unknown> | undefined,
-  precomputedLabel?: string,
-): SurfaceTextResult {
-  if (!toolName) return { text: null };
-  if (isUserFacingTool(toolName)) return { text: null };
-  // NOTE(review): `toolLabel()` is defined in ./tool-labels.js; the
-  // meaning of its third argument (passed as undefined) is not visible
-  // here — confirm against that module.
-  const label = toolLabel(toolName, toolInput, undefined, precomputedLabel);
-  if (!label || !label.trim()) {
-    // No label available for this tool/input shape — fall back to just
-    // the verb so the user at least sees "_running_" rather than
-    // nothing. Keeps the beat reliable on weird inputs.
-    return {
-      text: `<i>${escapeHtml(frameworkVerbFor(toolName))}</i>`,
-    };
-  }
-  const verb = frameworkVerbFor(toolName);
-  // `toolLabel()` may include backticks / quotes — let those through
-  // (Telegram HTML doesn't choke on them) but escape any stray inline
-  // HTML markers so a malicious or odd input can't inject markup.
-  // NOTE(review): the cap is applied AFTER escaping, so a cut at
-  // MAX_LABEL_LEN can land inside an entity such as `&lt;` and leave a
-  // partial `&l` in the output; truncating before escaping avoids that.
-  const safeLabel = escapeHtml(label).slice(0, MAX_LABEL_LEN);
-  return { text: `<i>${escapeHtml(verb)}:</i> ${safeLabel}` };
-}
-/** Escape `&`, `<` and `>` for use as HTML text. `&` is replaced first
- *  so the entities produced for `<` and `>` are not escaped a second
- *  time. Quote characters are left unchanged.
- *
- *  @param s Text to escape.
- *  @returns `s` with the three characters replaced by their entities. */
-function escapeHtml(s: string): string {
-  return s
-    .replace(/&/g, "&amp;")
-    .replace(/</g, "&lt;")
-    .replace(/>/g, "&gt;");
-}