npm - switchroom - Versions diffs - 0.13.64 → 0.14.0 - Mend

switchroom 0.13.64 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli/switchroom.js +487 -285
package/package.json +1 -1
package/telegram-plugin/dist/gateway/gateway.js +113 -8
package/telegram-plugin/gateway/gateway.ts +35 -22
package/telegram-plugin/tests/tool-activity-summary.test.ts +66 -0
package/telegram-plugin/tool-activity-summary.ts +137 -0
package/telegram-plugin/uat/scenarios/jtbd-webkite-read-dm.test.ts +115 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "switchroom",
-  "version": "0.13.64",
+  "version": "0.14.0",
   "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
   "type": "module",
   "bin": {

package/telegram-plugin/dist/gateway/gateway.js CHANGED Viewed

@@ -31685,6 +31685,111 @@ function registerAndRender(state, toolName) {
     return null;
   return formatSummary(state);
 }
+/**
+ * Return the last non-empty "/"-separated segment of a path for display.
+ * Non-string or empty input yields null; a path made only of slashes has
+ * no segment to show and is handed back unchanged.
+ */
+function baseName(p) {
+  if (typeof p !== "string" || p === "") {
+    return null;
+  }
+  const segments = p.split("/").filter((segment) => segment !== "");
+  if (segments.length === 0) {
+    return p;
+  }
+  return segments[segments.length - 1];
+}
+/**
+ * Extract a bare hostname from a URL-like string for display.
+ *
+ * Absolute URLs are parsed with the WHATWG URL parser. Anything it rejects
+ * (e.g. "www.example.com/docs?x=1", which has no scheme) goes through a
+ * textual fallback: drop a leading http(s) scheme and cut at the first
+ * "/", "?" or "#". Both paths strip a leading "www." so the label is the
+ * same whichever one produced it.
+ *
+ * @param {unknown} u - Candidate URL.
+ * @returns {string | null} Hostname, or null when none can be determined.
+ */
+function hostName(u) {
+  if (typeof u !== "string" || u.length === 0)
+    return null;
+  let host;
+  try {
+    host = new URL(u).hostname;
+  } catch {
+    host = u.trim().replace(/^https?:\/\//i, "").split(/[/?#]/)[0];
+  }
+  // URLs without an authority (e.g. "mailto:") parse to an empty hostname;
+  // report that as null rather than handing "" to callers.
+  return host.replace(/^www\./i, "") || null;
+}
+/**
+ * Trim a string and cap it at `n` UTF-16 code units, ending a truncated
+ * result with an ellipsis.
+ *
+ * @param {unknown} s - Value to clip; non-strings and blank strings give null.
+ * @param {number} n - Maximum length of the result, ellipsis included.
+ * @returns {string | null} The trimmed (possibly truncated) text, or null.
+ */
+function clip(s, n) {
+  if (typeof s !== "string")
+    return null;
+  const t = s.trim();
+  if (t.length === 0)
+    return null;
+  if (t.length > n) {
+    // Reserve one unit for the ellipsis; never let the cut go negative.
+    let end = Math.max(0, n - 1);
+    // If the cut lands inside a surrogate pair, drop the orphaned high
+    // half so the label never contains a lone surrogate.
+    const last = t.charCodeAt(end - 1);
+    if (last >= 0xd800 && last <= 0xdbff)
+      end -= 1;
+    return t.slice(0, end) + "\u2026";
+  }
+  return t;
+}
+/**
+ * Map one tool_use event to the short present-tense line shown in the
+ * live draft preview, or null when the tool must not be surfaced.
+ *
+ * MCP tools (`mcp__<server>__<tool>`; server and tool are lowercased before
+ * comparison): `switchroom-telegram` gives null, a handful of known servers
+ * get a fixed label, and anything else uses the first non-blank of
+ * input.description / query / title (clipped) or "Using <tool name>".
+ * Built-in tools use a per-tool detail (description, file basename,
+ * pattern, hostname, query) with a generic fallback. Unrecognised names
+ * give a generic working line.
+ */
+function describeToolUse(toolName, input) {
+  if (!toolName) {
+    return null;
+  }
+  const args = input ?? {};
+  const mcp = /^mcp__(.+?)__(.+)$/.exec(toolName);
+  if (mcp !== null) {
+    const server = mcp[1].toLowerCase();
+    const tool = mcp[2].toLowerCase();
+    switch (server) {
+      case "switchroom-telegram":
+        return null;
+      case "hindsight": {
+        if (tool === "recall" || tool === "reflect") {
+          return "Searching memory";
+        }
+        const saves = tool === "retain" || tool === "update_memory" || tool === "sync_retain";
+        return saves ? "Saving to memory" : "Working with memory";
+      }
+      case "google-workspace":
+      case "claude_ai_google_calendar":
+        return "Checking your calendar";
+      case "claude_ai_gmail":
+        return "Checking your email";
+      case "claude_ai_google_drive":
+        return "Looking through your files";
+      case "notion":
+      case "claude_ai_notion":
+        return "Checking your notes";
+      default: {
+        const authored = clip(args.description, 60) ?? clip(args.query, 50) ?? clip(args.title, 50);
+        if (authored) {
+          return authored;
+        }
+        return "Using " + tool.replace(/[-_]+/g, " ");
+      }
+    }
+  }
+  // "<verb> <detail>" when a detail is available, else the generic line.
+  const withDetail = (verb, detail, generic) => detail ? `${verb} ${detail}` : generic;
+  if (toolName === "Bash") {
+    return clip(args.description, 70) ?? "Running a command";
+  }
+  if (toolName === "BashOutput" || toolName === "KillShell") {
+    return "Managing a background command";
+  }
+  if (toolName === "Read") {
+    return withDetail("Reading", baseName(args.file_path), "Reading a file");
+  }
+  if (toolName === "Edit" || toolName === "MultiEdit" || toolName === "NotebookEdit") {
+    const target = baseName(args.file_path) ?? baseName(args.notebook_path);
+    return withDetail("Editing", target, "Editing a file");
+  }
+  if (toolName === "Write") {
+    return withDetail("Writing", baseName(args.file_path), "Writing a file");
+  }
+  if (toolName === "Grep" || toolName === "Glob") {
+    return withDetail("Searching for", clip(args.pattern, 40), "Searching files");
+  }
+  if (toolName === "WebFetch") {
+    return withDetail("Reading", hostName(args.url), "Reading a web page");
+  }
+  if (toolName === "WebSearch") {
+    return withDetail("Searching the web for", clip(args.query, 50), "Searching the web");
+  }
+  if (toolName === "Task" || toolName === "Agent") {
+    return withDetail("Delegating:", clip(args.description, 60), "Delegating to a sub-agent");
+  }
+  if (["TodoWrite", "TaskCreate", "TaskUpdate", "TaskList"].includes(toolName)) {
+    return "Updating the plan";
+  }
+  if (toolName === "ToolSearch") {
+    return "Finding the right tool";
+  }
+  return "Working\u2026";
+}
 // tool-labels.ts
 var MAX_LABEL_CHARS = 60;
@@ -49716,10 +49821,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
 }
 // ../src/build-info.ts
-var VERSION = "0.13.64";
-var COMMIT_SHA = "52afe8b0";
-var COMMIT_DATE = "2026-05-28T04:44:59Z";
-var LATEST_PR = 1948;
+var VERSION = "0.14.0";
+var COMMIT_SHA = "d7cd6faa";
+var COMMIT_DATE = "2026-05-28T06:28:21Z";
+var LATEST_PR = 1954;
 var COMMITS_AHEAD_OF_TAG = 0;
 // gateway/boot-version.ts
@@ -53694,7 +53799,7 @@ async function drainActivitySummary(turn) {
       const target = turn.activityPendingRender;
       if (target == null)
         break;
-      const html = `<i>${target}</i>`;
+      const html = `<i>${escapeHtmlForTg(target)}</i>`;
       const chat = turn.sessionChatId;
       const thread = turn.sessionThreadId;
       const useDraft = turn.isDm && thread == null && sendMessageDraftFn != null;
@@ -53863,8 +53968,8 @@ function handleSessionEvent(ev) {
           clearActivitySummary(turn);
         }
       }
-      if (!DRAFT_MIRROR_ENABLED && !turn.replyCalled && !isTelegramSurfaceTool(name)) {
-        const rendered = registerAndRender(turn.toolActivity, name);
+      if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
+        const rendered = DRAFT_MIRROR_ENABLED ? describeToolUse(name, ev.input) : registerAndRender(turn.toolActivity, name);
         if (rendered != null) {
           turn.activityPendingRender = rendered;
           if (turn.activityInFlight == null) {
@@ -53891,7 +53996,7 @@ function handleSessionEvent(ev) {
             chatId: turn.sessionChatId,
             isPrivateChat: turn.isDm,
             threadId: turn.sessionThreadId,
-            ...DRAFT_MIRROR_ENABLED ? { sendMessageDraft: sendMessageDraftFn } : ANSWER_STREAM_VISIBLE_ENABLED ? { minInitialChars: 1 } : { sendMessageDraft: sendMessageDraftFn },
+            ...ANSWER_STREAM_VISIBLE_ENABLED ? { minInitialChars: 1 } : { sendMessageDraft: sendMessageDraftFn },
             sendMessage: async (chatId, text, params) => {
               const tid = params?.message_thread_id;
               const silent = params?.purpose !== "materialize";

package/telegram-plugin/gateway/gateway.ts CHANGED Viewed

@@ -57,6 +57,7 @@ import { allocateDraftId } from '../draft-transport.js'
 import {
   makeEmptyActivityState,
   registerAndRender,
+  describeToolUse,
   type ActivityState,
 } from '../tool-activity-summary.js'
 import { toolLabel } from '../tool-labels.js'
@@ -6837,7 +6838,12 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
     while (turn.activityPendingRender !== turn.activityLastSentRender) {
       const target = turn.activityPendingRender
       if (target == null) break
-      const html = `<i>${target}</i>`
+      // Escape before wrapping in <i> + parse_mode HTML. The legacy
+      // verb-count summaries were safe ASCII, but the draft-mirror's
+      // describeToolUse content (file names, Bash descriptions, search
+      // queries) can contain <, >, & — which would break HTML parsing
+      // and surface literal tags (the exact #1942 bug class).
+      const html = `<i>${escapeHtmlForTg(target)}</i>`
       const chat = turn.sessionChatId
       const thread = turn.sessionThreadId
       // sendMessageDraft doesn't support forum threads.
@@ -7130,14 +7136,21 @@ function handleSessionEvent(ev: SessionEvent): void {
       // exactly once at a time and re-running until pending matches
       // the last-sent. Captures `turn` so a late drain after turn-swap
       // can't corrupt the next turn's atom.
-      // DRAFT_MIRROR (RFC draft-mirror-preview, Phase 1): the model's
-      // prose narration owns the single per-chat draft slot. Suppress
-      // the activity-summary tool-count draft so the two don't collide
-      // (Telegram shows one draft per chat — the later write clobbers
-      // the earlier). The activity-summary code stays intact for the
-      // kill-switch path; it's retired for good only in Phase 4.
-      if (!DRAFT_MIRROR_ENABLED && !turn.replyCalled && !isTelegramSurfaceTool(name)) {
-        const rendered = registerAndRender(turn.toolActivity, name)
+      // DRAFT_MIRROR (RFC draft-mirror-preview): render each tool_use as a
+      // human-friendly line in the live preview, using the model-authored
+      // descriptive field (Bash.description, Read/Edit file basename,
+      // hindsight→"Searching memory", etc. — see describeToolUse). Latest
+      // action wins (the draft shows "doing X" live), clears on reply.
+      // Never surfaces raw shell/query syntax — option A, uniform across
+      // code + non-code agents.
+      //
+      // Flag OFF (default): the legacy generic verb-count summary
+      // ("Ran 5 commands") via registerAndRender — byte-identical to
+      // pre-draft-mirror behavior.
+      if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
+        const rendered = DRAFT_MIRROR_ENABLED
+          ? describeToolUse(name, ev.input)
+          : registerAndRender(turn.toolActivity, name)
         if (rendered != null) {
           turn.activityPendingRender = rendered
           if (turn.activityInFlight == null) {
@@ -7185,19 +7198,19 @@ function handleSessionEvent(ev: SessionEvent): void {
             isPrivateChat: turn.isDm,
             threadId: turn.sessionThreadId,
             // Transport selection:
-            // - DRAFT_MIRROR (RFC draft-mirror-preview, Phase 1): force
-            //   the ephemeral compose-area draft so narration is a
-            //   clears-on-reply preview. Wins over visible-answer-stream.
-            //   No-reply delivery is owned by turn-flush, not materialize.
-            // - else #869-Phase1 visible-answer-stream: omit the draft
-            //   API so the lane edits a user-visible chat-timeline
-            //   message (minInitialChars:1 opens it on the first chunk).
-            // - else legacy: draft transport.
-            ...(DRAFT_MIRROR_ENABLED
-              ? { sendMessageDraft: sendMessageDraftFn }
-              : ANSWER_STREAM_VISIBLE_ENABLED
-                ? { minInitialChars: 1 }
-                : { sendMessageDraft: sendMessageDraftFn }),
+            // #869-Phase1 visible-answer-stream: omit the draft API so
+            // the lane edits a user-visible chat-timeline message
+            // (minInitialChars:1 opens it on the first chunk). The
+            // draft-mirror does NOT touch this lane — the canary proved
+            // the model emits almost no interstitial assistant.text
+            // (it thinks→tool→reply), so routing it to the draft just
+            // emptied the preview. The draft-mirror instead renders the
+            // tool_use stream (case 'tool_use' above) where the real
+            // signal lives. assistant.text keeps its visible-message
+            // home; the reply tool stays the canonical answer.
+            ...(ANSWER_STREAM_VISIBLE_ENABLED
+              ? { minInitialChars: 1 }
+              : { sendMessageDraft: sendMessageDraftFn }),
             // #1075: route through robustApiCall so flood-wait,
             // benign-400, and THREAD_NOT_FOUND are handled uniformly
             // instead of crashing the answer-stream loop on a deleted

package/telegram-plugin/tests/tool-activity-summary.test.ts CHANGED Viewed

@@ -5,8 +5,74 @@ import {
   formatSummary,
   registerAndRender,
   verbForTool,
+  describeToolUse,
 } from "../tool-activity-summary.js";
+describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () => {
+  // Each row is [toolName, input, expected label]; `check` asserts them in order.
+  type Row = [string, Record<string, unknown>, string | null];
+  const check = (rows: Row[]): void => {
+    for (const [tool, input, expected] of rows) {
+      expect(describeToolUse(tool, input)).toBe(expected);
+    }
+  };
+  it("Bash uses the model-authored description verbatim, never the command", () => {
+    check([
+      ["Bash", { command: "ls -la /tmp", description: "List workspace" }, "List workspace"],
+      // Without a description the generic line is used; the command is still not shown.
+      ["Bash", { command: "grep -r foo ." }, "Running a command"],
+    ]);
+  });
+  it("Read/Edit/Write render the file basename, not the full path", () => {
+    check([
+      ["Read", { file_path: "/home/ken/code/switchroom/gateway.ts" }, "Reading gateway.ts"],
+      ["Edit", { file_path: "/a/b/CLAUDE.md" }, "Editing CLAUDE.md"],
+      ["Write", { file_path: "notes.txt" }, "Writing notes.txt"],
+      ["Read", {}, "Reading a file"],
+    ]);
+  });
+  it("Grep/Glob show the pattern; WebFetch shows the hostname", () => {
+    check([
+      ["Grep", { pattern: "TODO" }, "Searching for TODO"],
+      ["WebFetch", { url: "https://www.example.com/path?q=1" }, "Reading example.com"],
+      ["WebSearch", { query: "best running shoes" }, "Searching the web for best running shoes"],
+    ]);
+  });
+  it("Task/Agent surface the sub-agent task description", () => {
+    check([["Task", { description: "Review the migration" }, "Delegating: Review the migration"]]);
+  });
+  it("domain MCP tools render human-meaningful labels (no jargon)", () => {
+    check([
+      ["mcp__hindsight__reflect", { query: "x" }, "Searching memory"],
+      ["mcp__hindsight__retain", {}, "Saving to memory"],
+      ["mcp__claude_ai_Google_Calendar__list_events", {}, "Checking your calendar"],
+      ["mcp__claude_ai_Gmail__search", {}, "Checking your email"],
+      ["mcp__claude_ai_Google_Drive__search_files", {}, "Looking through your files"],
+      ["mcp__claude_ai_Notion__notion-search", {}, "Checking your notes"],
+    ]);
+  });
+  it("surface tools (reply/stream_reply) return null — never mirrored", () => {
+    check([
+      ["mcp__switchroom-telegram__reply", { text: "hi" }, null],
+      ["mcp__switchroom-telegram__stream_reply", {}, null],
+    ]);
+  });
+  it("unknown MCP tool prefers a model-authored field, else humanizes the name", () => {
+    check([
+      ["mcp__acme__do_thing", { description: "Fetched the report" }, "Fetched the report"],
+      ["mcp__acme__do_thing", {}, "Using do thing"],
+    ]);
+  });
+  it("unknown built-in falls back to a generic working line, never raw syntax", () => {
+    check([
+      ["SomeFutureTool", {}, "Working…"],
+      ["", {}, null],
+    ]);
+  });
+});
 describe("verbForTool — tool name → past-tense verb", () => {
   it("maps standard CLI tools to readable verbs", () => {
     expect(verbForTool("Read")).toBe("read");

package/telegram-plugin/tool-activity-summary.ts CHANGED Viewed

@@ -198,3 +198,140 @@ export function registerAndRender(
   if (!changed) return null;
   return formatSummary(state);
 }
+// ─── Friendly per-tool rendering (draft-mirror, RFC draft-mirror-preview) ───
+//
+// Claude Code's own UI reads human-friendly because the model AUTHORS the
+// descriptive text inside each tool_use.input — verified against a real
+// session JSONL (1360 Bash calls etc.):
+//   Bash         → input.description   ("Get CLAUDE.md size and recent history")
+//   Read         → input.file_path     (basename → "Reading CLAUDE.md")
+//   Edit/Write   → input.file_path     (basename)
+//   Grep/Glob    → input.pattern
+//   Task/Agent   → input.description   (the sub-agent's task)
+//   WebFetch     → input.url           (hostname → "Reading example.com")
+//   hindsight    → friendly label      ("Searching memory")
+// There is never a raw `grep`/`jq`/`ls` to surface — only the model's own
+// plain-English description or a domain label. This is the signal the
+// draft-mirror renders (option A: uniform across code + non-code agents).
+/**
+ * Return the last non-empty "/"-separated segment of a path for display.
+ * Non-string or empty input yields null; a path made only of slashes has
+ * no segment to show and is handed back unchanged.
+ */
+function baseName(p: unknown): string | null {
+  if (typeof p !== "string" || p === "") {
+    return null;
+  }
+  const segments = p.split("/").filter((segment) => segment !== "");
+  if (segments.length === 0) {
+    return p;
+  }
+  return segments[segments.length - 1];
+}
+/**
+ * Extract a bare hostname from a URL-like string for display (no
+ * scheme, path, query or fragment).
+ *
+ * Absolute URLs are parsed with the WHATWG URL parser. Anything it rejects
+ * (e.g. "www.example.com/docs?x=1", which has no scheme) goes through a
+ * textual fallback: drop a leading http(s) scheme and cut at the first
+ * "/", "?" or "#". Both paths strip a leading "www." so the label is the
+ * same whichever one produced it.
+ *
+ * @param u - Candidate URL.
+ * @returns Hostname, or null when none can be determined.
+ */
+function hostName(u: unknown): string | null {
+  if (typeof u !== "string" || u.length === 0) return null;
+  let host: string;
+  try {
+    host = new URL(u).hostname;
+  } catch {
+    host = u.trim().replace(/^https?:\/\//i, "").split(/[/?#]/)[0];
+  }
+  // URLs without an authority (e.g. "mailto:") parse to an empty hostname;
+  // report that as null rather than handing "" to callers.
+  return host.replace(/^www\./i, "") || null;
+}
+/**
+ * Trim a string and cap it at `n` UTF-16 code units, ending a truncated
+ * result with an ellipsis.
+ *
+ * @param s - Value to clip; non-strings and blank strings give null.
+ * @param n - Maximum length of the result, ellipsis included.
+ * @returns The trimmed (possibly truncated) text, or null.
+ */
+function clip(s: unknown, n: number): string | null {
+  if (typeof s !== "string") return null;
+  const t = s.trim();
+  if (t.length === 0) return null;
+  if (t.length > n) {
+    // Reserve one unit for the ellipsis; never let the cut go negative.
+    let end = Math.max(0, n - 1);
+    // If the cut lands inside a surrogate pair, drop the orphaned high
+    // half so the label never contains a lone surrogate.
+    const last = t.charCodeAt(end - 1);
+    if (last >= 0xd800 && last <= 0xdbff) end -= 1;
+    return t.slice(0, end) + "…";
+  }
+  return t;
+}
+/**
+ * Map one tool_use event to the short present-tense line shown in the
+ * live draft preview, or null when the tool must not be surfaced.
+ *
+ * Resolution order:
+ *   1. MCP tools (`mcp__<server>__<tool>`; server and tool are lowercased
+ *      before comparison): `switchroom-telegram` gives null (those tools
+ *      ARE the conversation); hindsight, calendar, gmail, drive and notion
+ *      servers get a fixed label; any other server uses the first
+ *      non-blank of input.description / query / title (clipped), else
+ *      "Using <tool name with -/_ shown as spaces>".
+ *   2. Built-in tools: a per-tool detail taken from the input (Bash
+ *      description, file basename, Grep/Glob pattern, WebFetch hostname,
+ *      WebSearch query, Task description) with a generic fallback line.
+ *   3. Any other name: a generic working line.
+ *
+ * @param toolName - Tool name from the tool_use event; empty gives null.
+ * @param input - The tool_use input payload; undefined is treated as {}.
+ * @returns The label to render, or null to render nothing.
+ */
+export function describeToolUse(
+  toolName: string,
+  input: Record<string, unknown> | undefined,
+): string | null {
+  if (!toolName) {
+    return null;
+  }
+  const args = input ?? {};
+  const mcp = /^mcp__(.+?)__(.+)$/.exec(toolName);
+  if (mcp !== null) {
+    const server = mcp[1].toLowerCase();
+    const tool = mcp[2].toLowerCase();
+    switch (server) {
+      case "switchroom-telegram":
+        return null;
+      case "hindsight": {
+        if (tool === "recall" || tool === "reflect") {
+          return "Searching memory";
+        }
+        const saves = tool === "retain" || tool === "update_memory" || tool === "sync_retain";
+        return saves ? "Saving to memory" : "Working with memory";
+      }
+      case "google-workspace":
+      case "claude_ai_google_calendar":
+        return "Checking your calendar";
+      case "claude_ai_gmail":
+        return "Checking your email";
+      case "claude_ai_google_drive":
+        return "Looking through your files";
+      case "notion":
+      case "claude_ai_notion":
+        return "Checking your notes";
+      default: {
+        // Unrecognised server: lean on whatever the model wrote, else the tool name.
+        const authored =
+          clip(args.description, 60) ?? clip(args.query, 50) ?? clip(args.title, 50);
+        if (authored) {
+          return authored;
+        }
+        return "Using " + tool.replace(/[-_]+/g, " ");
+      }
+    }
+  }
+  // "<verb> <detail>" when a detail is available, else the generic line.
+  const withDetail = (verb: string, detail: string | null, generic: string): string =>
+    detail ? `${verb} ${detail}` : generic;
+  if (toolName === "Bash") {
+    return clip(args.description, 70) ?? "Running a command";
+  }
+  if (toolName === "BashOutput" || toolName === "KillShell") {
+    return "Managing a background command";
+  }
+  if (toolName === "Read") {
+    return withDetail("Reading", baseName(args.file_path), "Reading a file");
+  }
+  if (toolName === "Edit" || toolName === "MultiEdit" || toolName === "NotebookEdit") {
+    const target = baseName(args.file_path) ?? baseName(args.notebook_path);
+    return withDetail("Editing", target, "Editing a file");
+  }
+  if (toolName === "Write") {
+    return withDetail("Writing", baseName(args.file_path), "Writing a file");
+  }
+  if (toolName === "Grep" || toolName === "Glob") {
+    return withDetail("Searching for", clip(args.pattern, 40), "Searching files");
+  }
+  if (toolName === "WebFetch") {
+    return withDetail("Reading", hostName(args.url), "Reading a web page");
+  }
+  if (toolName === "WebSearch") {
+    return withDetail("Searching the web for", clip(args.query, 50), "Searching the web");
+  }
+  if (toolName === "Task" || toolName === "Agent") {
+    return withDetail("Delegating:", clip(args.description, 60), "Delegating to a sub-agent");
+  }
+  if (["TodoWrite", "TaskCreate", "TaskUpdate", "TaskList"].includes(toolName)) {
+    return "Updating the plan";
+  }
+  if (toolName === "ToolSearch") {
+    return "Finding the right tool";
+  }
+  return "Working…";
+}

package/telegram-plugin/uat/scenarios/jtbd-webkite-read-dm.test.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * JTBD scenario — the agent fetches the web via webkite, transparently.
+ *
+ * Validates the v0.13.62/63 webkite rollout end-to-end through real
+ * Telegram: the user sends a URL and asks about its content WITHOUT
+ * ever naming "webkite". The agent must:
+ *
+ *   1. Reach for webkite on its own (the native WebFetch/WebSearch
+ *      tools are denied fleet-wide — see scaffold.ts
+ *      WEBKITE_FLEET_DENY_TOOLS — so the ONLY way the agent can answer
+ *      a "read this URL" prompt is via the webkite_* MCP tools). If the
+ *      agent returns the page's content, webkite did the work by
+ *      construction — there is no other web-fetch tool available.
+ *
+ *   2. Render JavaScript. The target is `quotes.toscrape.com/js/`, a
+ *      purpose-built scraping-practice SPA whose quotes are injected by
+ *      JS at runtime. A raw HTTP fetch (what the old WebFetch did) sees
+ *      an empty page — `curl` returns zero `class="quote"` nodes. Only
+ *      a JS-executing renderer (webkite → cloakbrowser headless
+ *      Chromium) produces the visible quote text. So a correct quote in
+ *      the reply is positive proof that JS rendering happened.
+ *
+ * The first quote on that page is Einstein's "The world as we have
+ * created it is a process of our thinking…". We assert the reply names
+ * Einstein AND carries a recognizable fragment of that quote.
+ *
+ * ## What this catches that other UATs don't
+ *
+ * - `jtbd-fast-trivial-dm` proves the agent replies fast, but never
+ *   touches a tool. This is the first UAT that forces a real web fetch.
+ * - The in-container `webkite read` smoke proves the binary works, but
+ *   not that the *model* chooses webkite unprompted over a denied
+ *   WebFetch, nor that the full inbound→claude→MCP→outbound path works.
+ *
+ * ## Failure modes this guards against
+ *
+ * - A regression that re-enables WebFetch (the model might fetch raw
+ *   HTML and miss the JS-rendered quotes → wrong/empty answer).
+ * - webkite MCP not wired / not trusted (agent says it can't browse).
+ * - cloakbrowser broken (agent returns the empty static page → no
+ *   quote, or a "page had no content" apology).
+ * - The glibc regression that the v0.13.62 canary caught (webkite
+ *   dead-on-arrival → agent can't browse at all).
+ */
+import { describe, it, expect } from "vitest";
+import { spinUp } from "../harness.js";
+// Agent name passed to spinUp() for this scenario.
+const AGENT = "test-harness";
+// JS-rendered scraping-practice page. Quotes exist ONLY after JS runs;
+// a raw fetch sees none. Stable, purpose-built, no auth.
+const JS_URL = "https://quotes.toscrape.com/js/";
+// Deliberately does NOT mention webkite, fetch, browser, or any tool —
+// a natural "read this for me" ask. The agent must pick the tool.
+const PROMPT =
+  `Open ${JS_URL} and tell me the exact text of the very first quote ` +
+  `on the page and who said it. Just the quote and the author.`;
+// The first quote's author + a distinctive fragment of its text.
+// Both patterns are case-insensitive; the fragment accepts any one of
+// three phrases from the quote.
+const EXPECTED_AUTHOR = /einstein/i;
+const EXPECTED_FRAGMENT =
+  /world as we have created it|process of our thinking|changing our thinking/i;
+// Phrases that would indicate the agent FAILED to browse (fell back to
+// "I can't access the web" or got the empty static page).
+const CANT_BROWSE = [
+  /can.?t (access|browse|open|reach|fetch)/i,
+  /unable to (access|browse|open|reach|fetch)/i,
+  /no content|empty page|couldn.?t (find|load)/i,
+  /don.?t have (web|internet|browsing)/i,
+];
+// Single scenario: spin up the harness for AGENT, DM the prompt, wait for
+// a bot message matching the quote fragment, then check the author is
+// named and none of the CANT_BROWSE phrases appear. The scenario is torn
+// down in `finally`, so cleanup runs even when an assertion throws.
+describe("uat: agent fetches the web via webkite (JS page, unprompted)", () => {
+  it(
+    "URL prompt → agent returns JS-rendered content (proves webkite + cloakbrowser)",
+    async () => {
+      const sc = await spinUp({ agent: AGENT });
+      try {
+        await sc.sendDM(PROMPT);
+        // Generous budget: a real cloakbrowser render of an SPA is
+        // slower than a trivial reply (Chromium spawn + JS execution).
+        const reply = await sc.expectMessage(EXPECTED_FRAGMENT, {
+          from: "bot",
+          timeout: 90_000,
+        });
+        // Positive proof: the JS-gated quote text came back.
+        // NOTE(review): expectMessage was given the same pattern, so this
+        // presumably re-asserts what the wait already matched — confirm
+        // against the harness.
+        expect(reply.text).toMatch(EXPECTED_FRAGMENT);
+        // And the author — confirms it parsed the actual quote, not noise.
+        expect(reply.text).toMatch(EXPECTED_AUTHOR);
+        // Negative proof: no "I can't browse" fallback. (WebFetch is
+        // denied, so a failure to use webkite surfaces as an apology,
+        // not a wrong fetch.)
+        const failedToBrowse = CANT_BROWSE.some((re) => re.test(reply.text));
+        expect(
+          failedToBrowse,
+          `agent reply looks like a can't-browse fallback: ${JSON.stringify(reply.text.slice(0, 300))}`,
+        ).toBe(false);
+        // Log a truncated copy of the reply (first 200 chars) for the run record.
+        console.log(
+          `[webkite-read] agent returned JS-rendered quote via webkite — ` +
+          `WebFetch denied, cloakbrowser rendered the SPA. ` +
+          `reply: ${JSON.stringify(reply.text.slice(0, 200))}`,
+        );
+      } finally {
+        await sc.tearDown();
+      }
+    },
+    // Per-test timeout; larger than the 90 s reply wait above.
+    120_000,
+  );
+});