switchroom 0.13.64 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "switchroom",
3
- "version": "0.13.64",
3
+ "version": "0.14.0",
4
4
  "description": "Run Claude Code 24/7 on your Claude Pro/Max subscription over Telegram. Open-source alternative to OpenClaw and NanoClaw — no API keys.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -31685,6 +31685,111 @@ function registerAndRender(state, toolName) {
31685
31685
  return null;
31686
31686
  return formatSummary(state);
31687
31687
  }
31688
+ function baseName(p) {
31689
+ if (typeof p !== "string" || p.length === 0)
31690
+ return null;
31691
+ const parts = p.split("/").filter(Boolean);
31692
+ return parts.length > 0 ? parts[parts.length - 1] : p;
31693
+ }
31694
+ function hostName(u) {
31695
+ if (typeof u !== "string" || u.length === 0)
31696
+ return null;
31697
+ try {
31698
+ return new URL(u).hostname.replace(/^www\./, "");
31699
+ } catch {
31700
+ return u.replace(/^https?:\/\//, "").split("/")[0] || null;
31701
+ }
31702
+ }
31703
+ function clip(s, n) {
31704
+ if (typeof s !== "string")
31705
+ return null;
31706
+ const t = s.trim();
31707
+ if (t.length === 0)
31708
+ return null;
31709
+ return t.length > n ? t.slice(0, n - 1) + "\u2026" : t;
31710
+ }
31711
+ function describeToolUse(toolName, input) {
31712
+ if (!toolName)
31713
+ return null;
31714
+ const inp = input ?? {};
31715
+ const mcpMatch = /^mcp__(.+?)__(.+)$/.exec(toolName);
31716
+ if (mcpMatch) {
31717
+ const server = mcpMatch[1].toLowerCase();
31718
+ const tool = mcpMatch[2].toLowerCase();
31719
+ if (server === "switchroom-telegram")
31720
+ return null;
31721
+ if (server === "hindsight") {
31722
+ if (tool === "recall" || tool === "reflect")
31723
+ return "Searching memory";
31724
+ if (tool === "retain" || tool === "update_memory" || tool === "sync_retain")
31725
+ return "Saving to memory";
31726
+ return "Working with memory";
31727
+ }
31728
+ if (server === "google-workspace" || server === "claude_ai_google_calendar") {
31729
+ return "Checking your calendar";
31730
+ }
31731
+ if (server === "claude_ai_gmail")
31732
+ return "Checking your email";
31733
+ if (server === "claude_ai_google_drive")
31734
+ return "Looking through your files";
31735
+ if (server === "notion" || server === "claude_ai_notion") {
31736
+ return "Checking your notes";
31737
+ }
31738
+ const desc = clip(inp.description, 60) ?? clip(inp.query, 50) ?? clip(inp.title, 50);
31739
+ if (desc)
31740
+ return desc;
31741
+ return "Using " + tool.replace(/[-_]+/g, " ");
31742
+ }
31743
+ switch (toolName) {
31744
+ case "Bash": {
31745
+ return clip(inp.description, 70) ?? "Running a command";
31746
+ }
31747
+ case "BashOutput":
31748
+ case "KillShell":
31749
+ return "Managing a background command";
31750
+ case "Read": {
31751
+ const f = baseName(inp.file_path);
31752
+ return f ? `Reading ${f}` : "Reading a file";
31753
+ }
31754
+ case "Edit":
31755
+ case "MultiEdit":
31756
+ case "NotebookEdit": {
31757
+ const f = baseName(inp.file_path) ?? baseName(inp.notebook_path);
31758
+ return f ? `Editing ${f}` : "Editing a file";
31759
+ }
31760
+ case "Write": {
31761
+ const f = baseName(inp.file_path);
31762
+ return f ? `Writing ${f}` : "Writing a file";
31763
+ }
31764
+ case "Grep":
31765
+ case "Glob": {
31766
+ const p = clip(inp.pattern, 40);
31767
+ return p ? `Searching for ${p}` : "Searching files";
31768
+ }
31769
+ case "WebFetch": {
31770
+ const h = hostName(inp.url);
31771
+ return h ? `Reading ${h}` : "Reading a web page";
31772
+ }
31773
+ case "WebSearch": {
31774
+ const q = clip(inp.query, 50);
31775
+ return q ? `Searching the web for ${q}` : "Searching the web";
31776
+ }
31777
+ case "Task":
31778
+ case "Agent": {
31779
+ const d = clip(inp.description, 60);
31780
+ return d ? `Delegating: ${d}` : "Delegating to a sub-agent";
31781
+ }
31782
+ case "TodoWrite":
31783
+ case "TaskCreate":
31784
+ case "TaskUpdate":
31785
+ case "TaskList":
31786
+ return "Updating the plan";
31787
+ case "ToolSearch":
31788
+ return "Finding the right tool";
31789
+ default:
31790
+ return "Working\u2026";
31791
+ }
31792
+ }
31688
31793
 
31689
31794
  // tool-labels.ts
31690
31795
  var MAX_LABEL_CHARS = 60;
@@ -49716,10 +49821,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
49716
49821
  }
49717
49822
 
49718
49823
  // ../src/build-info.ts
49719
- var VERSION = "0.13.64";
49720
- var COMMIT_SHA = "52afe8b0";
49721
- var COMMIT_DATE = "2026-05-28T04:44:59Z";
49722
- var LATEST_PR = 1948;
49824
+ var VERSION = "0.14.0";
49825
+ var COMMIT_SHA = "d7cd6faa";
49826
+ var COMMIT_DATE = "2026-05-28T06:28:21Z";
49827
+ var LATEST_PR = 1954;
49723
49828
  var COMMITS_AHEAD_OF_TAG = 0;
49724
49829
 
49725
49830
  // gateway/boot-version.ts
@@ -53694,7 +53799,7 @@ async function drainActivitySummary(turn) {
53694
53799
  const target = turn.activityPendingRender;
53695
53800
  if (target == null)
53696
53801
  break;
53697
- const html = `<i>${target}</i>`;
53802
+ const html = `<i>${escapeHtmlForTg(target)}</i>`;
53698
53803
  const chat = turn.sessionChatId;
53699
53804
  const thread = turn.sessionThreadId;
53700
53805
  const useDraft = turn.isDm && thread == null && sendMessageDraftFn != null;
@@ -53863,8 +53968,8 @@ function handleSessionEvent(ev) {
53863
53968
  clearActivitySummary(turn);
53864
53969
  }
53865
53970
  }
53866
- if (!DRAFT_MIRROR_ENABLED && !turn.replyCalled && !isTelegramSurfaceTool(name)) {
53867
- const rendered = registerAndRender(turn.toolActivity, name);
53971
+ if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
53972
+ const rendered = DRAFT_MIRROR_ENABLED ? describeToolUse(name, ev.input) : registerAndRender(turn.toolActivity, name);
53868
53973
  if (rendered != null) {
53869
53974
  turn.activityPendingRender = rendered;
53870
53975
  if (turn.activityInFlight == null) {
@@ -53891,7 +53996,7 @@ function handleSessionEvent(ev) {
53891
53996
  chatId: turn.sessionChatId,
53892
53997
  isPrivateChat: turn.isDm,
53893
53998
  threadId: turn.sessionThreadId,
53894
- ...DRAFT_MIRROR_ENABLED ? { sendMessageDraft: sendMessageDraftFn } : ANSWER_STREAM_VISIBLE_ENABLED ? { minInitialChars: 1 } : { sendMessageDraft: sendMessageDraftFn },
53999
+ ...ANSWER_STREAM_VISIBLE_ENABLED ? { minInitialChars: 1 } : { sendMessageDraft: sendMessageDraftFn },
53895
54000
  sendMessage: async (chatId, text, params) => {
53896
54001
  const tid = params?.message_thread_id;
53897
54002
  const silent = params?.purpose !== "materialize";
@@ -57,6 +57,7 @@ import { allocateDraftId } from '../draft-transport.js'
57
57
  import {
58
58
  makeEmptyActivityState,
59
59
  registerAndRender,
60
+ describeToolUse,
60
61
  type ActivityState,
61
62
  } from '../tool-activity-summary.js'
62
63
  import { toolLabel } from '../tool-labels.js'
@@ -6837,7 +6838,12 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
6837
6838
  while (turn.activityPendingRender !== turn.activityLastSentRender) {
6838
6839
  const target = turn.activityPendingRender
6839
6840
  if (target == null) break
6840
- const html = `<i>${target}</i>`
6841
+ // Escape before wrapping in <i> + parse_mode HTML. The legacy
6842
+ // verb-count summaries were safe ASCII, but the draft-mirror's
6843
+ // describeToolUse content (file names, Bash descriptions, search
6844
+ // queries) can contain <, >, & — which would break HTML parsing
6845
+ // and surface literal tags (the exact #1942 bug class).
6846
+ const html = `<i>${escapeHtmlForTg(target)}</i>`
6841
6847
  const chat = turn.sessionChatId
6842
6848
  const thread = turn.sessionThreadId
6843
6849
  // sendMessageDraft doesn't support forum threads.
@@ -7130,14 +7136,21 @@ function handleSessionEvent(ev: SessionEvent): void {
7130
7136
  // exactly once at a time and re-running until pending matches
7131
7137
  // the last-sent. Captures `turn` so a late drain after turn-swap
7132
7138
  // can't corrupt the next turn's atom.
7133
- // DRAFT_MIRROR (RFC draft-mirror-preview, Phase 1): the model's
7134
- // prose narration owns the single per-chat draft slot. Suppress
7135
- // the activity-summary tool-count draft so the two don't collide
7136
- // (Telegram shows one draft per chat — the later write clobbers
7137
- // the earlier). The activity-summary code stays intact for the
7138
- // kill-switch path; it's retired for good only in Phase 4.
7139
- if (!DRAFT_MIRROR_ENABLED && !turn.replyCalled && !isTelegramSurfaceTool(name)) {
7140
- const rendered = registerAndRender(turn.toolActivity, name)
7139
+ // DRAFT_MIRROR (RFC draft-mirror-preview): render each tool_use as a
7140
+ // human-friendly line in the live preview, using the model-authored
7141
+ // descriptive field (Bash.description, Read/Edit file basename,
7142
+ // hindsight→"Searching memory", etc. — see describeToolUse). Latest
7143
+ // action wins (the draft shows "doing X" live), clears on reply.
7144
+ // Never surfaces raw shell/query syntax — option A, uniform across
7145
+ // code + non-code agents.
7146
+ //
7147
+ // Flag OFF (default): the legacy generic verb-count summary
7148
+ // ("Ran 5 commands") via registerAndRender — byte-identical to
7149
+ // pre-draft-mirror behavior.
7150
+ if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
7151
+ const rendered = DRAFT_MIRROR_ENABLED
7152
+ ? describeToolUse(name, ev.input)
7153
+ : registerAndRender(turn.toolActivity, name)
7141
7154
  if (rendered != null) {
7142
7155
  turn.activityPendingRender = rendered
7143
7156
  if (turn.activityInFlight == null) {
@@ -7185,19 +7198,19 @@ function handleSessionEvent(ev: SessionEvent): void {
7185
7198
  isPrivateChat: turn.isDm,
7186
7199
  threadId: turn.sessionThreadId,
7187
7200
  // Transport selection:
7188
- // - DRAFT_MIRROR (RFC draft-mirror-preview, Phase 1): force
7189
- // the ephemeral compose-area draft so narration is a
7190
- // clears-on-reply preview. Wins over visible-answer-stream.
7191
- // No-reply delivery is owned by turn-flush, not materialize.
7192
- // - else #869-Phase1 visible-answer-stream: omit the draft
7193
- // API so the lane edits a user-visible chat-timeline
7194
- // message (minInitialChars:1 opens it on the first chunk).
7195
- // - else legacy: draft transport.
7196
- ...(DRAFT_MIRROR_ENABLED
7197
- ? { sendMessageDraft: sendMessageDraftFn }
7198
- : ANSWER_STREAM_VISIBLE_ENABLED
7199
- ? { minInitialChars: 1 }
7200
- : { sendMessageDraft: sendMessageDraftFn }),
7201
+ // #869-Phase1 visible-answer-stream: omit the draft API so
7202
+ // the lane edits a user-visible chat-timeline message
7203
+ // (minInitialChars:1 opens it on the first chunk). The
7204
+ // draft-mirror does NOT touch this lane the canary proved
7205
+ // the model emits almost no interstitial assistant.text
7206
+ // (it thinks→tool→reply), so routing it to the draft just
7207
+ // emptied the preview. The draft-mirror instead renders the
7208
+ // tool_use stream (case 'tool_use' above) where the real
7209
+ // signal lives. assistant.text keeps its visible-message
7210
+ // home; the reply tool stays the canonical answer.
7211
+ ...(ANSWER_STREAM_VISIBLE_ENABLED
7212
+ ? { minInitialChars: 1 }
7213
+ : { sendMessageDraft: sendMessageDraftFn }),
7201
7214
  // #1075: route through robustApiCall so flood-wait,
7202
7215
  // benign-400, and THREAD_NOT_FOUND are handled uniformly
7203
7216
  // instead of crashing the answer-stream loop on a deleted
@@ -5,8 +5,74 @@ import {
5
5
  formatSummary,
6
6
  registerAndRender,
7
7
  verbForTool,
8
+ describeToolUse,
8
9
  } from "../tool-activity-summary.js";
9
10
 
11
+ describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () => {
12
+ it("Bash uses the model-authored description verbatim, never the command", () => {
13
+ expect(
14
+ describeToolUse("Bash", { command: "ls -la /tmp", description: "List workspace" }),
15
+ ).toBe("List workspace");
16
+ // No description → safe generic, still never the raw command.
17
+ expect(describeToolUse("Bash", { command: "grep -r foo ." })).toBe("Running a command");
18
+ });
19
+
20
+ it("Read/Edit/Write render the file basename, not the full path", () => {
21
+ expect(describeToolUse("Read", { file_path: "/home/ken/code/switchroom/gateway.ts" })).toBe(
22
+ "Reading gateway.ts",
23
+ );
24
+ expect(describeToolUse("Edit", { file_path: "/a/b/CLAUDE.md" })).toBe("Editing CLAUDE.md");
25
+ expect(describeToolUse("Write", { file_path: "notes.txt" })).toBe("Writing notes.txt");
26
+ expect(describeToolUse("Read", {})).toBe("Reading a file");
27
+ });
28
+
29
+ it("Grep/Glob show the pattern; WebFetch shows the hostname", () => {
30
+ expect(describeToolUse("Grep", { pattern: "TODO" })).toBe("Searching for TODO");
31
+ expect(describeToolUse("WebFetch", { url: "https://www.example.com/path?q=1" })).toBe(
32
+ "Reading example.com",
33
+ );
34
+ expect(describeToolUse("WebSearch", { query: "best running shoes" })).toBe(
35
+ "Searching the web for best running shoes",
36
+ );
37
+ });
38
+
39
+ it("Task/Agent surface the sub-agent task description", () => {
40
+ expect(describeToolUse("Task", { description: "Review the migration" })).toBe(
41
+ "Delegating: Review the migration",
42
+ );
43
+ });
44
+
45
+ it("domain MCP tools render human-meaningful labels (no jargon)", () => {
46
+ expect(describeToolUse("mcp__hindsight__reflect", { query: "x" })).toBe("Searching memory");
47
+ expect(describeToolUse("mcp__hindsight__retain", {})).toBe("Saving to memory");
48
+ expect(describeToolUse("mcp__claude_ai_Google_Calendar__list_events", {})).toBe(
49
+ "Checking your calendar",
50
+ );
51
+ expect(describeToolUse("mcp__claude_ai_Gmail__search", {})).toBe("Checking your email");
52
+ expect(describeToolUse("mcp__claude_ai_Google_Drive__search_files", {})).toBe(
53
+ "Looking through your files",
54
+ );
55
+ expect(describeToolUse("mcp__claude_ai_Notion__notion-search", {})).toBe("Checking your notes");
56
+ });
57
+
58
+ it("surface tools (reply/stream_reply) return null — never mirrored", () => {
59
+ expect(describeToolUse("mcp__switchroom-telegram__reply", { text: "hi" })).toBeNull();
60
+ expect(describeToolUse("mcp__switchroom-telegram__stream_reply", {})).toBeNull();
61
+ });
62
+
63
+ it("unknown MCP tool prefers a model-authored field, else humanizes the name", () => {
64
+ expect(describeToolUse("mcp__acme__do_thing", { description: "Fetched the report" })).toBe(
65
+ "Fetched the report",
66
+ );
67
+ expect(describeToolUse("mcp__acme__do_thing", {})).toBe("Using do thing");
68
+ });
69
+
70
+ it("unknown built-in falls back to a generic working line, never raw syntax", () => {
71
+ expect(describeToolUse("SomeFutureTool", {})).toBe("Working…");
72
+ expect(describeToolUse("", {})).toBeNull();
73
+ });
74
+ });
75
+
10
76
  describe("verbForTool — tool name → past-tense verb", () => {
11
77
  it("maps standard CLI tools to readable verbs", () => {
12
78
  expect(verbForTool("Read")).toBe("read");
@@ -198,3 +198,140 @@ export function registerAndRender(
198
198
  if (!changed) return null;
199
199
  return formatSummary(state);
200
200
  }
201
+
202
+ // ─── Friendly per-tool rendering (draft-mirror, RFC draft-mirror-preview) ───
203
+ //
204
+ // Claude Code's own UI reads human-friendly because the model AUTHORS the
205
+ // descriptive text inside each tool_use.input — verified against a real
206
+ // session JSONL (1360 Bash calls etc.):
207
+ // Bash → input.description ("Get CLAUDE.md size and recent history")
208
+ // Read → input.file_path (basename → "Reading CLAUDE.md")
209
+ // Edit/Write → input.file_path (basename)
210
+ // Grep/Glob → input.pattern
211
+ // Task/Agent → input.description (the sub-agent's task)
212
+ // WebFetch → input.url (hostname → "Reading example.com")
213
+ // hindsight → friendly label ("Searching memory")
214
+ // There is never a raw `grep`/`jq`/`ls` to surface — only the model's own
215
+ // plain-English description or a domain label. This is the signal the
216
+ // draft-mirror renders (option A: uniform across code + non-code agents).
217
+
218
+ /** Strip a path to its basename for display. */
219
+ function baseName(p: unknown): string | null {
220
+ if (typeof p !== "string" || p.length === 0) return null;
221
+ const parts = p.split("/").filter(Boolean);
222
+ return parts.length > 0 ? parts[parts.length - 1] : p;
223
+ }
224
+
225
+ /** Extract a bare hostname from a URL for display (no scheme/path). */
226
+ function hostName(u: unknown): string | null {
227
+ if (typeof u !== "string" || u.length === 0) return null;
228
+ try {
229
+ return new URL(u).hostname.replace(/^www\./, "");
230
+ } catch {
231
+ return u.replace(/^https?:\/\//, "").split("/")[0] || null;
232
+ }
233
+ }
234
+
235
+ function clip(s: unknown, n: number): string | null {
236
+ if (typeof s !== "string") return null;
237
+ const t = s.trim();
238
+ if (t.length === 0) return null;
239
+ return t.length > n ? t.slice(0, n - 1) + "…" : t;
240
+ }
241
+
242
+ /**
243
+ * Render a single tool_use into a human-friendly, present-tense activity
244
+ * line for the live draft preview — or null when the tool should NOT be
245
+ * surfaced (the Telegram-plugin surface tools, which ARE the conversation).
246
+ *
247
+ * Leads with the model-authored descriptive field per the map above; falls
248
+ * back to a domain label, then to a humanized tool name. Never emits raw
249
+ * shell/query syntax.
250
+ */
251
+ export function describeToolUse(
252
+ toolName: string,
253
+ input: Record<string, unknown> | undefined,
254
+ ): string | null {
255
+ if (!toolName) return null;
256
+ const inp = input ?? {};
257
+
258
+ const mcpMatch = /^mcp__(.+?)__(.+)$/.exec(toolName);
259
+ if (mcpMatch) {
260
+ const server = mcpMatch[1].toLowerCase();
261
+ const tool = mcpMatch[2].toLowerCase();
262
+ // Surface tools ARE the conversation — never mirror them.
263
+ if (server === "switchroom-telegram") return null;
264
+ if (server === "hindsight") {
265
+ if (tool === "recall" || tool === "reflect") return "Searching memory";
266
+ if (tool === "retain" || tool === "update_memory" || tool === "sync_retain")
267
+ return "Saving to memory";
268
+ return "Working with memory";
269
+ }
270
+ if (
271
+ server === "google-workspace" ||
272
+ server === "claude_ai_google_calendar"
273
+ ) {
274
+ return "Checking your calendar";
275
+ }
276
+ if (server === "claude_ai_gmail") return "Checking your email";
277
+ if (server === "claude_ai_google_drive") return "Looking through your files";
278
+ if (server === "notion" || server === "claude_ai_notion") {
279
+ return "Checking your notes";
280
+ }
281
+ // Unknown MCP tool: prefer a model-authored field, else a humanized name.
282
+ const desc = clip(inp.description, 60) ?? clip(inp.query, 50) ?? clip(inp.title, 50);
283
+ if (desc) return desc;
284
+ return "Using " + tool.replace(/[-_]+/g, " ");
285
+ }
286
+
287
+ switch (toolName) {
288
+ case "Bash": {
289
+ // The model writes a plain-English description for every command.
290
+ return clip(inp.description, 70) ?? "Running a command";
291
+ }
292
+ case "BashOutput":
293
+ case "KillShell":
294
+ return "Managing a background command";
295
+ case "Read": {
296
+ const f = baseName(inp.file_path);
297
+ return f ? `Reading ${f}` : "Reading a file";
298
+ }
299
+ case "Edit":
300
+ case "MultiEdit":
301
+ case "NotebookEdit": {
302
+ const f = baseName(inp.file_path) ?? baseName(inp.notebook_path);
303
+ return f ? `Editing ${f}` : "Editing a file";
304
+ }
305
+ case "Write": {
306
+ const f = baseName(inp.file_path);
307
+ return f ? `Writing ${f}` : "Writing a file";
308
+ }
309
+ case "Grep":
310
+ case "Glob": {
311
+ const p = clip(inp.pattern, 40);
312
+ return p ? `Searching for ${p}` : "Searching files";
313
+ }
314
+ case "WebFetch": {
315
+ const h = hostName(inp.url);
316
+ return h ? `Reading ${h}` : "Reading a web page";
317
+ }
318
+ case "WebSearch": {
319
+ const q = clip(inp.query, 50);
320
+ return q ? `Searching the web for ${q}` : "Searching the web";
321
+ }
322
+ case "Task":
323
+ case "Agent": {
324
+ const d = clip(inp.description, 60);
325
+ return d ? `Delegating: ${d}` : "Delegating to a sub-agent";
326
+ }
327
+ case "TodoWrite":
328
+ case "TaskCreate":
329
+ case "TaskUpdate":
330
+ case "TaskList":
331
+ return "Updating the plan";
332
+ case "ToolSearch":
333
+ return "Finding the right tool";
334
+ default:
335
+ return "Working…";
336
+ }
337
+ }
@@ -0,0 +1,115 @@
1
+ /**
2
+ * JTBD scenario — the agent fetches the web via webkite, transparently.
3
+ *
4
+ * Validates the v0.13.62/63 webkite rollout end-to-end through real
5
+ * Telegram: the user sends a URL and asks about its content WITHOUT
6
+ * ever naming "webkite". The agent must:
7
+ *
8
+ * 1. Reach for webkite on its own (the native WebFetch/WebSearch
9
+ * tools are denied fleet-wide — see scaffold.ts
10
+ * WEBKITE_FLEET_DENY_TOOLS — so the ONLY way the agent can answer
11
+ * a "read this URL" prompt is via the webkite_* MCP tools). If the
12
+ * agent returns the page's content, webkite did the work by
13
+ * construction — there is no other web-fetch tool available.
14
+ *
15
+ * 2. Render JavaScript. The target is `quotes.toscrape.com/js/`, a
16
+ * purpose-built scraping-practice SPA whose quotes are injected by
17
+ * JS at runtime. A raw HTTP fetch (what the old WebFetch did) sees
18
+ * an empty page — `curl` returns zero `class="quote"` nodes. Only
19
+ * a JS-executing renderer (webkite → cloakbrowser headless
20
+ * Chromium) produces the visible quote text. So a correct quote in
21
+ * the reply is positive proof that JS rendering happened.
22
+ *
23
+ * The first quote on that page is Einstein's "The world as we have
24
+ * created it is a process of our thinking…". We assert the reply names
25
+ * Einstein AND carries a recognizable fragment of that quote.
26
+ *
27
+ * ## What this catches that other UATs don't
28
+ *
29
+ * - `jtbd-fast-trivial-dm` proves the agent replies fast, but never
30
+ * touches a tool. This is the first UAT that forces a real web fetch.
31
+ * - The in-container `webkite read` smoke proves the binary works, but
32
+ * not that the *model* chooses webkite unprompted over a denied
33
+ * WebFetch, nor that the full inbound→claude→MCP→outbound path works.
34
+ *
35
+ * ## Failure modes this guards against
36
+ *
37
+ * - A regression that re-enables WebFetch (the model might fetch raw
38
+ * HTML and miss the JS-rendered quotes → wrong/empty answer).
39
+ * - webkite MCP not wired / not trusted (agent says it can't browse).
40
+ * - cloakbrowser broken (agent returns the empty static page → no
41
+ * quote, or a "page had no content" apology).
42
+ * - The glibc regression that the v0.13.62 canary caught (webkite
43
+ * dead-on-arrival → agent can't browse at all).
44
+ */
45
+
46
+ import { describe, it, expect } from "vitest";
47
+ import { spinUp } from "../harness.js";
48
+
49
+ const AGENT = "test-harness";
50
+
51
+ // JS-rendered scraping-practice page. Quotes exist ONLY after JS runs;
52
+ // a raw fetch sees none. Stable, purpose-built, no auth.
53
+ const JS_URL = "https://quotes.toscrape.com/js/";
54
+
55
+ // Deliberately does NOT mention webkite, fetch, browser, or any tool —
56
+ // a natural "read this for me" ask. The agent must pick the tool.
57
+ const PROMPT =
58
+ `Open ${JS_URL} and tell me the exact text of the very first quote ` +
59
+ `on the page and who said it. Just the quote and the author.`;
60
+
61
+ // The first quote's author + a distinctive fragment of its text.
62
+ const EXPECTED_AUTHOR = /einstein/i;
63
+ const EXPECTED_FRAGMENT =
64
+ /world as we have created it|process of our thinking|changing our thinking/i;
65
+
66
+ // Phrases that would indicate the agent FAILED to browse (fell back to
67
+ // "I can't access the web" or got the empty static page).
68
+ const CANT_BROWSE = [
69
+ /can.?t (access|browse|open|reach|fetch)/i,
70
+ /unable to (access|browse|open|reach|fetch)/i,
71
+ /no content|empty page|couldn.?t (find|load)/i,
72
+ /don.?t have (web|internet|browsing)/i,
73
+ ];
74
+
75
+ describe("uat: agent fetches the web via webkite (JS page, unprompted)", () => {
76
+ it(
77
+ "URL prompt → agent returns JS-rendered content (proves webkite + cloakbrowser)",
78
+ async () => {
79
+ const sc = await spinUp({ agent: AGENT });
80
+ try {
81
+ await sc.sendDM(PROMPT);
82
+
83
+ // Generous budget: a real cloakbrowser render of an SPA is
84
+ // slower than a trivial reply (Chromium spawn + JS execution).
85
+ const reply = await sc.expectMessage(EXPECTED_FRAGMENT, {
86
+ from: "bot",
87
+ timeout: 90_000,
88
+ });
89
+
90
+ // Positive proof: the JS-gated quote text came back.
91
+ expect(reply.text).toMatch(EXPECTED_FRAGMENT);
92
+ // And the author — confirms it parsed the actual quote, not noise.
93
+ expect(reply.text).toMatch(EXPECTED_AUTHOR);
94
+
95
+ // Negative proof: no "I can't browse" fallback. (WebFetch is
96
+ // denied, so a failure to use webkite surfaces as an apology,
97
+ // not a wrong fetch.)
98
+ const failedToBrowse = CANT_BROWSE.some((re) => re.test(reply.text));
99
+ expect(
100
+ failedToBrowse,
101
+ `agent reply looks like a can't-browse fallback: ${JSON.stringify(reply.text.slice(0, 300))}`,
102
+ ).toBe(false);
103
+
104
+ console.log(
105
+ `[webkite-read] agent returned JS-rendered quote via webkite — ` +
106
+ `WebFetch denied, cloakbrowser rendered the SPA. ` +
107
+ `reply: ${JSON.stringify(reply.text.slice(0, 200))}`,
108
+ );
109
+ } finally {
110
+ await sc.tearDown();
111
+ }
112
+ },
113
+ 120_000,
114
+ );
115
+ });