switchroom 0.13.64 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/switchroom.js +487 -285
- package/package.json +1 -1
- package/telegram-plugin/dist/gateway/gateway.js +113 -8
- package/telegram-plugin/gateway/gateway.ts +35 -22
- package/telegram-plugin/tests/tool-activity-summary.test.ts +66 -0
- package/telegram-plugin/tool-activity-summary.ts +137 -0
- package/telegram-plugin/uat/scenarios/jtbd-webkite-read-dm.test.ts +115 -0
package/package.json
CHANGED
|
@@ -31685,6 +31685,111 @@ function registerAndRender(state, toolName) {
|
|
|
31685
31685
|
return null;
|
|
31686
31686
|
return formatSummary(state);
|
|
31687
31687
|
}
|
|
31688
|
+
/**
 * Reduce a "/"-separated path to its final non-empty segment for display.
 *
 * @param {unknown} p - Candidate path.
 * @returns {string | null} The last non-empty segment; the input itself when
 *   it has no non-empty segment (e.g. "/"); null when `p` is not a non-empty
 *   string.
 */
function baseName(p) {
  const usable = typeof p === "string" && p.length > 0;
  if (!usable) {
    return null;
  }
  // Walk the segments and remember the last non-empty one, so trailing
  // slashes ("/a/b/") still resolve to "b".
  let last = null;
  for (const segment of p.split("/")) {
    if (segment) {
      last = segment;
    }
  }
  return last === null ? p : last;
}
|
|
31694
|
+
/**
 * Extract a bare, display-friendly hostname from a URL-like string.
 *
 * Absolute URLs go through the WHATWG `URL` parser. Input the parser rejects
 * (typically scheme-less strings such as "www.example.com/path") falls back
 * to a textual cut: drop a leading http(s):// and keep everything before the
 * first "/", "?" or "#". Both paths strip a leading "www." so the two
 * branches agree on what a "bare host" looks like.
 *
 * @param {unknown} u - Candidate URL.
 * @returns {string | null} The hostname without a leading "www.", or null
 *   when `u` is not a non-empty string or no host can be derived from it
 *   (e.g. "mailto:" URLs, whose hostname is empty).
 */
function hostName(u) {
  if (typeof u !== "string" || u.length === 0)
    return null;
  let host;
  try {
    host = new URL(u).hostname;
  } catch {
    // Not parseable as an absolute URL: best-effort textual extraction.
    host = u.replace(/^https?:\/\//i, "").split(/[/?#]/)[0];
  }
  // An empty host carries no display value; normalize it to null.
  return host.replace(/^www\./i, "") || null;
}
|
|
31703
|
+
/**
 * Trim a string and cap it at `n` UTF-16 code units, marking truncation with
 * a trailing ellipsis.
 *
 * @param {unknown} s - Candidate text.
 * @param {number} n - Maximum length of the result, ellipsis included.
 * @returns {string | null} The trimmed text (truncated to at most `n` code
 *   units when longer), or null when `s` is not a string or is blank.
 */
function clip(s, n) {
  if (typeof s !== "string")
    return null;
  const t = s.trim();
  if (t.length === 0)
    return null;
  if (!(t.length > n))
    return t;
  // Clamp so a zero/negative budget cannot make slice() count from the end.
  let head = t.slice(0, Math.max(0, n - 1));
  // Cutting between the two halves of a surrogate pair would leave a lone
  // high surrogate (malformed text); drop it rather than emit it.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
    head = head.slice(0, -1);
  return head + "\u2026";
}
|
|
31711
|
+
/**
 * Turn one tool_use into a short, present-tense activity line.
 *
 * MCP tools (names shaped `mcp__<server>__<tool>`) are matched on the
 * lower-cased server/tool names: tools of the "switchroom-telegram" server
 * yield null, known servers map to a fixed label, and any other MCP tool uses
 * the first non-blank of input.description / input.query / input.title, or
 * else "Using <tool name with dashes/underscores as spaces>". Built-in tools
 * are matched case-sensitively by exact name; unrecognized ones get a generic
 * "Working…" line.
 *
 * @param {string} toolName - Name of the invoked tool.
 * @param {object | undefined} input - The tool_use input; nullish is treated
 *   as an empty object.
 * @returns {string | null} The activity line, or null when `toolName` is
 *   falsy or the tool belongs to the "switchroom-telegram" server.
 */
function describeToolUse(toolName, input) {
  if (!toolName) {
    return null;
  }
  const args = input ?? {};

  const parsed = /^mcp__(.+?)__(.+)$/.exec(toolName);
  if (parsed !== null) {
    const [, rawServer, rawTool] = parsed;
    const server = rawServer.toLowerCase();
    const tool = rawTool.toLowerCase();

    if (server === "switchroom-telegram") {
      return null;
    }

    if (server === "hindsight") {
      switch (tool) {
        case "recall":
        case "reflect":
          return "Searching memory";
        case "retain":
        case "update_memory":
        case "sync_retain":
          return "Saving to memory";
        default:
          return "Working with memory";
      }
    }

    // Servers whose label does not depend on the specific tool invoked.
    const serverLabels = new Map([
      ["google-workspace", "Checking your calendar"],
      ["claude_ai_google_calendar", "Checking your calendar"],
      ["claude_ai_gmail", "Checking your email"],
      ["claude_ai_google_drive", "Looking through your files"],
      ["notion", "Checking your notes"],
      ["claude_ai_notion", "Checking your notes"],
    ]);
    const fixedLabel = serverLabels.get(server);
    if (fixedLabel !== undefined) {
      return fixedLabel;
    }

    const authored =
      clip(args.description, 60) ?? clip(args.query, 50) ?? clip(args.title, 50);
    return authored ? authored : `Using ${tool.replace(/[-_]+/g, " ")}`;
  }

  // "<prefix><target>" when a target could be derived, else the generic line.
  const withTarget = (prefix, target, generic) =>
    target ? `${prefix}${target}` : generic;

  const backgroundCommand = () => "Managing a background command";
  const editing = () =>
    withTarget(
      "Editing ",
      baseName(args.file_path) ?? baseName(args.notebook_path),
      "Editing a file",
    );
  const searching = () =>
    withTarget("Searching for ", clip(args.pattern, 40), "Searching files");
  const delegating = () =>
    withTarget("Delegating: ", clip(args.description, 60), "Delegating to a sub-agent");
  const planning = () => "Updating the plan";

  // A Map (not a plain object) so names like "constructor" cannot hit
  // inherited properties.
  const builtins = new Map([
    ["Bash", () => clip(args.description, 70) ?? "Running a command"],
    ["BashOutput", backgroundCommand],
    ["KillShell", backgroundCommand],
    ["Read", () => withTarget("Reading ", baseName(args.file_path), "Reading a file")],
    ["Edit", editing],
    ["MultiEdit", editing],
    ["NotebookEdit", editing],
    ["Write", () => withTarget("Writing ", baseName(args.file_path), "Writing a file")],
    ["Grep", searching],
    ["Glob", searching],
    ["WebFetch", () => withTarget("Reading ", hostName(args.url), "Reading a web page")],
    [
      "WebSearch",
      () => withTarget("Searching the web for ", clip(args.query, 50), "Searching the web"),
    ],
    ["Task", delegating],
    ["Agent", delegating],
    ["TodoWrite", planning],
    ["TaskCreate", planning],
    ["TaskUpdate", planning],
    ["TaskList", planning],
    ["ToolSearch", () => "Finding the right tool"],
  ]);

  const render = builtins.get(toolName);
  return render === undefined ? "Working\u2026" : render();
}
|
|
31688
31793
|
|
|
31689
31794
|
// tool-labels.ts
|
|
31690
31795
|
var MAX_LABEL_CHARS = 60;
|
|
@@ -49716,10 +49821,10 @@ function sweepStaleTurnActiveMarker(stateDir, opts) {
|
|
|
49716
49821
|
}
|
|
49717
49822
|
|
|
49718
49823
|
// ../src/build-info.ts
|
|
49719
|
-
var VERSION = "0.
|
|
49720
|
-
var COMMIT_SHA = "
|
|
49721
|
-
var COMMIT_DATE = "2026-05-
|
|
49722
|
-
var LATEST_PR =
|
|
49824
|
+
var VERSION = "0.14.0";
|
|
49825
|
+
var COMMIT_SHA = "d7cd6faa";
|
|
49826
|
+
var COMMIT_DATE = "2026-05-28T06:28:21Z";
|
|
49827
|
+
var LATEST_PR = 1954;
|
|
49723
49828
|
var COMMITS_AHEAD_OF_TAG = 0;
|
|
49724
49829
|
|
|
49725
49830
|
// gateway/boot-version.ts
|
|
@@ -53694,7 +53799,7 @@ async function drainActivitySummary(turn) {
|
|
|
53694
53799
|
const target = turn.activityPendingRender;
|
|
53695
53800
|
if (target == null)
|
|
53696
53801
|
break;
|
|
53697
|
-
const html = `<i>${target}</i>`;
|
|
53802
|
+
const html = `<i>${escapeHtmlForTg(target)}</i>`;
|
|
53698
53803
|
const chat = turn.sessionChatId;
|
|
53699
53804
|
const thread = turn.sessionThreadId;
|
|
53700
53805
|
const useDraft = turn.isDm && thread == null && sendMessageDraftFn != null;
|
|
@@ -53863,8 +53968,8 @@ function handleSessionEvent(ev) {
|
|
|
53863
53968
|
clearActivitySummary(turn);
|
|
53864
53969
|
}
|
|
53865
53970
|
}
|
|
53866
|
-
if (!
|
|
53867
|
-
const rendered = registerAndRender(turn.toolActivity, name);
|
|
53971
|
+
if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
|
|
53972
|
+
const rendered = DRAFT_MIRROR_ENABLED ? describeToolUse(name, ev.input) : registerAndRender(turn.toolActivity, name);
|
|
53868
53973
|
if (rendered != null) {
|
|
53869
53974
|
turn.activityPendingRender = rendered;
|
|
53870
53975
|
if (turn.activityInFlight == null) {
|
|
@@ -53891,7 +53996,7 @@ function handleSessionEvent(ev) {
|
|
|
53891
53996
|
chatId: turn.sessionChatId,
|
|
53892
53997
|
isPrivateChat: turn.isDm,
|
|
53893
53998
|
threadId: turn.sessionThreadId,
|
|
53894
|
-
...
|
|
53999
|
+
...ANSWER_STREAM_VISIBLE_ENABLED ? { minInitialChars: 1 } : { sendMessageDraft: sendMessageDraftFn },
|
|
53895
54000
|
sendMessage: async (chatId, text, params) => {
|
|
53896
54001
|
const tid = params?.message_thread_id;
|
|
53897
54002
|
const silent = params?.purpose !== "materialize";
|
|
@@ -57,6 +57,7 @@ import { allocateDraftId } from '../draft-transport.js'
|
|
|
57
57
|
import {
|
|
58
58
|
makeEmptyActivityState,
|
|
59
59
|
registerAndRender,
|
|
60
|
+
describeToolUse,
|
|
60
61
|
type ActivityState,
|
|
61
62
|
} from '../tool-activity-summary.js'
|
|
62
63
|
import { toolLabel } from '../tool-labels.js'
|
|
@@ -6837,7 +6838,12 @@ async function drainActivitySummary(turn: CurrentTurn): Promise<void> {
|
|
|
6837
6838
|
while (turn.activityPendingRender !== turn.activityLastSentRender) {
|
|
6838
6839
|
const target = turn.activityPendingRender
|
|
6839
6840
|
if (target == null) break
|
|
6840
|
-
|
|
6841
|
+
// Escape before wrapping in <i> + parse_mode HTML. The legacy
|
|
6842
|
+
// verb-count summaries were safe ASCII, but the draft-mirror's
|
|
6843
|
+
// describeToolUse content (file names, Bash descriptions, search
|
|
6844
|
+
// queries) can contain <, >, & — which would break HTML parsing
|
|
6845
|
+
// and surface literal tags (the exact #1942 bug class).
|
|
6846
|
+
const html = `<i>${escapeHtmlForTg(target)}</i>`
|
|
6841
6847
|
const chat = turn.sessionChatId
|
|
6842
6848
|
const thread = turn.sessionThreadId
|
|
6843
6849
|
// sendMessageDraft doesn't support forum threads.
|
|
@@ -7130,14 +7136,21 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
7130
7136
|
// exactly once at a time and re-running until pending matches
|
|
7131
7137
|
// the last-sent. Captures `turn` so a late drain after turn-swap
|
|
7132
7138
|
// can't corrupt the next turn's atom.
|
|
7133
|
-
// DRAFT_MIRROR (RFC draft-mirror-preview
|
|
7134
|
-
//
|
|
7135
|
-
//
|
|
7136
|
-
//
|
|
7137
|
-
// the
|
|
7138
|
-
//
|
|
7139
|
-
|
|
7140
|
-
|
|
7139
|
+
// DRAFT_MIRROR (RFC draft-mirror-preview): render each tool_use as a
|
|
7140
|
+
// human-friendly line in the live preview, using the model-authored
|
|
7141
|
+
// descriptive field (Bash.description, Read/Edit file basename,
|
|
7142
|
+
// hindsight→"Searching memory", etc. — see describeToolUse). Latest
|
|
7143
|
+
// action wins (the draft shows "doing X" live), clears on reply.
|
|
7144
|
+
// Never surfaces raw shell/query syntax — option A, uniform across
|
|
7145
|
+
// code + non-code agents.
|
|
7146
|
+
//
|
|
7147
|
+
// Flag OFF (default): the legacy generic verb-count summary
|
|
7148
|
+
// ("Ran 5 commands") via registerAndRender — byte-identical to
|
|
7149
|
+
// pre-draft-mirror behavior.
|
|
7150
|
+
if (!turn.replyCalled && !isTelegramSurfaceTool(name)) {
|
|
7151
|
+
const rendered = DRAFT_MIRROR_ENABLED
|
|
7152
|
+
? describeToolUse(name, ev.input)
|
|
7153
|
+
: registerAndRender(turn.toolActivity, name)
|
|
7141
7154
|
if (rendered != null) {
|
|
7142
7155
|
turn.activityPendingRender = rendered
|
|
7143
7156
|
if (turn.activityInFlight == null) {
|
|
@@ -7185,19 +7198,19 @@ function handleSessionEvent(ev: SessionEvent): void {
|
|
|
7185
7198
|
isPrivateChat: turn.isDm,
|
|
7186
7199
|
threadId: turn.sessionThreadId,
|
|
7187
7200
|
// Transport selection:
|
|
7188
|
-
// -
|
|
7189
|
-
//
|
|
7190
|
-
//
|
|
7191
|
-
//
|
|
7192
|
-
//
|
|
7193
|
-
//
|
|
7194
|
-
//
|
|
7195
|
-
//
|
|
7196
|
-
|
|
7197
|
-
|
|
7198
|
-
|
|
7199
|
-
|
|
7200
|
-
|
|
7201
|
+
// #869-Phase1 visible-answer-stream: omit the draft API so
|
|
7202
|
+
// the lane edits a user-visible chat-timeline message
|
|
7203
|
+
// (minInitialChars:1 opens it on the first chunk). The
|
|
7204
|
+
// draft-mirror does NOT touch this lane — the canary proved
|
|
7205
|
+
// the model emits almost no interstitial assistant.text
|
|
7206
|
+
// (it thinks→tool→reply), so routing it to the draft just
|
|
7207
|
+
// emptied the preview. The draft-mirror instead renders the
|
|
7208
|
+
// tool_use stream (case 'tool_use' above) where the real
|
|
7209
|
+
// signal lives. assistant.text keeps its visible-message
|
|
7210
|
+
// home; the reply tool stays the canonical answer.
|
|
7211
|
+
...(ANSWER_STREAM_VISIBLE_ENABLED
|
|
7212
|
+
? { minInitialChars: 1 }
|
|
7213
|
+
: { sendMessageDraft: sendMessageDraftFn }),
|
|
7201
7214
|
// #1075: route through robustApiCall so flood-wait,
|
|
7202
7215
|
// benign-400, and THREAD_NOT_FOUND are handled uniformly
|
|
7203
7216
|
// instead of crashing the answer-stream loop on a deleted
|
|
@@ -5,8 +5,74 @@ import {
|
|
|
5
5
|
formatSummary,
|
|
6
6
|
registerAndRender,
|
|
7
7
|
verbForTool,
|
|
8
|
+
describeToolUse,
|
|
8
9
|
} from "../tool-activity-summary.js";
|
|
9
10
|
|
|
11
|
+
describe("describeToolUse — friendly per-tool rendering (draft-mirror)", () => {
|
|
12
|
+
it("Bash uses the model-authored description verbatim, never the command", () => {
|
|
13
|
+
expect(
|
|
14
|
+
describeToolUse("Bash", { command: "ls -la /tmp", description: "List workspace" }),
|
|
15
|
+
).toBe("List workspace");
|
|
16
|
+
// No description → safe generic, still never the raw command.
|
|
17
|
+
expect(describeToolUse("Bash", { command: "grep -r foo ." })).toBe("Running a command");
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
it("Read/Edit/Write render the file basename, not the full path", () => {
|
|
21
|
+
expect(describeToolUse("Read", { file_path: "/home/ken/code/switchroom/gateway.ts" })).toBe(
|
|
22
|
+
"Reading gateway.ts",
|
|
23
|
+
);
|
|
24
|
+
expect(describeToolUse("Edit", { file_path: "/a/b/CLAUDE.md" })).toBe("Editing CLAUDE.md");
|
|
25
|
+
expect(describeToolUse("Write", { file_path: "notes.txt" })).toBe("Writing notes.txt");
|
|
26
|
+
expect(describeToolUse("Read", {})).toBe("Reading a file");
|
|
27
|
+
});
|
|
28
|
+
|
|
29
|
+
it("Grep/Glob show the pattern; WebFetch shows the hostname", () => {
|
|
30
|
+
expect(describeToolUse("Grep", { pattern: "TODO" })).toBe("Searching for TODO");
|
|
31
|
+
expect(describeToolUse("WebFetch", { url: "https://www.example.com/path?q=1" })).toBe(
|
|
32
|
+
"Reading example.com",
|
|
33
|
+
);
|
|
34
|
+
expect(describeToolUse("WebSearch", { query: "best running shoes" })).toBe(
|
|
35
|
+
"Searching the web for best running shoes",
|
|
36
|
+
);
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
it("Task/Agent surface the sub-agent task description", () => {
|
|
40
|
+
expect(describeToolUse("Task", { description: "Review the migration" })).toBe(
|
|
41
|
+
"Delegating: Review the migration",
|
|
42
|
+
);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
it("domain MCP tools render human-meaningful labels (no jargon)", () => {
|
|
46
|
+
expect(describeToolUse("mcp__hindsight__reflect", { query: "x" })).toBe("Searching memory");
|
|
47
|
+
expect(describeToolUse("mcp__hindsight__retain", {})).toBe("Saving to memory");
|
|
48
|
+
expect(describeToolUse("mcp__claude_ai_Google_Calendar__list_events", {})).toBe(
|
|
49
|
+
"Checking your calendar",
|
|
50
|
+
);
|
|
51
|
+
expect(describeToolUse("mcp__claude_ai_Gmail__search", {})).toBe("Checking your email");
|
|
52
|
+
expect(describeToolUse("mcp__claude_ai_Google_Drive__search_files", {})).toBe(
|
|
53
|
+
"Looking through your files",
|
|
54
|
+
);
|
|
55
|
+
expect(describeToolUse("mcp__claude_ai_Notion__notion-search", {})).toBe("Checking your notes");
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
it("surface tools (reply/stream_reply) return null — never mirrored", () => {
|
|
59
|
+
expect(describeToolUse("mcp__switchroom-telegram__reply", { text: "hi" })).toBeNull();
|
|
60
|
+
expect(describeToolUse("mcp__switchroom-telegram__stream_reply", {})).toBeNull();
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it("unknown MCP tool prefers a model-authored field, else humanizes the name", () => {
|
|
64
|
+
expect(describeToolUse("mcp__acme__do_thing", { description: "Fetched the report" })).toBe(
|
|
65
|
+
"Fetched the report",
|
|
66
|
+
);
|
|
67
|
+
expect(describeToolUse("mcp__acme__do_thing", {})).toBe("Using do thing");
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
it("unknown built-in falls back to a generic working line, never raw syntax", () => {
|
|
71
|
+
expect(describeToolUse("SomeFutureTool", {})).toBe("Working…");
|
|
72
|
+
expect(describeToolUse("", {})).toBeNull();
|
|
73
|
+
});
|
|
74
|
+
});
|
|
75
|
+
|
|
10
76
|
describe("verbForTool — tool name → past-tense verb", () => {
|
|
11
77
|
it("maps standard CLI tools to readable verbs", () => {
|
|
12
78
|
expect(verbForTool("Read")).toBe("read");
|
|
@@ -198,3 +198,140 @@ export function registerAndRender(
|
|
|
198
198
|
if (!changed) return null;
|
|
199
199
|
return formatSummary(state);
|
|
200
200
|
}
|
|
201
|
+
|
|
202
|
+
// ─── Friendly per-tool rendering (draft-mirror, RFC draft-mirror-preview) ───
|
|
203
|
+
//
|
|
204
|
+
// Claude Code's own UI reads human-friendly because the model AUTHORS the
|
|
205
|
+
// descriptive text inside each tool_use.input — verified against a real
|
|
206
|
+
// session JSONL (1360 Bash calls etc.):
|
|
207
|
+
// Bash → input.description ("Get CLAUDE.md size and recent history")
|
|
208
|
+
// Read → input.file_path (basename → "Reading CLAUDE.md")
|
|
209
|
+
// Edit/Write → input.file_path (basename)
|
|
210
|
+
// Grep/Glob → input.pattern
|
|
211
|
+
// Task/Agent → input.description (the sub-agent's task)
|
|
212
|
+
// WebFetch → input.url (hostname → "Reading example.com")
|
|
213
|
+
// hindsight → friendly label ("Searching memory")
|
|
214
|
+
// There is never a raw `grep`/`jq`/`ls` to surface — only the model's own
|
|
215
|
+
// plain-English description or a domain label. This is the signal the
|
|
216
|
+
// draft-mirror renders (option A: uniform across code + non-code agents).
|
|
217
|
+
|
|
218
|
+
/**
 * Reduce a "/"-separated path to its final non-empty segment for display.
 *
 * Returns the input itself when it has no non-empty segment (e.g. "/"), and
 * null when `p` is not a non-empty string.
 */
function baseName(p: unknown): string | null {
  const usable = typeof p === "string" && p.length > 0;
  if (!usable) {
    return null;
  }
  // Walk the segments and remember the last non-empty one, so trailing
  // slashes ("/a/b/") still resolve to "b".
  let last: string | null = null;
  for (const segment of p.split("/")) {
    if (segment) {
      last = segment;
    }
  }
  return last === null ? p : last;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Extract a bare hostname from a URL-like string for display (no
 * scheme/path/query).
 *
 * Absolute URLs go through the WHATWG `URL` parser. Input the parser rejects
 * (typically scheme-less strings such as "www.example.com/path") falls back
 * to a textual cut: drop a leading http(s):// and keep everything before the
 * first "/", "?" or "#". Both paths strip a leading "www." so the two
 * branches agree on what a "bare host" looks like.
 *
 * Returns null when `u` is not a non-empty string or no host can be derived
 * from it (e.g. "mailto:" URLs, whose hostname is empty).
 */
function hostName(u: unknown): string | null {
  if (typeof u !== "string" || u.length === 0) return null;
  let host: string;
  try {
    host = new URL(u).hostname;
  } catch {
    // Not parseable as an absolute URL: best-effort textual extraction.
    host = u.replace(/^https?:\/\//i, "").split(/[/?#]/)[0];
  }
  // An empty host carries no display value; normalize it to null.
  return host.replace(/^www\./i, "") || null;
}
|
|
234
|
+
|
|
235
|
+
/**
 * Trim a string and cap it at `n` UTF-16 code units (ellipsis included),
 * marking truncation with a trailing "…".
 *
 * Returns null when `s` is not a string or is blank after trimming.
 */
function clip(s: unknown, n: number): string | null {
  if (typeof s !== "string") return null;
  const t = s.trim();
  if (t.length === 0) return null;
  if (!(t.length > n)) return t;
  // Clamp so a zero/negative budget cannot make slice() count from the end.
  let head = t.slice(0, Math.max(0, n - 1));
  // Cutting between the two halves of a surrogate pair would leave a lone
  // high surrogate (malformed text); drop it rather than emit it.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);
  return head + "…";
}
|
|
241
|
+
|
|
242
|
+
/**
 * Turn one tool_use into a short, present-tense activity line for the live
 * draft preview — or null when the tool should not be surfaced.
 *
 * MCP tools (names shaped `mcp__<server>__<tool>`) are matched on the
 * lower-cased server/tool names: tools of the "switchroom-telegram" server
 * yield null, known servers map to a fixed label, and any other MCP tool uses
 * the first non-blank of input.description / input.query / input.title, or
 * else "Using <tool name with dashes/underscores as spaces>". Built-in tools
 * are matched case-sensitively by exact name; unrecognized ones get a generic
 * "Working…" line. A falsy `toolName` also yields null.
 */
export function describeToolUse(
  toolName: string,
  input: Record<string, unknown> | undefined,
): string | null {
  if (!toolName) {
    return null;
  }
  const args = input ?? {};

  const parsed = /^mcp__(.+?)__(.+)$/.exec(toolName);
  if (parsed !== null) {
    const server = parsed[1].toLowerCase();
    const tool = parsed[2].toLowerCase();

    // Surface tools ARE the conversation — never mirror them.
    if (server === "switchroom-telegram") {
      return null;
    }

    if (server === "hindsight") {
      switch (tool) {
        case "recall":
        case "reflect":
          return "Searching memory";
        case "retain":
        case "update_memory":
        case "sync_retain":
          return "Saving to memory";
        default:
          return "Working with memory";
      }
    }

    // Servers whose label does not depend on the specific tool invoked.
    const serverLabels = new Map<string, string>([
      ["google-workspace", "Checking your calendar"],
      ["claude_ai_google_calendar", "Checking your calendar"],
      ["claude_ai_gmail", "Checking your email"],
      ["claude_ai_google_drive", "Looking through your files"],
      ["notion", "Checking your notes"],
      ["claude_ai_notion", "Checking your notes"],
    ]);
    const fixedLabel = serverLabels.get(server);
    if (fixedLabel !== undefined) {
      return fixedLabel;
    }

    // Unknown MCP tool: prefer a model-authored field, else a humanized name.
    const authored =
      clip(args.description, 60) ?? clip(args.query, 50) ?? clip(args.title, 50);
    return authored ? authored : `Using ${tool.replace(/[-_]+/g, " ")}`;
  }

  // "<prefix><target>" when a target could be derived, else the generic line.
  const withTarget = (prefix: string, target: string | null, generic: string): string =>
    target ? `${prefix}${target}` : generic;

  const backgroundCommand = (): string => "Managing a background command";
  const editing = (): string =>
    withTarget(
      "Editing ",
      baseName(args.file_path) ?? baseName(args.notebook_path),
      "Editing a file",
    );
  const searching = (): string =>
    withTarget("Searching for ", clip(args.pattern, 40), "Searching files");
  const delegating = (): string =>
    withTarget("Delegating: ", clip(args.description, 60), "Delegating to a sub-agent");
  const planning = (): string => "Updating the plan";

  // A Map (not a plain object) so names like "constructor" cannot hit
  // inherited properties.
  const builtins = new Map<string, () => string>([
    ["Bash", () => clip(args.description, 70) ?? "Running a command"],
    ["BashOutput", backgroundCommand],
    ["KillShell", backgroundCommand],
    ["Read", () => withTarget("Reading ", baseName(args.file_path), "Reading a file")],
    ["Edit", editing],
    ["MultiEdit", editing],
    ["NotebookEdit", editing],
    ["Write", () => withTarget("Writing ", baseName(args.file_path), "Writing a file")],
    ["Grep", searching],
    ["Glob", searching],
    ["WebFetch", () => withTarget("Reading ", hostName(args.url), "Reading a web page")],
    [
      "WebSearch",
      () => withTarget("Searching the web for ", clip(args.query, 50), "Searching the web"),
    ],
    ["Task", delegating],
    ["Agent", delegating],
    ["TodoWrite", planning],
    ["TaskCreate", planning],
    ["TaskUpdate", planning],
    ["TaskList", planning],
    ["ToolSearch", () => "Finding the right tool"],
  ]);

  const render = builtins.get(toolName);
  return render === undefined ? "Working…" : render();
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD scenario — the agent fetches the web via webkite, transparently.
|
|
3
|
+
*
|
|
4
|
+
* Validates the v0.13.62/63 webkite rollout end-to-end through real
|
|
5
|
+
* Telegram: the user sends a URL and asks about its content WITHOUT
|
|
6
|
+
* ever naming "webkite". The agent must:
|
|
7
|
+
*
|
|
8
|
+
* 1. Reach for webkite on its own (the native WebFetch/WebSearch
|
|
9
|
+
* tools are denied fleet-wide — see scaffold.ts
|
|
10
|
+
* WEBKITE_FLEET_DENY_TOOLS — so the ONLY way the agent can answer
|
|
11
|
+
* a "read this URL" prompt is via the webkite_* MCP tools). If the
|
|
12
|
+
* agent returns the page's content, webkite did the work by
|
|
13
|
+
* construction — there is no other web-fetch tool available.
|
|
14
|
+
*
|
|
15
|
+
* 2. Render JavaScript. The target is `quotes.toscrape.com/js/`, a
|
|
16
|
+
* purpose-built scraping-practice SPA whose quotes are injected by
|
|
17
|
+
* JS at runtime. A raw HTTP fetch (what the old WebFetch did) sees
|
|
18
|
+
* an empty page — `curl` returns zero `class="quote"` nodes. Only
|
|
19
|
+
* a JS-executing renderer (webkite → cloakbrowser headless
|
|
20
|
+
* Chromium) produces the visible quote text. So a correct quote in
|
|
21
|
+
* the reply is positive proof that JS rendering happened.
|
|
22
|
+
*
|
|
23
|
+
* The first quote on that page is Einstein's "The world as we have
|
|
24
|
+
* created it is a process of our thinking…". We assert the reply names
|
|
25
|
+
* Einstein AND carries a recognizable fragment of that quote.
|
|
26
|
+
*
|
|
27
|
+
* ## What this catches that other UATs don't
|
|
28
|
+
*
|
|
29
|
+
* - `jtbd-fast-trivial-dm` proves the agent replies fast, but never
|
|
30
|
+
* touches a tool. This is the first UAT that forces a real web fetch.
|
|
31
|
+
* - The in-container `webkite read` smoke proves the binary works, but
|
|
32
|
+
* not that the *model* chooses webkite unprompted over a denied
|
|
33
|
+
* WebFetch, nor that the full inbound→claude→MCP→outbound path works.
|
|
34
|
+
*
|
|
35
|
+
* ## Failure modes this guards against
|
|
36
|
+
*
|
|
37
|
+
* - A regression that re-enables WebFetch (the model might fetch raw
|
|
38
|
+
* HTML and miss the JS-rendered quotes → wrong/empty answer).
|
|
39
|
+
* - webkite MCP not wired / not trusted (agent says it can't browse).
|
|
40
|
+
* - cloakbrowser broken (agent returns the empty static page → no
|
|
41
|
+
* quote, or a "page had no content" apology).
|
|
42
|
+
* - The glibc regression that the v0.13.62 canary caught (webkite
|
|
43
|
+
* dead-on-arrival → agent can't browse at all).
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
import { describe, it, expect } from "vitest";
|
|
47
|
+
import { spinUp } from "../harness.js";
|
|
48
|
+
|
|
49
|
+
const AGENT = "test-harness";
|
|
50
|
+
|
|
51
|
+
// JS-rendered scraping-practice page. Quotes exist ONLY after JS runs;
|
|
52
|
+
// a raw fetch sees none. Stable, purpose-built, no auth.
|
|
53
|
+
const JS_URL = "https://quotes.toscrape.com/js/";
|
|
54
|
+
|
|
55
|
+
// Deliberately does NOT mention webkite, fetch, browser, or any tool —
|
|
56
|
+
// a natural "read this for me" ask. The agent must pick the tool.
|
|
57
|
+
const PROMPT =
|
|
58
|
+
`Open ${JS_URL} and tell me the exact text of the very first quote ` +
|
|
59
|
+
`on the page and who said it. Just the quote and the author.`;
|
|
60
|
+
|
|
61
|
+
// The first quote's author + a distinctive fragment of its text.
|
|
62
|
+
const EXPECTED_AUTHOR = /einstein/i;
|
|
63
|
+
const EXPECTED_FRAGMENT =
|
|
64
|
+
/world as we have created it|process of our thinking|changing our thinking/i;
|
|
65
|
+
|
|
66
|
+
// Phrases that would indicate the agent FAILED to browse (fell back to
|
|
67
|
+
// "I can't access the web" or got the empty static page).
|
|
68
|
+
const CANT_BROWSE = [
|
|
69
|
+
/can.?t (access|browse|open|reach|fetch)/i,
|
|
70
|
+
/unable to (access|browse|open|reach|fetch)/i,
|
|
71
|
+
/no content|empty page|couldn.?t (find|load)/i,
|
|
72
|
+
/don.?t have (web|internet|browsing)/i,
|
|
73
|
+
];
|
|
74
|
+
|
|
75
|
+
describe("uat: agent fetches the web via webkite (JS page, unprompted)", () => {
|
|
76
|
+
it(
|
|
77
|
+
"URL prompt → agent returns JS-rendered content (proves webkite + cloakbrowser)",
|
|
78
|
+
async () => {
|
|
79
|
+
const sc = await spinUp({ agent: AGENT });
|
|
80
|
+
try {
|
|
81
|
+
await sc.sendDM(PROMPT);
|
|
82
|
+
|
|
83
|
+
// Generous budget: a real cloakbrowser render of an SPA is
|
|
84
|
+
// slower than a trivial reply (Chromium spawn + JS execution).
|
|
85
|
+
const reply = await sc.expectMessage(EXPECTED_FRAGMENT, {
|
|
86
|
+
from: "bot",
|
|
87
|
+
timeout: 90_000,
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
// Positive proof: the JS-gated quote text came back.
|
|
91
|
+
expect(reply.text).toMatch(EXPECTED_FRAGMENT);
|
|
92
|
+
// And the author — confirms it parsed the actual quote, not noise.
|
|
93
|
+
expect(reply.text).toMatch(EXPECTED_AUTHOR);
|
|
94
|
+
|
|
95
|
+
// Negative proof: no "I can't browse" fallback. (WebFetch is
|
|
96
|
+
// denied, so a failure to use webkite surfaces as an apology,
|
|
97
|
+
// not a wrong fetch.)
|
|
98
|
+
const failedToBrowse = CANT_BROWSE.some((re) => re.test(reply.text));
|
|
99
|
+
expect(
|
|
100
|
+
failedToBrowse,
|
|
101
|
+
`agent reply looks like a can't-browse fallback: ${JSON.stringify(reply.text.slice(0, 300))}`,
|
|
102
|
+
).toBe(false);
|
|
103
|
+
|
|
104
|
+
console.log(
|
|
105
|
+
`[webkite-read] agent returned JS-rendered quote via webkite — ` +
|
|
106
|
+
`WebFetch denied, cloakbrowser rendered the SPA. ` +
|
|
107
|
+
`reply: ${JSON.stringify(reply.text.slice(0, 200))}`,
|
|
108
|
+
);
|
|
109
|
+
} finally {
|
|
110
|
+
await sc.tearDown();
|
|
111
|
+
}
|
|
112
|
+
},
|
|
113
|
+
120_000,
|
|
114
|
+
);
|
|
115
|
+
});
|