switchroom 0.13.64 → 0.13.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent-scheduler/index.js +80 -80
- package/dist/auth-broker/index.js +80 -80
- package/dist/cli/drive-write-pretool.mjs +10 -10
- package/dist/cli/notion-write-pretool.mjs +82 -82
- package/dist/cli/skill-validate-pretool.mjs +72 -72
- package/dist/cli/switchroom.js +367 -360
- package/dist/host-control/main.js +148 -148
- package/dist/vault/approvals/kernel-server.js +82 -82
- package/dist/vault/broker/server.js +83 -83
- package/package.json +1 -1
- package/telegram-plugin/dist/bridge/bridge.js +112 -112
- package/telegram-plugin/dist/gateway/gateway.js +191 -191
- package/telegram-plugin/dist/server.js +160 -160
- package/telegram-plugin/uat/scenarios/jtbd-webkite-read-dm.test.ts +115 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JTBD scenario — the agent fetches the web via webkite, transparently.
|
|
3
|
+
*
|
|
4
|
+
* Validates the v0.13.62/63 webkite rollout end-to-end through real
|
|
5
|
+
* Telegram: the user sends a URL and asks about its content WITHOUT
|
|
6
|
+
* ever naming "webkite". The agent must:
|
|
7
|
+
*
|
|
8
|
+
* 1. Reach for webkite on its own (the native WebFetch/WebSearch
|
|
9
|
+
* tools are denied fleet-wide — see scaffold.ts
|
|
10
|
+
* WEBKITE_FLEET_DENY_TOOLS — so the ONLY way the agent can answer
|
|
11
|
+
* a "read this URL" prompt is via the webkite_* MCP tools). If the
|
|
12
|
+
* agent returns the page's content, webkite did the work by
|
|
13
|
+
* construction — there is no other web-fetch tool available.
|
|
14
|
+
*
|
|
15
|
+
* 2. Render JavaScript. The target is `quotes.toscrape.com/js/`, a
|
|
16
|
+
* purpose-built scraping-practice SPA whose quotes are injected by
|
|
17
|
+
* JS at runtime. A raw HTTP fetch (what the old WebFetch did) sees
|
|
18
|
+
* an empty page — `curl` returns zero `class="quote"` nodes. Only
|
|
19
|
+
* a JS-executing renderer (webkite → cloakbrowser headless
|
|
20
|
+
* Chromium) produces the visible quote text. So a correct quote in
|
|
21
|
+
* the reply is positive proof that JS rendering happened.
|
|
22
|
+
*
|
|
23
|
+
* The first quote on that page is Einstein's "The world as we have
|
|
24
|
+
* created it is a process of our thinking…". We assert the reply names
|
|
25
|
+
* Einstein AND carries a recognizable fragment of that quote.
|
|
26
|
+
*
|
|
27
|
+
* ## What this catches that other UATs don't
|
|
28
|
+
*
|
|
29
|
+
* - `jtbd-fast-trivial-dm` proves the agent replies fast, but never
|
|
30
|
+
* touches a tool. This is the first UAT that forces a real web fetch.
|
|
31
|
+
* - The in-container `webkite read` smoke proves the binary works, but
|
|
32
|
+
* not that the *model* chooses webkite unprompted over a denied
|
|
33
|
+
* WebFetch, nor that the full inbound→claude→MCP→outbound path works.
|
|
34
|
+
*
|
|
35
|
+
* ## Failure modes this guards against
|
|
36
|
+
*
|
|
37
|
+
* - A regression that re-enables WebFetch (the model might fetch raw
|
|
38
|
+
* HTML and miss the JS-rendered quotes → wrong/empty answer).
|
|
39
|
+
* - webkite MCP not wired / not trusted (agent says it can't browse).
|
|
40
|
+
* - cloakbrowser broken (agent returns the empty static page → no
|
|
41
|
+
* quote, or a "page had no content" apology).
|
|
42
|
+
* - The glibc regression that the v0.13.62 canary caught (webkite
|
|
43
|
+
* dead-on-arrival → agent can't browse at all).
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
import { describe, it, expect } from "vitest";
|
|
47
|
+
import { spinUp } from "../harness.js";
|
|
48
|
+
|
|
49
|
+
// Agent name handed to the harness's spinUp() for this scenario.
const AGENT = "test-harness";

// Scraping-practice page whose quotes are injected by JavaScript at
// runtime: a raw HTTP fetch sees no quotes. Stable, purpose-built, and
// needs no authentication.
const JS_URL = "https://quotes.toscrape.com/js/";

// A natural "read this for me" request. It intentionally names no tool
// (no webkite, fetch, or browser) so the agent has to choose one itself.
const PROMPT = [
  `Open ${JS_URL} and tell me the exact text of the very first quote `,
  `on the page and who said it. Just the quote and the author.`,
].join("");
|
|
60
|
+
|
|
61
|
+
// Author of the page's first quote, matched case-insensitively.
const EXPECTED_AUTHOR = /einstein/i;

// Distinctive pieces of that quote's text. A reply containing any one of
// them (case-insensitively) is treated as carrying the quote.
const EXPECTED_FRAGMENT = new RegExp(
  [
    "world as we have created it",
    "process of our thinking",
    "changing our thinking",
  ].join("|"),
  "i",
);
|
|
65
|
+
|
|
66
|
+
// Phrases that indicate the agent FAILED to browse: it fell back to an
// "I can't access the web" style apology, or it only saw the empty static
// page. All patterns are case-insensitive.
//
// Each negation is matched both contracted and spelled out ("can't" /
// "cannot" / "can not", "couldn't" / "could not", "don't" / "do not"), so
// an apology is caught regardless of how the model phrases it. `.?` in the
// contracted forms tolerates a straight or curly apostrophe, or none.
//
// The verb lists are deliberately narrow: the expected quote itself
// contains "It cannot be changed", which must NOT count as a failure.
const CANT_BROWSE = [
  /(?:can.?t|cannot|can not) (access|browse|open|reach|fetch)/i,
  /unable to (access|browse|open|reach|fetch)/i,
  /no content|empty page|(?:couldn.?t|could not) (find|load)/i,
  /(?:don.?t|do not) have (web|internet|browsing)/i,
];
|
|
74
|
+
|
|
75
|
+
describe("uat: agent fetches the web via webkite (JS page, unprompted)", () => {
  // How long to wait for the bot's reply. Kept generous because a real
  // cloakbrowser render of an SPA (Chromium spawn + JS execution) is
  // slower than a trivial reply.
  const REPLY_TIMEOUT_MS = 90_000;
  // Overall budget for the test case, passed to `it` as its timeout.
  const TEST_TIMEOUT_MS = 120_000;

  it(
    "URL prompt → agent returns JS-rendered content (proves webkite + cloakbrowser)",
    async () => {
      const scenario = await spinUp({ agent: AGENT });
      try {
        await scenario.sendDM(PROMPT);

        // Wait for a bot message that already contains a fragment of the
        // expected quote.
        const botReply = await scenario.expectMessage(EXPECTED_FRAGMENT, {
          from: "bot",
          timeout: REPLY_TIMEOUT_MS,
        });
        const replyText = botReply.text;

        // Positive proof: the JS-gated quote text is in the reply...
        expect(replyText).toMatch(EXPECTED_FRAGMENT);
        // ...together with its author, so the agent parsed the actual
        // quote rather than noise.
        expect(replyText).toMatch(EXPECTED_AUTHOR);

        // Negative proof: the reply must not read like an "I can't browse"
        // fallback. (WebFetch is denied, so a failure to use webkite
        // surfaces as an apology, not a wrong fetch.)
        let failedToBrowse = false;
        for (const pattern of CANT_BROWSE) {
          if (pattern.test(replyText)) {
            failedToBrowse = true;
            break;
          }
        }
        expect(
          failedToBrowse,
          `agent reply looks like a can't-browse fallback: ${JSON.stringify(replyText.slice(0, 300))}`,
        ).toBe(false);

        // Success breadcrumb for the UAT log, with a truncated reply.
        const summary = [
          `[webkite-read] agent returned JS-rendered quote via webkite — `,
          `WebFetch denied, cloakbrowser rendered the SPA. `,
          `reply: ${JSON.stringify(replyText.slice(0, 200))}`,
        ].join("");
        console.log(summary);
      } finally {
        // Always release the scenario, even when an assertion above fails.
        await scenario.tearDown();
      }
    },
    TEST_TIMEOUT_MS,
  );
});
|