@yuzc-001/grasp 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +327 -0
  3. package/README.zh-CN.md +324 -0
  4. package/examples/README.md +31 -0
  5. package/examples/claude-desktop.json +8 -0
  6. package/examples/codex-config.toml +4 -0
  7. package/grasp.skill +0 -0
  8. package/index.js +87 -0
  9. package/package.json +48 -0
  10. package/scripts/grasp_openclaw_ctl.sh +122 -0
  11. package/scripts/run-search-benchmark.mjs +287 -0
  12. package/scripts/update-star-history.mjs +274 -0
  13. package/skill/SKILL.md +61 -0
  14. package/skill/references/tools.md +306 -0
  15. package/src/cli/auto-configure.js +116 -0
  16. package/src/cli/cmd-connect.js +148 -0
  17. package/src/cli/cmd-explain.js +42 -0
  18. package/src/cli/cmd-logs.js +55 -0
  19. package/src/cli/cmd-status.js +119 -0
  20. package/src/cli/config.js +27 -0
  21. package/src/cli/detect-chrome.js +58 -0
  22. package/src/grasp/handoff/events.js +67 -0
  23. package/src/grasp/handoff/persist.js +48 -0
  24. package/src/grasp/handoff/state.js +28 -0
  25. package/src/grasp/page/capture.js +34 -0
  26. package/src/grasp/page/state.js +273 -0
  27. package/src/grasp/verify/evidence.js +40 -0
  28. package/src/grasp/verify/pipeline.js +52 -0
  29. package/src/layer1-bridge/chrome.js +416 -0
  30. package/src/layer1-bridge/webmcp.js +143 -0
  31. package/src/layer2-perception/hints.js +284 -0
  32. package/src/layer3-action/actions.js +400 -0
  33. package/src/runtime/browser-instance.js +65 -0
  34. package/src/runtime/truth/model.js +94 -0
  35. package/src/runtime/truth/snapshot.js +51 -0
  36. package/src/server/affordances.js +47 -0
  37. package/src/server/audit.js +122 -0
  38. package/src/server/boss-fast-path.js +164 -0
  39. package/src/server/boundary-guard.js +53 -0
  40. package/src/server/content.js +97 -0
  41. package/src/server/continuity.js +256 -0
  42. package/src/server/engine-selection.js +29 -0
  43. package/src/server/entry-orchestrator.js +115 -0
  44. package/src/server/error-codes.js +7 -0
  45. package/src/server/explain-share-card.js +113 -0
  46. package/src/server/fast-path-router.js +134 -0
  47. package/src/server/form-runtime.js +602 -0
  48. package/src/server/form-tasks.js +254 -0
  49. package/src/server/gateway-response.js +62 -0
  50. package/src/server/index.js +22 -0
  51. package/src/server/observe.js +52 -0
  52. package/src/server/page-projection.js +31 -0
  53. package/src/server/page-state.js +27 -0
  54. package/src/server/postconditions.js +128 -0
  55. package/src/server/prompt-assembly.js +148 -0
  56. package/src/server/responses.js +44 -0
  57. package/src/server/route-boundary.js +174 -0
  58. package/src/server/route-policy.js +168 -0
  59. package/src/server/runtime-confirmation.js +87 -0
  60. package/src/server/runtime-status.js +7 -0
  61. package/src/server/share-artifacts.js +284 -0
  62. package/src/server/state.js +132 -0
  63. package/src/server/structured-extraction.js +131 -0
  64. package/src/server/surface-prompts.js +166 -0
  65. package/src/server/task-frame.js +11 -0
  66. package/src/server/tasks/search-task.js +321 -0
  67. package/src/server/tools.actions.js +1361 -0
  68. package/src/server/tools.form.js +526 -0
  69. package/src/server/tools.gateway.js +757 -0
  70. package/src/server/tools.handoff.js +210 -0
  71. package/src/server/tools.js +20 -0
  72. package/src/server/tools.legacy.js +983 -0
  73. package/src/server/tools.strategy.js +250 -0
  74. package/src/server/tools.task-surface.js +66 -0
  75. package/src/server/tools.workspace.js +873 -0
  76. package/src/server/workspace-runtime.js +1138 -0
  77. package/src/server/workspace-tasks.js +735 -0
  78. package/start-chrome.bat +84 -0
@@ -0,0 +1,31 @@
1
+ ## Example Client Configs
2
+
3
+ Use these examples when you want to connect an AI client to the local Grasp runtime:
4
+
5
+ - `claude-desktop.json` for Claude Desktop / Cursor style JSON MCP config
6
+ - `codex-config.toml` for Codex CLI TOML MCP config
7
+
8
+ All examples point to the same local runtime entry:
9
+
10
+ ```text
11
+ command = npx
12
+ args = -y @yuzc-001/grasp
13
+ ```
14
+
15
+ Set up the runtime first with:
16
+
17
+ ```bash
18
+ npx -y @yuzc-001/grasp
19
+ ```
20
+
21
+ ## Hero Demo Intent Mapping
22
+
23
+ These examples are not only config snippets. They map to the current Route by Evidence live smoke routes:
24
+
25
+ - public URL (`https://example.com/`) -> `public_read`
26
+ - public form (`https://httpbin.org/forms/post`) -> `form_runtime`
27
+ - logged-in task page (`https://mp.weixin.qq.com/`) -> `live_session`
28
+ - authenticated workspace (`https://mp.weixin.qq.com/cgi-bin/message?...`) -> `workspace_runtime`
29
+ - blocked challenge page (`https://www.scrapingcourse.com/cloudflare-challenge`) -> `handoff`, then `resume_after_handoff`
30
+
31
+ The demo goal is not “show more tools.” It is “show that one URL gets one best path first.”
@@ -0,0 +1,8 @@
1
+ {
2
+ "mcpServers": {
3
+ "grasp": {
4
+ "command": "npx",
5
+ "args": ["-y", "@yuzc-001/grasp"]
6
+ }
7
+ }
8
+ }
@@ -0,0 +1,4 @@
1
+ [mcp_servers.grasp]
2
+ type = "stdio"
3
+ command = "npx"
4
+ args = ["-y", "@yuzc-001/grasp"]
package/grasp.skill ADDED
Binary file
package/index.js ADDED
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Grasp CLI entry point
4
+ *
5
+ * grasp — start MCP server when stdin is piped (Claude Desktop / Cursor); otherwise run the connect wizard
6
+ * grasp status — show Chrome connection status
7
+ * grasp logs — show audit log (--lines N, --follow)
8
+ * grasp --version — print version
9
+ * grasp --help — print help
10
+ */
11
+
12
+ import { pathToFileURL } from 'node:url';
13
+
14
/**
 * Dispatch a Grasp CLI invocation.
 *
 * - no command with a non-TTY stdin: start the MCP server over stdio
 * - no command with a TTY stdin, or `connect`: run the connect wizard
 * - `status`, `logs`, `explain`: run the matching sub-command
 * - `--version` / `-v`, `--help` / `-h`: print version or help
 * - anything else: report the unknown command, print help to stderr and
 *   set a failing exit code
 *
 * Sub-command modules are imported lazily so only the selected one is loaded.
 *
 * @param {string[]} [argv] - Arguments after the node binary and script path.
 * @returns {Promise<void>}
 */
export async function main(argv = process.argv.slice(2)) {
  const [cmd, ...rest] = argv;

  if (cmd === 'connect' || cmd === undefined) {
    // 'connect' = explicit setup wizard
    // no args = also run connect when called by a human (stdin is a TTY)
    const isMcpMode = !process.stdin.isTTY && cmd === undefined;
    if (isMcpMode) {
      // stdin is a pipe — an AI client is calling us, start the MCP server.
      // The imports live inside the try so a failed import is reported the
      // same way as a failed server start.
      try {
        const { StdioServerTransport } = await import('@modelcontextprotocol/sdk/server/stdio.js');
        const { createGraspServer, SERVER_INFO } = await import('./src/server/index.js');
        const { server } = createGraspServer();
        const transport = new StdioServerTransport();
        await server.connect(transport);
        // Diagnostics go to stderr; stdout is handed to the stdio transport.
        console.error(`[Grasp] MCP Server v${SERVER_INFO.version} started.`);
      } catch (err) {
        console.error(`[Grasp] Failed to start MCP server: ${err.message}`);
        process.exit(1);
      }
    } else {
      const { runConnect } = await import('./src/cli/cmd-connect.js');
      await runConnect();
    }
  } else if (cmd === 'status') {
    const { runStatus } = await import('./src/cli/cmd-status.js');
    await runStatus();
  } else if (cmd === 'logs') {
    const { runLogs } = await import('./src/cli/cmd-logs.js');
    await runLogs(rest);
  } else if (cmd === 'explain') {
    const { runExplain } = await import('./src/cli/cmd-explain.js');
    await runExplain();
  } else if (cmd === '--version' || cmd === '-v') {
    const { SERVER_INFO } = await import('./src/server/index.js');
    console.log(SERVER_INFO.version);
  } else if (cmd === '--help' || cmd === '-h') {
    printHelp();
  } else {
    // An unrecognised command used to fall through silently with status 0.
    console.error(`[Grasp] Unknown command: ${cmd}`);
    console.error(renderHelpText());
    process.exitCode = 1;
  }
}
54
+
55
/**
 * Build the CLI help text.
 *
 * @returns {string} Help text that starts and ends with a newline.
 */
export function renderHelpText() {
  const helpLines = [
    'Grasp — route-aware Agent Web Runtime',
    'Connect Chrome once. Let agents work inside a visible browser runtime, extract structured results, and resume real pages.',
    '',
    'Usage:',
    'grasp Bootstrap the runtime and connect Chrome for first use',
    'grasp connect Same as above',
    'grasp status Show runtime state, current page, and recent activity',
    'grasp logs Show recent audit log',
    '--lines N Number of lines to show (default: 50)',
    '--follow, -f Stream new entries in real-time',
    'grasp explain Explain the latest route decision',
    'grasp --version Print version',
    'grasp --help Print this help',
    '',
    'First runtime steps:',
    '1. npx -y @yuzc-001/grasp Bootstrap the runtime and connect your AI client',
    '2. Open any real page Keep using the dedicated chrome-grasp profile',
    'This runtime profile is separate from arbitrary browser windows you may already have open',
    '3. Ask your AI Call get_status / entry(url, intent) / inspect / extract or continue / explain_route',
    'Use extract_structured(fields=[...]) or extract_batch(urls=[...], fields=[...]) for structured exports',
    'Use share_page(format="markdown" | "screenshot" | "pdf") when the result needs a shareable artifact',
  ];
  // The empty first and last entries reproduce the leading and trailing newline.
  return ['', ...helpLines, ''].join('\n');
}
80
+
81
/**
 * Write the CLI help text to stdout.
 *
 * @returns {void}
 */
export function printHelp() {
  const helpText = renderHelpText();
  console.log(helpText);
}
84
+
85
// Run the CLI only when this file is the process entry point, not when it is
// imported as a module.
// process.argv[1] may still be the path of a symlink (for example a package
// manager's bin link) while import.meta.url refers to the resolved file, so
// the entry path is passed through realpath before comparing.
if (process.argv[1]) {
  const { realpathSync } = await import('node:fs');
  let entryPath = process.argv[1];
  try {
    entryPath = realpathSync(entryPath);
  } catch {
    // Path could not be resolved; fall back to comparing it as given.
  }
  if (import.meta.url === pathToFileURL(entryPath).href) {
    await main();
  }
}
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "@yuzc-001/grasp",
3
+ "version": "0.6.6",
4
+ "description": "Visible AI browser runtime with persistent sessions, verified actions, structured extraction, and recoverable handoff.",
5
+ "main": "index.js",
6
+ "bin": {
7
+ "grasp": "./index.js"
8
+ },
9
+ "files": [
10
+ "index.js",
11
+ "src",
12
+ "examples",
13
+ "scripts",
14
+ "skill",
15
+ "start-chrome.bat",
16
+ "grasp.skill"
17
+ ],
18
+ "type": "module",
19
+ "scripts": {
20
+ "start": "node index.js",
21
+ "dev": "node --watch index.js",
22
+ "test": "node --test",
23
+ "test:watch": "node --test --watch"
24
+ },
25
+ "dependencies": {
26
+ "@chenglou/pretext": "^0.0.3",
27
+ "@modelcontextprotocol/sdk": "^1.8.0",
28
+ "playwright-core": "^1.58.2",
29
+ "zod": "^3.25.76"
30
+ },
31
+ "keywords": [
32
+ "mcp",
33
+ "route-aware-agent-web-runtime",
34
+ "browser-runtime-for-agents",
35
+ "browser-runtime",
36
+ "session-continuity",
37
+ "agent-browser"
38
+ ],
39
+ "repository": {
40
+ "type": "git",
41
+ "url": "https://github.com/Yuzc-001/grasp.git"
42
+ },
43
+ "homepage": "https://github.com/Yuzc-001/grasp#readme",
44
+ "bugs": {
45
+ "url": "https://github.com/Yuzc-001/grasp/issues"
46
+ },
47
+ "license": "MIT"
48
+ }
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
# Paths and endpoints used by every sub-command. Each value in ${VAR:-default}
# form can be overridden through the environment.
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
RUNTIME_DIR="${GRASP_RUNTIME_DIR:-$ROOT_DIR/.runtime/openclaw}"
PROFILE_DIR="${GRASP_PROFILE_DIR:-/root/snap/chromium/common/grasp-openclaw-profile}"
LOG_DIR="$RUNTIME_DIR/logs"
PID_DIR="$RUNTIME_DIR/pids"
CHROME_LOG="$LOG_DIR/chromium.log"
GRASP_LOG="$LOG_DIR/grasp.log"
CHROME_PID_FILE="$PID_DIR/chromium.pid"
GRASP_PID_FILE="$PID_DIR/grasp.pid"
CHROME_BIN="${CHROME_BIN:-/usr/bin/chromium-browser}"
# The port is resolved first so the default URL follows CHROME_CDP_PORT.
# Otherwise Chromium could listen on a custom port while the readiness probe
# kept polling 9222.
CDP_PORT="${CHROME_CDP_PORT:-9222}"
CDP_URL="${CHROME_CDP_URL:-http://127.0.0.1:$CDP_PORT}"

# Make sure the profile, log and PID directories exist before any command runs.
mkdir -p "$PROFILE_DIR" "$LOG_DIR" "$PID_DIR"
18
+
19
# Succeed only when the given PID file exists, is non-empty and names a
# process that can currently be signalled.
#   $1 - path to the PID file
is_pid_running() {
  local pid_file="$1"
  local recorded_pid

  if [[ ! -f "$pid_file" ]]; then
    return 1
  fi

  recorded_pid="$(cat "$pid_file" 2>/dev/null || true)"
  if [[ -z "$recorded_pid" ]]; then
    return 1
  fi

  # Signal 0 delivers nothing; the exit status reports whether the PID is alive.
  kill -0 "$recorded_pid" 2>/dev/null
}
27
+
28
# Launch headless Chromium in the background with its remote-debugging
# endpoint bound to 127.0.0.1, unless the recorded Chromium PID is still alive.
# Output goes to $CHROME_LOG and the new PID is written to $CHROME_PID_FILE.
start_chrome() {
  if ! is_pid_running "$CHROME_PID_FILE"; then
    local -a chrome_args=(
      --headless=new
      --no-sandbox
      --disable-dev-shm-usage
      --remote-debugging-address=127.0.0.1
      --remote-debugging-port="$CDP_PORT"
      --user-data-dir="$PROFILE_DIR"
      about:blank
    )
    nohup "$CHROME_BIN" "${chrome_args[@]}" >"$CHROME_LOG" 2>&1 &
    echo $! > "$CHROME_PID_FILE"
  fi
}
43
+
44
# Poll the CDP version endpoint up to 20 times, one second apart.
# Returns 0 as soon as it answers, 1 if it never does.
wait_for_cdp() {
  local attempts_left=20
  while (( attempts_left > 0 )); do
    if curl -fsS "$CDP_URL/json/version" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
    attempts_left=$(( attempts_left - 1 ))
  done
  return 1
}
53
+
54
# Run `node index.js status` from the package root in a background login
# shell, unless the recorded probe PID is still alive. Output goes to
# $GRASP_LOG and the shell's PID is written to $GRASP_PID_FILE.
start_grasp_probe() {
  if is_pid_running "$GRASP_PID_FILE"; then
    return 0
  fi
  # ROOT_DIR is passed as a positional argument instead of being spliced into
  # the command string, so a path containing quotes or other shell
  # metacharacters cannot break or alter the command.
  nohup bash -lc 'cd "$1" && node index.js status' grasp-probe "$ROOT_DIR" >"$GRASP_LOG" 2>&1 &
  echo $! > "$GRASP_PID_FILE"
}
61
+
62
# `start` sub-command: launch Chromium, wait for its CDP endpoint, then run
# the Grasp status probe. Prints CDP_UNREACHABLE and exits 1 when the
# endpoint never answers.
cmd_start() {
  start_chrome
  wait_for_cdp || {
    echo "CDP_UNREACHABLE"
    exit 1
  }
  start_grasp_probe
  echo "started"
}
71
+
72
# `status` sub-command: print the configured paths, the state of Chromium, the
# CDP endpoint and the Grasp probe as key=value lines, followed by the last
# 20 lines of the probe log when that log exists.
cmd_status() {
  local chromium_state="stopped"
  local cdp_state="disconnected"
  local probe_state="stopped"

  echo "runtime_dir=$RUNTIME_DIR"
  echo "profile_dir=$PROFILE_DIR"
  echo "cdp_url=$CDP_URL"

  if is_pid_running "$CHROME_PID_FILE"; then
    chromium_state="running"
  fi
  echo "chromium=$chromium_state"

  if curl -fsS "$CDP_URL/json/version" >/dev/null 2>&1; then
    cdp_state="connected"
  fi
  echo "cdp=$cdp_state"

  if is_pid_running "$GRASP_PID_FILE"; then
    probe_state="running"
  fi
  echo "grasp_probe=$probe_state"

  [[ -f "$GRASP_LOG" ]] || return 0
  echo "--- grasp_status ---"
  tail -n 20 "$GRASP_LOG" || true
}
96
+
97
# `logs` sub-command: print the last 60 lines of the Chromium log and of the
# Grasp log, each under a header and separated by a blank line. Missing logs
# are ignored.
cmd_logs() {
  local -a log_labels=("chromium.log" "grasp.log")
  local -a log_files=("$CHROME_LOG" "$GRASP_LOG")
  local idx

  for idx in 0 1; do
    if (( idx > 0 )); then
      echo
    fi
    echo "--- ${log_labels[idx]} ---"
    tail -n 60 "${log_files[idx]}" 2>/dev/null || true
  done
}
104
+
105
# `stop` sub-command: signal the Grasp probe and then Chromium if their
# recorded PIDs are alive, remove both PID files and print "stopped".
cmd_stop() {
  local pid_file

  for pid_file in "$GRASP_PID_FILE" "$CHROME_PID_FILE"; do
    if is_pid_running "$pid_file"; then
      # Best effort: the process may exit between the check and the signal.
      kill "$(cat "$pid_file")" 2>/dev/null || true
    fi
  done

  rm -f "$GRASP_PID_FILE" "$CHROME_PID_FILE"
  echo "stopped"
}
115
+
116
# Dispatch on the first argument; `status` is the default sub-command.
subcommand="${1:-status}"
case "$subcommand" in
  start | status | logs | stop)
    "cmd_$subcommand"
    ;;
  *)
    echo "usage: $0 {start|status|logs|stop}" >&2
    exit 2
    ;;
esac
@@ -0,0 +1,287 @@
1
+ import path from 'node:path';
2
+ import { fileURLToPath } from 'node:url';
3
+ import { runSearchTaskTool } from '../src/server/tasks/search-task.js';
4
+ import { createServerState } from '../src/server/state.js';
5
+
6
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} items - Values to average.
 * @returns {number} The mean, or 0 for an empty list.
 */
function mean(items) {
  const count = items.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  items.forEach((value) => {
    total += value;
  });
  return total / count;
}
10
+
11
/**
 * Aggregate per-scenario benchmark results into summary metrics.
 *
 * @param {Array<{status?: string, toolCalls?: number, retries?: number}>} [results]
 *   Scenario results; a missing `toolCalls` or `retries` counts as 0.
 * @param {{suite?: string}} [options] - `suite` labels the summary
 *   (default 'search-task').
 * @returns {{suite: string, successRate: number, avgToolCalls: number,
 *   avgRetries: number, recoverySuccessRate: number}}
 *   `successRate` is the share of results with status 'completed' (0 when
 *   there are no results). `recoverySuccessRate` is the number of completed
 *   results that needed at least one retry, divided by the total number of
 *   retries across all results (1 when nothing was retried).
 */
export function summarizeSearchBenchmark(results = [], options = {}) {
  const retriesOf = (result) => result.retries ?? 0;
  const completed = results.filter(({ status }) => status === 'completed');
  const retryCounts = results.map((result) => retriesOf(result));

  const totalRetryAttempts = retryCounts.reduce((sum, count) => sum + count, 0);
  const successfulRetryAttempts = completed.filter((result) => retriesOf(result) > 0).length;

  return {
    suite: options.suite ?? 'search-task',
    successRate: results.length === 0 ? 0 : completed.length / results.length,
    avgToolCalls: mean(results.map((result) => result.toolCalls ?? 0)),
    avgRetries: mean(retryCounts),
    recoverySuccessRate:
      totalRetryAttempts === 0 ? 1 : successfulRetryAttempts / totalRetryAttempts,
  };
}
37
+
38
/**
 * Build a minimal stand-in for a browser page used by the benchmark.
 *
 * @param {{url?: string, title?: string}} [options] - Values the fake page reports.
 * @returns {{url: () => string, title: () => Promise<string>,
 *   evaluate: () => Promise<string>}} `evaluate` always resolves to 'complete'.
 */
function createFakeBenchmarkPage(options = {}) {
  const { url: pageUrl = 'https://example.com/', title: pageTitle = 'Benchmark Page' } = options;
  return {
    url() {
      return pageUrl;
    },
    async title() {
      return pageTitle;
    },
    async evaluate() {
      return 'complete';
    },
  };
}
45
+
46
/**
 * Build a fixture page snapshot for the benchmark observers.
 *
 * A falsy `searchInput` or `submitControl` leaves that control out of
 * `hints` and `ranking`; a falsy `submitControl` also makes
 * `submitCandidate` null. Controls are copied, never shared with the caller.
 *
 * @param {object} [options]
 * @param {string} [options.query]
 * @param {string} [options.title]
 * @param {string} [options.url]
 * @param {string} [options.contentText] - Becomes `content.text`.
 * @param {number} [options.domRevision]
 * @param {?{id: string, type: string, label: string}} [options.searchInput]
 * @param {?{id: string, type: string, label: string}} [options.submitControl]
 * @returns {object} Snapshot with query, title, url, hints, ranking, content,
 *   domRevision and submitCandidate.
 */
function createSnapshot(options = {}) {
  const {
    query = 'pi agent 是啥',
    title = 'Search',
    url = 'https://example.com/search',
    contentText = '',
    domRevision = 0,
    searchInput = { id: 'I1', type: 'textbox', label: 'Search' },
    submitControl = { id: 'B1', type: 'button', label: 'Search' },
  } = options;

  const hints = [];
  const ranking = { search_input: [], command_button: [] };

  if (searchInput) {
    ranking.search_input.push({ ...searchInput });
    hints.push({ ...searchInput, semantic: 'search_input' });
  }
  if (submitControl) {
    ranking.command_button.push({ ...submitControl });
    hints.push({ ...submitControl, semantic: 'submit_control' });
  }

  let submitCandidate = null;
  if (submitControl) {
    submitCandidate = { ...submitControl };
  }

  return {
    query,
    title,
    url,
    hints,
    ranking,
    content: { text: contentText },
    domRevision,
    submitCandidate,
  };
}
74
+
75
/**
 * Run one benchmark scenario through `runSearchTaskTool` against a fake page
 * and counting stub actions.
 *
 * @param {object} scenario
 * @param {string} scenario.name - Reported as `scenario` in the result.
 * @param {string} scenario.description
 * @param {string} [scenario.query]
 * @param {number} [scenario.maxAttempts] - Passed to the tool as `max_attempts`.
 * @param {string} [scenario.pageUrl] - URL reported by the fake page.
 * @param {string} [scenario.pageTitle] - Title reported by the fake page.
 * @param {Function} [scenario.observer] - Forwarded to the tool's deps unchanged.
 * @param {Function} [scenario.verifier] - Forwarded to the tool's deps unchanged.
 * @param {Function} [scenario.waitThenReverify] - Forwarded to the tool's deps unchanged.
 * @returns {Promise<object>} The tool's status, attempts, toolCalls, retries
 *   and recovered fields, plus the scenario name/description and a count of
 *   how often each stub action was invoked.
 */
async function runToolScenario({
  name,
  description,
  query = 'pi agent 是啥',
  maxAttempts = 3,
  pageUrl,
  pageTitle,
  observer,
  verifier,
  waitThenReverify,
}) {
  const state = createServerState();
  const page = createFakeBenchmarkPage({ url: pageUrl, title: pageTitle });

  // Invocation counters for the stubbed actions below.
  const actionBreakdown = { type: 0, typeWithEnter: 0, click: 0, pressKey: 0, waitStable: 0 };
  const bump = (action) => {
    actionBreakdown[action] += 1;
  };

  const deps = {
    getActivePage: async () => page,
    observer,
    verifier,
    waitThenReverify,
    typeAction: async (_page, _hintId, _text, pressEnter) => {
      bump('type');
      if (pressEnter) {
        bump('typeWithEnter');
      }
    },
    clickAction: async () => {
      bump('click');
    },
    pressKeyAction: async () => {
      bump('pressKey');
    },
    waitStableAction: async () => {
      bump('waitStable');
      return { stable: true, attempts: 1 };
    },
    extractContentAction: async () => ({ text: 'fixture content' }),
    syncStateAction: async () => undefined,
  };

  const outcome = await runSearchTaskTool({
    state,
    query,
    max_attempts: maxAttempts,
    deps,
  });
  const { status, attempts, toolCalls, retries, recovered } = outcome;

  return {
    scenario: name,
    description,
    status,
    attempts,
    toolCalls,
    retries,
    recovered,
    actionBreakdown,
  };
}
135
+
136
/**
 * Build the list of search benchmark scenarios.
 *
 * Each scenario is `{ name, description, run }`. `run()` delegates to
 * `runToolScenario` with an observer that returns a fixture snapshot for the
 * scenario's page and a verifier created fresh for that run, so verifiers
 * that count their calls start from zero every time.
 *
 * @returns {Array<{name: string, description: string, run: () => Promise<object>}>}
 */
export function createSearchBenchmarkScenarios() {
  const definitions = [
    {
      name: 'grok-question',
      description: 'Grok 搜索提问',
      pageUrl: 'https://grok.com/',
      pageTitle: 'Grok',
      snapshotExtras: {
        searchInput: { id: 'I1', type: 'textbox', label: '向 Grok 提问' },
        submitControl: { id: 'B2', type: 'button', label: '发送' },
      },
      makeVerifier: () => async () => ({ ok: true, evidence: { answerStarted: true } }),
    },
    {
      name: 'google-search',
      description: 'Google 搜索',
      pageUrl: 'https://www.google.com/',
      pageTitle: 'Google',
      snapshotExtras: {
        searchInput: { id: 'I1', type: 'searchbox', label: 'Search Google' },
        // Explicit null: this page exposes no submit control.
        submitControl: null,
      },
      makeVerifier: () => async () => ({ ok: true, evidence: { resultsVisible: true } }),
    },
    {
      name: 'overlay-site-search',
      description: '带弹层干扰的站内搜索',
      pageUrl: 'https://docs.example.com/',
      pageTitle: 'Docs Search',
      snapshotExtras: {
        searchInput: { id: 'I3', type: 'combobox', label: '站内搜索' },
        submitControl: { id: 'B4', type: 'button', label: '搜索' },
      },
      // Fails the first check only when it was a primary submit, then succeeds.
      makeVerifier: () => {
        let attempts = 0;
        return async ({ plan }) => {
          attempts += 1;
          if (attempts === 1 && plan.mode === 'primary_submit') {
            return { ok: false, error_code: 'NO_EFFECT', evidence: { overlay: true } };
          }
          return { ok: true, evidence: { resultPaneChanged: true } };
        };
      },
    },
    {
      name: 'streaming-answer',
      description: '流式回答页面等待稳定',
      pageUrl: 'https://chat.example.com/',
      pageTitle: 'Streaming Answer',
      snapshotExtras: {
        searchInput: { id: 'I9', type: 'textbox', label: 'Ask anything' },
        submitControl: { id: 'B9', type: 'button', label: 'Send' },
      },
      // Reports a pending load on the first check and success afterwards.
      makeVerifier: () => {
        let verifyCount = 0;
        return async () => {
          verifyCount += 1;
          if (verifyCount === 1) {
            return { ok: false, error_code: 'LOADING_PENDING', evidence: { streamOpen: true } };
          }
          return { ok: true, evidence: { streamSettled: true } };
        };
      },
    },
    {
      name: 'result-content-extract',
      description: '结果页正文抽取',
      pageUrl: 'https://example.com/pi-agent',
      pageTitle: 'Pi Agent',
      snapshotExtras: {
        contentText: 'Pi Agent is a minimal coding-agent runtime.',
        searchInput: { id: 'I6', type: 'textbox', label: 'Search docs' },
        submitControl: { id: 'B6', type: 'button', label: 'Search docs' },
      },
      makeVerifier: () => async ({ snapshot }) => ({
        ok: true,
        evidence: { extractedText: snapshot.content.text },
      }),
    },
  ];

  return definitions.map(
    ({ name, description, pageUrl, pageTitle, snapshotExtras, makeVerifier }) => ({
      name,
      description,
      async run() {
        return runToolScenario({
          name,
          description,
          pageUrl,
          pageTitle,
          observer: async () => ({
            snapshot: createSnapshot({ title: pageTitle, url: pageUrl, ...snapshotExtras }),
          }),
          verifier: makeVerifier(),
        });
      },
    }),
  );
}
264
+
265
/**
 * Run the scenarios one after another and summarize their results.
 *
 * @param {Array<{run: () => Promise<object>}>} [scenarios] - Defaults to the
 *   built-in scenario list.
 * @param {{silent?: boolean, suite?: string}} [options] - `silent` suppresses
 *   the JSON printout; the whole object is also passed to the summarizer.
 * @returns {Promise<{summary: object, results: object[]}>}
 */
export async function runSearchBenchmark(scenarios = createSearchBenchmarkScenarios(), options = {}) {
  const isSilent = Boolean(options.silent);

  // Sequential on purpose: each scenario finishes before the next starts.
  const results = [];
  for (const scenario of scenarios) {
    results.push(await scenario.run());
  }

  const payload = {
    summary: summarizeSearchBenchmark(results, options),
    results,
  };
  if (isSilent) {
    return payload;
  }
  console.log(JSON.stringify(payload, null, 2));
  return payload;
}
282
+
283
// True when this module is the script named on the Node command line rather
// than an import of another module.
const isMain = process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url);

if (isMain) {
  // Awaited so a failing benchmark rejects module evaluation instead of
  // leaving a floating promise.
  await runSearchBenchmark();
}