@yuzc-001/grasp 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +327 -0
- package/README.zh-CN.md +324 -0
- package/examples/README.md +31 -0
- package/examples/claude-desktop.json +8 -0
- package/examples/codex-config.toml +4 -0
- package/grasp.skill +0 -0
- package/index.js +87 -0
- package/package.json +48 -0
- package/scripts/grasp_openclaw_ctl.sh +122 -0
- package/scripts/run-search-benchmark.mjs +287 -0
- package/scripts/update-star-history.mjs +274 -0
- package/skill/SKILL.md +61 -0
- package/skill/references/tools.md +306 -0
- package/src/cli/auto-configure.js +116 -0
- package/src/cli/cmd-connect.js +148 -0
- package/src/cli/cmd-explain.js +42 -0
- package/src/cli/cmd-logs.js +55 -0
- package/src/cli/cmd-status.js +119 -0
- package/src/cli/config.js +27 -0
- package/src/cli/detect-chrome.js +58 -0
- package/src/grasp/handoff/events.js +67 -0
- package/src/grasp/handoff/persist.js +48 -0
- package/src/grasp/handoff/state.js +28 -0
- package/src/grasp/page/capture.js +34 -0
- package/src/grasp/page/state.js +273 -0
- package/src/grasp/verify/evidence.js +40 -0
- package/src/grasp/verify/pipeline.js +52 -0
- package/src/layer1-bridge/chrome.js +416 -0
- package/src/layer1-bridge/webmcp.js +143 -0
- package/src/layer2-perception/hints.js +284 -0
- package/src/layer3-action/actions.js +400 -0
- package/src/runtime/browser-instance.js +65 -0
- package/src/runtime/truth/model.js +94 -0
- package/src/runtime/truth/snapshot.js +51 -0
- package/src/server/affordances.js +47 -0
- package/src/server/audit.js +122 -0
- package/src/server/boss-fast-path.js +164 -0
- package/src/server/boundary-guard.js +53 -0
- package/src/server/content.js +97 -0
- package/src/server/continuity.js +256 -0
- package/src/server/engine-selection.js +29 -0
- package/src/server/entry-orchestrator.js +115 -0
- package/src/server/error-codes.js +7 -0
- package/src/server/explain-share-card.js +113 -0
- package/src/server/fast-path-router.js +134 -0
- package/src/server/form-runtime.js +602 -0
- package/src/server/form-tasks.js +254 -0
- package/src/server/gateway-response.js +62 -0
- package/src/server/index.js +22 -0
- package/src/server/observe.js +52 -0
- package/src/server/page-projection.js +31 -0
- package/src/server/page-state.js +27 -0
- package/src/server/postconditions.js +128 -0
- package/src/server/prompt-assembly.js +148 -0
- package/src/server/responses.js +44 -0
- package/src/server/route-boundary.js +174 -0
- package/src/server/route-policy.js +168 -0
- package/src/server/runtime-confirmation.js +87 -0
- package/src/server/runtime-status.js +7 -0
- package/src/server/share-artifacts.js +284 -0
- package/src/server/state.js +132 -0
- package/src/server/structured-extraction.js +131 -0
- package/src/server/surface-prompts.js +166 -0
- package/src/server/task-frame.js +11 -0
- package/src/server/tasks/search-task.js +321 -0
- package/src/server/tools.actions.js +1361 -0
- package/src/server/tools.form.js +526 -0
- package/src/server/tools.gateway.js +757 -0
- package/src/server/tools.handoff.js +210 -0
- package/src/server/tools.js +20 -0
- package/src/server/tools.legacy.js +983 -0
- package/src/server/tools.strategy.js +250 -0
- package/src/server/tools.task-surface.js +66 -0
- package/src/server/tools.workspace.js +873 -0
- package/src/server/workspace-runtime.js +1138 -0
- package/src/server/workspace-tasks.js +735 -0
- package/start-chrome.bat +84 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
## Example Client Configs
|
|
2
|
+
|
|
3
|
+
Use these examples when you want to connect an AI client to the local Grasp runtime:
|
|
4
|
+
|
|
5
|
+
- `claude-desktop.json` for Claude Desktop / Cursor style JSON MCP config
|
|
6
|
+
- `codex-config.toml` for Codex CLI TOML MCP config
|
|
7
|
+
|
|
8
|
+
All examples point to the same local runtime entry:
|
|
9
|
+
|
|
10
|
+
```text
|
|
11
|
+
command = npx
|
|
12
|
+
args = -y @yuzc-001/grasp
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Set up the runtime first with:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
npx -y @yuzc-001/grasp
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Hero Demo Intent Mapping
|
|
22
|
+
|
|
23
|
+
These examples are not only config snippets. They map to the current Route by Evidence live smoke routes:
|
|
24
|
+
|
|
25
|
+
- public URL (`https://example.com/`) -> `public_read`
|
|
26
|
+
- public form (`https://httpbin.org/forms/post`) -> `form_runtime`
|
|
27
|
+
- logged-in task page (`https://mp.weixin.qq.com/`) -> `live_session`
|
|
28
|
+
- authenticated workspace (`https://mp.weixin.qq.com/cgi-bin/message?...`) -> `workspace_runtime`
|
|
29
|
+
- blocked challenge page (`https://www.scrapingcourse.com/cloudflare-challenge`) -> `handoff`, then `resume_after_handoff`
|
|
30
|
+
|
|
31
|
+
The demo goal is not “show more tools.” It is “show that one URL gets one best path first.”
|
package/grasp.skill
ADDED
|
Binary file
|
package/index.js
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Grasp CLI entry point
|
|
4
|
+
*
|
|
5
|
+
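* grasp — start MCP server when stdin is piped (Claude Desktop / Cursor); run the connect wizard when run from a terminal
* grasp connect — run the connect wizard explicitly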
|
|
6
|
+
* grasp status — show Chrome connection status
|
|
7
|
+
* grasp logs — show audit log (--lines N, --follow)
* grasp explain — explain the latest route decision
|
|
8
|
+
* grasp --version — print version
|
|
9
|
+
* grasp --help — print help
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { pathToFileURL } from 'node:url';
|
|
13
|
+
|
|
14
|
+
/**
 * Dispatch a Grasp CLI invocation.
 *
 * The first argument selects the command; handler modules are loaded with
 * dynamic `import()` inside the branch that needs them.
 *
 * - no command: starts the MCP stdio server when stdin is not a TTY,
 *   otherwise runs the connect wizard.
 * - `connect`: always runs the connect wizard.
 * - `status` / `logs` / `explain`: run the matching command module
 *   (`logs` receives the remaining arguments).
 * - `--version` / `-v`: prints `SERVER_INFO.version`.
 * - `--help` / `-h`: prints the help text.
 * - anything else: reports the unknown command on stderr and sets
 *   `process.exitCode = 1` instead of exiting silently with status 0.
 *
 * If the MCP server fails to start, the error is logged and the process
 * exits with status 1.
 *
 * @param {string[]} [argv] CLI arguments without the node/script prefix.
 * @returns {Promise<void>}
 */
export async function main(argv = process.argv.slice(2)) {
  const [cmd, ...rest] = argv;

  switch (cmd) {
    case undefined:
    case 'connect': {
      // Bare `grasp` with a non-TTY stdin means an AI client is piping to us;
      // an explicit `connect` always means the setup wizard.
      const isMcpMode = cmd === undefined && !process.stdin.isTTY;
      if (!isMcpMode) {
        const { runConnect } = await import('./src/cli/cmd-connect.js');
        await runConnect();
        break;
      }

      const { StdioServerTransport } = await import('@modelcontextprotocol/sdk/server/stdio.js');
      const { createGraspServer, SERVER_INFO } = await import('./src/server/index.js');
      try {
        const { server } = createGraspServer();
        const transport = new StdioServerTransport();
        await server.connect(transport);
        // Logged on stderr; stdout is presumably reserved for the MCP stdio
        // protocol — confirm against the SDK transport.
        console.error(`[Grasp] MCP Server v${SERVER_INFO.version} started.`);
      } catch (err) {
        console.error(`[Grasp] Failed to start MCP server: ${err.message}`);
        process.exit(1);
      }
      break;
    }

    case 'status': {
      const { runStatus } = await import('./src/cli/cmd-status.js');
      await runStatus();
      break;
    }

    case 'logs': {
      const { runLogs } = await import('./src/cli/cmd-logs.js');
      await runLogs(rest);
      break;
    }

    case 'explain': {
      const { runExplain } = await import('./src/cli/cmd-explain.js');
      await runExplain();
      break;
    }

    case '--version':
    case '-v': {
      const { SERVER_INFO } = await import('./src/server/index.js');
      console.log(SERVER_INFO.version);
      break;
    }

    case '--help':
    case '-h':
      printHelp();
      break;

    default:
      // A typo such as `grasp stauts` used to exit 0 with no output at all.
      console.error(`[Grasp] Unknown command: ${cmd}. Run "grasp --help" for usage.`);
      process.exitCode = 1;
  }
}
|
|
54
|
+
|
|
55
|
+
/**
 * Build the CLI help text.
 *
 * Returns a single template-literal string that describes the tool, lists
 * the `grasp`, `connect`, `status`, `logs` (with `--lines` / `--follow`),
 * `explain`, `--version` and `--help` usages, and ends with the
 * "First runtime steps" walkthrough. It only builds the string; nothing is
 * printed here.
 *
 * @returns {string} The multi-line help message.
 */
export function renderHelpText() {
|
|
56
|
+
return `
|
|
57
|
+
Grasp — route-aware Agent Web Runtime
|
|
58
|
+
Connect Chrome once. Let agents work inside a visible browser runtime, extract structured results, and resume real pages.
|
|
59
|
+

|
|
60
|
+
Usage:
|
|
61
|
+
grasp Bootstrap the runtime and connect Chrome for first use
|
|
62
|
+
grasp connect Same as above
|
|
63
|
+
grasp status Show runtime state, current page, and recent activity
|
|
64
|
+
grasp logs Show recent audit log
|
|
65
|
+
--lines N Number of lines to show (default: 50)
|
|
66
|
+
--follow, -f Stream new entries in real-time
|
|
67
|
+
grasp explain Explain the latest route decision
|
|
68
|
+
grasp --version Print version
|
|
69
|
+
grasp --help Print this help
|
|
70
|
+

|
|
71
|
+
First runtime steps:
|
|
72
|
+
1. npx -y @yuzc-001/grasp Bootstrap the runtime and connect your AI client
|
|
73
|
+
2. Open any real page Keep using the dedicated chrome-grasp profile
|
|
74
|
+
This runtime profile is separate from arbitrary browser windows you may already have open
|
|
75
|
+
3. Ask your AI Call get_status / entry(url, intent) / inspect / extract or continue / explain_route
|
|
76
|
+
Use extract_structured(fields=[...]) or extract_batch(urls=[...], fields=[...]) for structured exports
|
|
77
|
+
Use share_page(format="markdown" | "screenshot" | "pdf") when the result needs a shareable artifact
|
|
78
|
+
`;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
/**
 * Print the CLI help text to stdout.
 *
 * @returns {void}
 */
export function printHelp() {
  const helpText = renderHelpText();
  console.log(helpText);
}
|
|
84
|
+
|
|
85
|
+
// Run the CLI only when this file is the process entry point; importing the
// module (for example to reuse `main` or `renderHelpText`) must not start it.
const invokedScriptPath = process.argv[1];
if (invokedScriptPath && pathToFileURL(invokedScriptPath).href === import.meta.url) {
  await main();
}
|
package/package.json
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@yuzc-001/grasp",
|
|
3
|
+
"version": "0.6.6",
|
|
4
|
+
"description": "Visible AI browser runtime with persistent sessions, verified actions, structured extraction, and recoverable handoff.",
|
|
5
|
+
"main": "index.js",
|
|
6
|
+
"bin": {
|
|
7
|
+
"grasp": "./index.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"index.js",
|
|
11
|
+
"src",
|
|
12
|
+
"examples",
|
|
13
|
+
"scripts",
|
|
14
|
+
"skill",
|
|
15
|
+
"start-chrome.bat",
|
|
16
|
+
"grasp.skill"
|
|
17
|
+
],
|
|
18
|
+
"type": "module",
|
|
19
|
+
"scripts": {
|
|
20
|
+
"start": "node index.js",
|
|
21
|
+
"dev": "node --watch index.js",
|
|
22
|
+
"test": "node --test",
|
|
23
|
+
"test:watch": "node --test --watch"
|
|
24
|
+
},
|
|
25
|
+
"dependencies": {
|
|
26
|
+
"@chenglou/pretext": "^0.0.3",
|
|
27
|
+
"@modelcontextprotocol/sdk": "^1.8.0",
|
|
28
|
+
"playwright-core": "^1.58.2",
|
|
29
|
+
"zod": "^3.25.76"
|
|
30
|
+
},
|
|
31
|
+
"keywords": [
|
|
32
|
+
"mcp",
|
|
33
|
+
"route-aware-agent-web-runtime",
|
|
34
|
+
"browser-runtime-for-agents",
|
|
35
|
+
"browser-runtime",
|
|
36
|
+
"session-continuity",
|
|
37
|
+
"agent-browser"
|
|
38
|
+
],
|
|
39
|
+
"repository": {
|
|
40
|
+
"type": "git",
|
|
41
|
+
"url": "https://github.com/Yuzc-001/grasp.git"
|
|
42
|
+
},
|
|
43
|
+
"homepage": "https://github.com/Yuzc-001/grasp#readme",
|
|
44
|
+
"bugs": {
|
|
45
|
+
"url": "https://github.com/Yuzc-001/grasp/issues"
|
|
46
|
+
},
|
|
47
|
+
"license": "MIT"
|
|
48
|
+
}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
set -euo pipefail
|
|
3
|
+
|
|
4
|
+
# Repository root, resolved from this script's own location (its parent dir).
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"

# Runtime and profile locations; override with GRASP_RUNTIME_DIR / GRASP_PROFILE_DIR.
RUNTIME_DIR="${GRASP_RUNTIME_DIR:-$ROOT_DIR/.runtime/openclaw}"
PROFILE_DIR="${GRASP_PROFILE_DIR:-/root/snap/chromium/common/grasp-openclaw-profile}"

# Log and pid files all live under the runtime directory.
LOG_DIR="$RUNTIME_DIR/logs"
PID_DIR="$RUNTIME_DIR/pids"
CHROME_LOG="$LOG_DIR/chromium.log"
GRASP_LOG="$LOG_DIR/grasp.log"
CHROME_PID_FILE="$PID_DIR/chromium.pid"
GRASP_PID_FILE="$PID_DIR/grasp.pid"

CHROME_BIN="${CHROME_BIN:-/usr/bin/chromium-browser}"

# Resolve the port first so the default CDP URL follows CHROME_CDP_PORT.
# Otherwise overriding only the port starts Chrome on one port while the
# health checks keep polling 9222. An explicit CHROME_CDP_URL still wins.
CDP_PORT="${CHROME_CDP_PORT:-9222}"
CDP_URL="${CHROME_CDP_URL:-http://127.0.0.1:$CDP_PORT}"

mkdir -p "$PROFILE_DIR" "$LOG_DIR" "$PID_DIR"
|
|
18
|
+
|
|
19
|
+
# Succeed (status 0) when the pid file given as $1 names a process that
# `kill -0` can signal. Returns non-zero when the file is missing, its
# content is not a positive integer, or the signal check fails.
is_pid_running() {
  local file="$1"
  [[ -f "$file" ]] || return 1
  local pid
  pid="$(cat "$file" 2>/dev/null || true)"
  # Accept only a plain positive integer. `kill -0 0` and negative ids probe
  # process groups, so a corrupt pid file would otherwise count as "running".
  [[ "$pid" =~ ^[1-9][0-9]*$ ]] || return 1
  kill -0 "$pid" 2>/dev/null
}
|
|
27
|
+
|
|
28
|
+
# Launch headless Chromium in the background with remote debugging enabled,
# unless the recorded Chromium pid is still alive. Output goes to CHROME_LOG
# and the background pid is written to CHROME_PID_FILE.
start_chrome() {
  if ! is_pid_running "$CHROME_PID_FILE"; then
    local -a chrome_args=(
      --headless=new
      --no-sandbox
      --disable-dev-shm-usage
      --remote-debugging-address=127.0.0.1
      --remote-debugging-port="$CDP_PORT"
      --user-data-dir="$PROFILE_DIR"
      about:blank
    )
    nohup "$CHROME_BIN" "${chrome_args[@]}" >"$CHROME_LOG" 2>&1 &
    echo $! > "$CHROME_PID_FILE"
  fi
}
|
|
43
|
+
|
|
44
|
+
# Poll the CDP version endpoint once per second, up to 20 times.
# Returns 0 as soon as the endpoint answers, 1 if it never does.
wait_for_cdp() {
  local attempt
  for ((attempt = 1; attempt <= 20; attempt++)); do
    curl -fsS "$CDP_URL/json/version" >/dev/null 2>&1 && return 0
    sleep 1
  done
  return 1
}
|
|
53
|
+
|
|
54
|
+
# Run `node index.js status` from the repository root in a background login
# shell, unless the recorded probe pid is still alive. Output goes to
# GRASP_LOG and the background pid is written to GRASP_PID_FILE.
start_grasp_probe() {
  if is_pid_running "$GRASP_PID_FILE"; then
    return 0
  fi
  # ROOT_DIR is passed as a positional argument ($1) rather than spliced into
  # the command string, so a path containing quotes or other shell
  # metacharacters can neither break nor inject into the command.
  nohup bash -lc 'cd "$1" && node index.js status' grasp-probe "$ROOT_DIR" >"$GRASP_LOG" 2>&1 &
  echo $! > "$GRASP_PID_FILE"
}
|
|
61
|
+
|
|
62
|
+
# `start` subcommand: launch Chromium, wait for its CDP endpoint, then run the
# Grasp status probe. Prints CDP_UNREACHABLE and exits 1 if CDP never answers.
cmd_start() {
  start_chrome
  wait_for_cdp || {
    echo "CDP_UNREACHABLE"
    exit 1
  }
  start_grasp_probe
  echo "started"
}
|
|
71
|
+
|
|
72
|
+
# `status` subcommand: print key=value lines for the configured paths and CDP
# URL, the Chromium pid state, CDP reachability and the probe pid state,
# followed by the last 20 lines of the Grasp log when that file exists.
cmd_status() {
  printf '%s\n' \
    "runtime_dir=$RUNTIME_DIR" \
    "profile_dir=$PROFILE_DIR" \
    "cdp_url=$CDP_URL"

  local chromium_state="stopped"
  is_pid_running "$CHROME_PID_FILE" && chromium_state="running"
  printf '%s\n' "chromium=$chromium_state"

  local cdp_state="disconnected"
  curl -fsS "$CDP_URL/json/version" >/dev/null 2>&1 && cdp_state="connected"
  printf '%s\n' "cdp=$cdp_state"

  local probe_state="stopped"
  is_pid_running "$GRASP_PID_FILE" && probe_state="running"
  printf '%s\n' "grasp_probe=$probe_state"

  [[ -f "$GRASP_LOG" ]] || return 0
  printf '%s\n' "--- grasp_status ---"
  tail -n 20 "$GRASP_LOG" || true
}
|
|
96
|
+
|
|
97
|
+
# `logs` subcommand: print the last 60 lines of the Chromium log and of the
# Grasp log, each under its own header. Missing log files are ignored.
cmd_logs() {
  printf '%s\n' "--- chromium.log ---"
  tail -n 60 "$CHROME_LOG" 2>/dev/null || true
  printf '\n'
  printf '%s\n' "--- grasp.log ---"
  tail -n 60 "$GRASP_LOG" 2>/dev/null || true
}
|
|
104
|
+
|
|
105
|
+
# `stop` subcommand: signal the probe and then Chromium (when their recorded
# pids are alive), remove both pid files and print "stopped".
cmd_stop() {
  local pid_file
  for pid_file in "$GRASP_PID_FILE" "$CHROME_PID_FILE"; do
    if is_pid_running "$pid_file"; then
      kill "$(cat "$pid_file")" 2>/dev/null || true
    fi
  done
  rm -f "$GRASP_PID_FILE" "$CHROME_PID_FILE"
  echo "stopped"
}
|
|
115
|
+
|
|
116
|
+
# Dispatch on the first argument (default: status). Anything that is not one
# of the four subcommands prints usage to stderr and exits with status 2.
subcommand="${1:-status}"
case "$subcommand" in
  start|status|logs|stop)
    "cmd_$subcommand"
    ;;
  *)
    echo "usage: $0 {start|status|logs|stop}" >&2
    exit 2
    ;;
esac
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { fileURLToPath } from 'node:url';
|
|
3
|
+
import { runSearchTaskTool } from '../src/server/tasks/search-task.js';
|
|
4
|
+
import { createServerState } from '../src/server/state.js';
|
|
5
|
+
|
|
6
|
+
/**
 * Arithmetic mean of a list of numbers.
 *
 * @param {number[]} items Values to average.
 * @returns {number} The average, or 0 for an empty list.
 */
function mean(items) {
  const count = items.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  items.forEach((value) => {
    total += value;
  });
  return total / count;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Aggregate per-scenario benchmark results into suite-level metrics.
 *
 * @param {Array<{status?: string, toolCalls?: number, retries?: number}>} [results]
 *   Scenario results; missing `toolCalls` / `retries` count as 0.
 * @param {{suite?: string}} [options] `suite` labels the summary
 *   (default `'search-task'`).
 * @returns {{suite: string, successRate: number, avgToolCalls: number,
 *   avgRetries: number, recoverySuccessRate: number}}
 *   `successRate` is the share of results with status `'completed'` (0 when
 *   there are none). `recoverySuccessRate` is the number of completed results
 *   that needed at least one retry divided by the total retry count, and is 1
 *   when no retries happened.
 */
export function summarizeSearchBenchmark(results = [], options = {}) {
  const suite = options.suite ?? 'search-task';

  const retriesOf = (result) => result.retries ?? 0;
  const isCompleted = (result) => result.status === 'completed';

  const completedRuns = results.filter(isCompleted);
  const successRate = results.length === 0 ? 0 : completedRuns.length / results.length;

  const avgToolCalls = mean(results.map((result) => result.toolCalls ?? 0));
  const avgRetries = mean(results.map(retriesOf));

  const totalRetryAttempts = results.reduce((sum, result) => sum + retriesOf(result), 0);
  const successfulRetryAttempts = completedRuns.filter((result) => retriesOf(result) > 0).length;

  let recoverySuccessRate = 1;
  if (totalRetryAttempts !== 0) {
    recoverySuccessRate = successfulRetryAttempts / totalRetryAttempts;
  }

  return {
    suite,
    successRate,
    avgToolCalls,
    avgRetries,
    recoverySuccessRate,
  };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Create a minimal stand-in for a browser page used by the benchmark.
 *
 * @param {{url?: string, title?: string}} [options] Values the fake reports.
 * @returns {{url: () => string, title: () => Promise<string>, evaluate: () => Promise<string>}}
 *   `url()` is synchronous; `title()` resolves to the title; `evaluate()`
 *   always resolves to `'complete'`.
 */
function createFakeBenchmarkPage(options = {}) {
  const { url: pageUrl = 'https://example.com/', title: pageTitle = 'Benchmark Page' } = options;
  return {
    url() {
      return pageUrl;
    },
    async title() {
      return pageTitle;
    },
    async evaluate() {
      return 'complete';
    },
  };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Build a fixture page snapshot for the benchmark.
 *
 * `searchInput` and `submitControl` are shallow-copied wherever they appear,
 * so the returned structures never share objects with the arguments. Passing
 * a falsy value for either one omits it from `hints` and `ranking`;
 * `submitCandidate` is `null` when there is no submit control.
 *
 * @param {object} [options]
 * @param {string} [options.query]
 * @param {string} [options.title]
 * @param {string} [options.url]
 * @param {string} [options.contentText] Becomes `content.text`.
 * @param {number} [options.domRevision]
 * @param {object|null} [options.searchInput]
 * @param {object|null} [options.submitControl]
 * @returns {object} Snapshot with `query`, `title`, `url`, `hints`,
 *   `ranking`, `content`, `domRevision` and `submitCandidate`.
 */
function createSnapshot(options = {}) {
  const {
    query = 'pi agent 是啥',
    title = 'Search',
    url = 'https://example.com/search',
    contentText = '',
    domRevision = 0,
    searchInput = { id: 'I1', type: 'textbox', label: 'Search' },
    submitControl = { id: 'B1', type: 'button', label: 'Search' },
  } = options;

  const hints = [];
  const ranking = { search_input: [], command_button: [] };

  if (searchInput) {
    ranking.search_input.push({ ...searchInput });
    hints.push({ ...searchInput, semantic: 'search_input' });
  }
  if (submitControl) {
    ranking.command_button.push({ ...submitControl });
    hints.push({ ...submitControl, semantic: 'submit_control' });
  }

  let submitCandidate = null;
  if (submitControl) {
    submitCandidate = { ...submitControl };
  }

  return {
    query,
    title,
    url,
    hints,
    ranking,
    content: { text: contentText },
    domRevision,
    submitCandidate,
  };
}
|
|
74
|
+
|
|
75
|
+
/**
 * Run one benchmark scenario through `runSearchTaskTool` against a fake page.
 *
 * All browser-facing dependencies are replaced with stubs that only count how
 * often they are called; `observer`, `verifier` and `waitThenReverify` are
 * passed through unchanged from the scenario.
 *
 * @param {object} scenario
 * @param {string} scenario.name Reported as `scenario` in the result.
 * @param {string} scenario.description
 * @param {string} [scenario.query]
 * @param {number} [scenario.maxAttempts] Forwarded as `max_attempts`.
 * @param {string} [scenario.pageUrl] URL reported by the fake page.
 * @param {string} [scenario.pageTitle] Title reported by the fake page.
 * @param {Function} [scenario.observer]
 * @param {Function} [scenario.verifier]
 * @param {Function} [scenario.waitThenReverify]
 * @returns {Promise<object>} `status`, `attempts`, `toolCalls`, `retries` and
 *   `recovered` copied from the tool result, plus the scenario name,
 *   description and the per-action call counts in `actionBreakdown`.
 */
async function runToolScenario({
  name,
  description,
  query = 'pi agent 是啥',
  maxAttempts = 3,
  pageUrl,
  pageTitle,
  observer,
  verifier,
  waitThenReverify,
}) {
  const state = createServerState();
  const page = createFakeBenchmarkPage({ url: pageUrl, title: pageTitle });

  const actionBreakdown = { type: 0, typeWithEnter: 0, click: 0, pressKey: 0, waitStable: 0 };
  const countAction = (action) => {
    actionBreakdown[action] += 1;
  };

  // Stubbed dependencies: no real browser work, just call counting.
  const deps = {
    getActivePage: async () => page,
    observer,
    verifier,
    waitThenReverify,
    typeAction: async (_page, _hintId, _text, pressEnter) => {
      countAction('type');
      if (pressEnter) {
        countAction('typeWithEnter');
      }
    },
    clickAction: async () => {
      countAction('click');
    },
    pressKeyAction: async () => {
      countAction('pressKey');
    },
    waitStableAction: async () => {
      countAction('waitStable');
      return { stable: true, attempts: 1 };
    },
    extractContentAction: async () => ({ text: 'fixture content' }),
    syncStateAction: async () => undefined,
  };

  const outcome = await runSearchTaskTool({
    state,
    query,
    max_attempts: maxAttempts,
    deps,
  });

  return {
    scenario: name,
    description,
    status: outcome.status,
    attempts: outcome.attempts,
    toolCalls: outcome.toolCalls,
    retries: outcome.retries,
    recovered: outcome.recovered,
    actionBreakdown,
  };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Build the built-in benchmark scenarios.
 *
 * Each entry has a `name`, a `description` and an async `run()` that executes
 * the scenario through `runToolScenario`. Scenario options (including any
 * verifier counters) are rebuilt on every `run()` call, so repeated runs
 * start from a clean state.
 *
 * @returns {Array<{name: string, description: string, run: () => Promise<object>}>}
 */
export function createSearchBenchmarkScenarios() {
  // Wrap a name/description pair and an options factory into a scenario.
  const defineScenario = (name, description, buildOptions) => ({
    name,
    description,
    async run() {
      return runToolScenario({ name, description, ...buildOptions() });
    },
  });

  // The fake page and the observed snapshot share the same URL and title.
  const pageFixture = (url, title, snapshotFields) => ({
    pageUrl: url,
    pageTitle: title,
    observer: async () => ({
      snapshot: createSnapshot({ title, url, ...snapshotFields }),
    }),
  });

  return [
    defineScenario('grok-question', 'Grok 搜索提问', () => ({
      ...pageFixture('https://grok.com/', 'Grok', {
        searchInput: { id: 'I1', type: 'textbox', label: '向 Grok 提问' },
        submitControl: { id: 'B2', type: 'button', label: '发送' },
      }),
      verifier: async () => ({ ok: true, evidence: { answerStarted: true } }),
    })),

    defineScenario('google-search', 'Google 搜索', () => ({
      ...pageFixture('https://www.google.com/', 'Google', {
        searchInput: { id: 'I1', type: 'searchbox', label: 'Search Google' },
        submitControl: null,
      }),
      verifier: async () => ({ ok: true, evidence: { resultsVisible: true } }),
    })),

    defineScenario('overlay-site-search', '带弹层干扰的站内搜索', () => {
      // The first verification fails for a primary submit; later ones pass.
      let verifyCalls = 0;
      return {
        ...pageFixture('https://docs.example.com/', 'Docs Search', {
          searchInput: { id: 'I3', type: 'combobox', label: '站内搜索' },
          submitControl: { id: 'B4', type: 'button', label: '搜索' },
        }),
        verifier: async ({ plan }) => {
          verifyCalls += 1;
          const blockedByOverlay = verifyCalls === 1 && plan.mode === 'primary_submit';
          if (blockedByOverlay) {
            return { ok: false, error_code: 'NO_EFFECT', evidence: { overlay: true } };
          }
          return { ok: true, evidence: { resultPaneChanged: true } };
        },
      };
    }),

    defineScenario('streaming-answer', '流式回答页面等待稳定', () => {
      // The first verification reports a pending load; later ones pass.
      let verifyCount = 0;
      return {
        ...pageFixture('https://chat.example.com/', 'Streaming Answer', {
          searchInput: { id: 'I9', type: 'textbox', label: 'Ask anything' },
          submitControl: { id: 'B9', type: 'button', label: 'Send' },
        }),
        verifier: async () => {
          verifyCount += 1;
          if (verifyCount === 1) {
            return { ok: false, error_code: 'LOADING_PENDING', evidence: { streamOpen: true } };
          }
          return { ok: true, evidence: { streamSettled: true } };
        },
      };
    }),

    defineScenario('result-content-extract', '结果页正文抽取', () => ({
      ...pageFixture('https://example.com/pi-agent', 'Pi Agent', {
        contentText: 'Pi Agent is a minimal coding-agent runtime.',
        searchInput: { id: 'I6', type: 'textbox', label: 'Search docs' },
        submitControl: { id: 'B6', type: 'button', label: 'Search docs' },
      }),
      verifier: async ({ snapshot }) => ({
        ok: true,
        evidence: { extractedText: snapshot.content.text },
      }),
    })),
  ];
}
|
|
264
|
+
|
|
265
|
+
/**
 * Run the scenarios one after another and summarize the outcome.
 *
 * @param {Array<{run: () => Promise<object>}>} [scenarios] Defaults to the
 *   built-in scenario list.
 * @param {{silent?: boolean, suite?: string}} [options] `silent` suppresses
 *   the JSON printout; the whole object is also passed to
 *   `summarizeSearchBenchmark`.
 * @returns {Promise<{summary: object, results: object[]}>}
 */
export async function runSearchBenchmark(scenarios = createSearchBenchmarkScenarios(), options = {}) {
  const { silent: suppressOutput = false } = options;

  // Scenarios run sequentially, in list order.
  const results = [];
  for (const scenario of scenarios) {
    results.push(await scenario.run());
  }

  const payload = {
    summary: summarizeSearchBenchmark(results, options),
    results,
  };

  if (suppressOutput) {
    return payload;
  }
  console.log(JSON.stringify(payload, null, 2));
  return payload;
}
|
|
282
|
+
|
|
283
|
+
// Truthy only when this file is the script Node was started with, so
// importing the module for its exports does not run the benchmark.
const isMain = process.argv[1] && path.resolve(process.argv[1]) === fileURLToPath(import.meta.url);

if (isMain) {
  // Await the run instead of leaving the promise floating: a failing scenario
  // then rejects the module's top-level await rather than surfacing as an
  // unhandled rejection.
  await runSearchBenchmark();
}
|