@dyyz1993/agent-browser 0.11.5 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/dist/__tests__/utils/parseCli.d.ts.map +1 -1
- package/dist/__tests__/utils/parseCli.js +97 -2
- package/dist/__tests__/utils/parseCli.js.map +1 -1
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +117 -81
- package/dist/actions.js.map +1 -1
- package/dist/browser.d.ts +1 -0
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +24 -29
- package/dist/browser.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +102 -3
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/connection.d.ts.map +1 -1
- package/dist/cli/connection.js +12 -29
- package/dist/cli/connection.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +35 -25
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/output.d.ts.map +1 -1
- package/dist/cli/output.js +3 -0
- package/dist/cli/output.js.map +1 -1
- package/dist/cli.js +117 -3
- package/dist/cli.js.map +1 -1
- package/dist/daemon.d.ts +18 -2
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +46 -32
- package/dist/daemon.js.map +1 -1
- package/dist/message-bridge.d.ts.map +1 -1
- package/dist/message-bridge.js +4 -1
- package/dist/message-bridge.js.map +1 -1
- package/dist/rc-config.d.ts +42 -0
- package/dist/rc-config.d.ts.map +1 -0
- package/dist/rc-config.js +170 -0
- package/dist/rc-config.js.map +1 -0
- package/dist/recorder/inject.js +30 -24
- package/package.json +1 -1
- package/scripts/check_goods_container.js +35 -0
- package/scripts/check_page_content.js +36 -0
- package/scripts/click_applause_rate.js +30 -0
- package/scripts/explore_jd_page.js +31 -0
- package/scripts/extract_all_jd_data.js +80 -0
- package/scripts/extract_jd_product_detail.js +62 -0
- package/scripts/extract_jd_products_correct_links.js +78 -0
- package/scripts/extract_jd_products_final.js +80 -0
- package/scripts/extract_jd_reviews.js +48 -0
- package/scripts/extract_jd_seafood_final.js +78 -0
- package/scripts/extract_multiple_products.js +77 -0
- package/scripts/extract_products_no_scroll.js +68 -0
- package/scripts/extract_products_simple.js +68 -0
- package/scripts/find_applause_rate.js +26 -0
- package/scripts/find_jd_links.js +28 -0
- package/scripts/find_main_content.js +20 -0
- package/scripts/find_product_cards.js +38 -0
- package/scripts/find_root_content.js +26 -0
- package/scripts/find_unique_products.js +55 -0
- package/scripts/generate-skill.cjs +303 -0
- package/scripts/get_jd_product_detail.js +16 -0
- package/scripts/get_jd_products.js +23 -0
- package/scripts/get_jd_seafood_products.js +44 -0
- package/scripts/get_product_details_from_images.js +54 -0
- package/scripts/verify-form.sh +67 -0
- package/scripts/verify-login.sh +65 -0
- package/scripts/verify-recording.sh +80 -0
- package/scripts/verify-upload.sh +41 -0
- package/skills/agent-browser/SKILL.md +135 -370
- package/bin/agent-browser-linux-x64 +0 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/bin/bash
# Scenario 3: file upload verification script.
#
# Opens the upload test page in a headed browser, uploads a fixture file
# through the "#fileInput" element, then checks that the browser is still on
# the upload test page.
# NOTE(review): only the URL is verified; nothing here confirms the page
# actually accepted the file — consider asserting on page content as well.
#
# Environment:
#   TEST_PAGES_DIR  Directory containing upload-test.html and upload-test.txt.
#                   Defaults to ../test-pages relative to this script.

set -euo pipefail

export AGENT_BROWSER_HUMAN=bezier

# Locate the fixtures relative to this script instead of a machine-specific
# absolute path, so the script works from any checkout.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
TEST_PAGES_DIR="${TEST_PAGES_DIR:-$SCRIPT_DIR/../test-pages}"

if [[ ! -d "$TEST_PAGES_DIR" ]]; then
  printf 'test pages directory not found: %s\n' "$TEST_PAGES_DIR" >&2
  exit 1
fi
# Canonicalize so the file:// URL contains no "..".
TEST_PAGES_DIR="$(cd -- "$TEST_PAGES_DIR" && pwd)"

SESSION="upload-verify-$(date +%s)"
TEST_PAGE="file://$TEST_PAGES_DIR/upload-test.html"
TEST_FILE="$TEST_PAGES_DIR/upload-test.txt"

# Fail early with a clear message rather than midway through the browser run.
for fixture in "$TEST_PAGES_DIR/upload-test.html" "$TEST_FILE"; do
  if [[ ! -f "$fixture" ]]; then
    printf 'missing test fixture: %s\n' "$fixture" >&2
    exit 1
  fi
done

# Close the browser session on every exit path. Under `set -e` any failing
# step would otherwise leave the headed browser running.
browser_open=0
cleanup() {
  if (( browser_open )); then
    # Best effort: the session may already be gone, and a failure here must
    # not mask the script's real exit status.
    agent-browser --session "$SESSION" close || true
  fi
}
trap cleanup EXIT

echo "=== 开始执行文件上传脚本 ==="
echo "Session: $SESSION"

# Open the test page.
echo "1. 打开文件上传测试页面..."
browser_open=1
agent-browser --session "$SESSION" --allow-file-access --headed open "$TEST_PAGE"
agent-browser --session "$SESSION" wait --load networkidle

# Upload the file.
echo "2. 上传文件..."
agent-browser --session "$SESSION" upload "#fileInput" "$TEST_FILE"

# Give the page time to process the upload.
agent-browser --session "$SESSION" wait 1000

# Verify the result.
echo "3. 验证结果..."
URL=$(agent-browser --session "$SESSION" get url)
if [[ "$URL" == *"upload-test.html"* ]]; then
  echo "✓ URL验证成功"
else
  echo "✗ URL验证失败"
  exit 1 # the EXIT trap closes the browser
fi

# Clean up. Close explicitly so a failing close still fails the script.
echo "4. 关闭浏览器..."
agent-browser --session "$SESSION" close
browser_open=0

echo "=== 脚本执行成功 ==="
exit 0
|
|
@@ -6,442 +6,207 @@ allowed-tools: Bash(agent-browser:*)
|
|
|
6
6
|
|
|
7
7
|
# Browser Automation with agent-browser
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
Every browser automation follows this pattern:
|
|
12
|
-
|
|
13
|
-
1. **Navigate**: `agent-browser open <url>`
|
|
14
|
-
2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
|
|
15
|
-
3. **Interact**: Use refs to click, fill, select
|
|
16
|
-
4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
|
|
17
|
-
|
|
18
|
-
```bash
|
|
19
|
-
agent-browser open https://example.com/form
|
|
20
|
-
agent-browser snapshot -i
|
|
21
|
-
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
|
|
22
|
-
|
|
23
|
-
agent-browser fill @e1 "user@example.com"
|
|
24
|
-
agent-browser fill @e2 "password123"
|
|
25
|
-
agent-browser click @e3
|
|
26
|
-
agent-browser wait --load networkidle
|
|
27
|
-
agent-browser snapshot -i # Check result
|
|
28
|
-
```
|
|
9
|
+
Fast CLI for browser automation. Works headlessly by default, supports named sessions, proxy, and remote streaming.
|
|
29
10
|
|
|
30
|
-
##
|
|
11
|
+
## Browser Setup (macOS)
|
|
31
12
|
|
|
32
|
-
|
|
13
|
+
Set the browser path to avoid Playwright downloading Chromium:
|
|
33
14
|
|
|
34
15
|
```bash
|
|
35
|
-
|
|
36
|
-
agent-browser back # Go back
|
|
37
|
-
agent-browser forward # Go forward
|
|
38
|
-
agent-browser reload # Reload page
|
|
39
|
-
agent-browser close # Close browser (alias: quit, exit)
|
|
16
|
+
export AGENT_BROWSER_EXECUTABLE_PATH=/Applications/Chromium.app/Contents/MacOS/Chromium
|
|
40
17
|
```
|
|
41
18
|
|
|
42
|
-
|
|
19
|
+
Or per-command: `agent-browser --executable-path /Applications/Chromium.app/Contents/MacOS/Chromium open <url>`
|
|
43
20
|
|
|
44
|
-
|
|
45
|
-
agent-browser click @e1 # Click element
|
|
46
|
-
agent-browser dblclick @e1 # Double-click
|
|
47
|
-
agent-browser fill @e2 "text" # Clear and type text
|
|
48
|
-
agent-browser type @e2 "text" # Type without clearing
|
|
49
|
-
agent-browser select @e1 "option" # Select dropdown option
|
|
50
|
-
agent-browser check @e1 # Check checkbox
|
|
51
|
-
agent-browser uncheck @e1 # Uncheck checkbox
|
|
52
|
-
agent-browser press Enter # Press key (alias: key)
|
|
53
|
-
agent-browser keydown / keyup # Raw key down / up
|
|
54
|
-
agent-browser hover @e1 # Hover over element
|
|
55
|
-
agent-browser focus @e1 # Focus element
|
|
56
|
-
agent-browser drag @e1 @e2 # Drag from e1 to e2
|
|
57
|
-
agent-browser upload @e1 "/path" # Upload file
|
|
58
|
-
agent-browser download @e1 "/path" # Download resource
|
|
59
|
-
```
|
|
21
|
+
Verify: `agent-browser config`
|
|
60
22
|
|
|
61
|
-
|
|
23
|
+
**Important**: The daemon persists across commands. If the env var is not inherited by your shell (common in sub-processes), pass `--executable-path` on the first `open` — it's stored in the daemon for the session lifetime. If the daemon restarts (crash/timeout), you must pass it again.
|
|
62
24
|
|
|
63
|
-
|
|
64
|
-
agent-browser scroll down 500 # Scroll pixels
|
|
65
|
-
agent-browser scrollintoview @e1 # Scroll element into view
|
|
66
|
-
```
|
|
25
|
+
## Daemon Management
|
|
67
26
|
|
|
68
|
-
|
|
27
|
+
agent-browser runs a background daemon per session. If commands hang or time out, the daemon may be stale:
|
|
69
28
|
|
|
70
29
|
```bash
|
|
71
|
-
agent-browser
|
|
72
|
-
agent-browser
|
|
73
|
-
agent-browser
|
|
74
|
-
agent-browser snapshot -s "body" --path # Include xpath and cssPath in refs
|
|
75
|
-
agent-browser snapshot -s "body" --attrs # Include element attributes in refs
|
|
76
|
-
agent-browser snapshot -i --json # JSON output for parsing
|
|
30
|
+
agent-browser kill # Kill all daemons + stream server
|
|
31
|
+
agent-browser kill --session myname # Kill specific session daemon
|
|
32
|
+
agent-browser session list # Check active sessions
|
|
77
33
|
```
|
|
78
34
|
|
|
79
|
-
|
|
35
|
+
Common recovery pattern:
|
|
80
36
|
|
|
81
37
|
```bash
|
|
82
|
-
agent-browser
|
|
83
|
-
agent-browser get url # Get current URL
|
|
84
|
-
agent-browser get title # Get page title
|
|
85
|
-
agent-browser get count ".item" # Count matching elements
|
|
86
|
-
agent-browser get box @e1 # Bounding box {x,y,width,height}
|
|
87
|
-
agent-browser get styles @e1 # Computed styles
|
|
88
|
-
agent-browser is visible @e1 # Visibility check
|
|
89
|
-
agent-browser is enabled @e1 # Enabled check
|
|
90
|
-
agent-browser is checked @e1 # Checked state
|
|
38
|
+
agent-browser kill && agent-browser open https://example.com # Fresh start
|
|
91
39
|
```
|
|
92
40
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
```bash
|
|
96
|
-
agent-browser wait @e1 # Wait for element to appear
|
|
97
|
-
agent-browser wait --load networkidle # Wait for network idle
|
|
98
|
-
agent-browser wait --load domcontentloaded # Wait for DOM ready
|
|
99
|
-
agent-browser wait --url "**/page" # Wait for URL pattern match
|
|
100
|
-
agent-browser wait --text "Hello" # Wait for text on page
|
|
101
|
-
agent-browser wait --fn "document.hidden === false" # Wait for JS expression
|
|
102
|
-
agent-browser wait --download # Wait for download to complete
|
|
103
|
-
agent-browser wait 2000 # Wait milliseconds (fixed delay)
|
|
104
|
-
agent-browser wait --request "api/data" # Wait for specific network request (background listener)
|
|
105
|
-
```
|
|
41
|
+
**`tab new <url>`** waits for the full page load and may time out on slow sites. If it fails, the tab has usually been created anyway — run `tab list` to check, then `tab <index>` to switch.
|
|
106
42
|
|
|
107
|
-
|
|
43
|
+
## Quick Start
|
|
108
44
|
|
|
109
45
|
```bash
|
|
110
|
-
agent-browser
|
|
111
|
-
agent-browser
|
|
112
|
-
agent-browser
|
|
113
|
-
agent-browser
|
|
46
|
+
agent-browser open https://example.com
|
|
47
|
+
agent-browser snapshot -i # Get refs: @e1, @e2, ...
|
|
48
|
+
agent-browser fill @e1 "user@example.com" # Interact via refs
|
|
49
|
+
agent-browser click @e2
|
|
50
|
+
agent-browser snapshot -i # Re-snapshot after page change
|
|
114
51
|
```
|
|
115
52
|
|
|
116
|
-
|
|
53
|
+
## Discovering Commands
|
|
117
54
|
|
|
118
55
|
```bash
|
|
119
|
-
agent-browser
|
|
120
|
-
agent-browser
|
|
121
|
-
agent-browser
|
|
122
|
-
agent-browser network requests --capture-response # Capture response bodies
|
|
123
|
-
agent-browser network requests --capture-response --type json # Filter captured by content type
|
|
124
|
-
agent-browser network requests --output ./captures/ # Save captures to directory
|
|
125
|
-
agent-browser network route "**/api/**" --abort # Block requests
|
|
126
|
-
agent-browser network route "**/api/**" --body '{"users": []}' # Mock response
|
|
127
|
-
agent-browser network route "**/api/**" --status 404 # Mock status code
|
|
128
|
-
agent-browser network unroute "**/api/**" # Remove route
|
|
56
|
+
agent-browser --help # All commands & options
|
|
57
|
+
agent-browser snapshot --help # Command-specific help
|
|
58
|
+
agent-browser config # Current config & env vars
|
|
129
59
|
```
|
|
130
60
|
|
|
131
|
-
|
|
61
|
+
The CLI is self-documenting. When unsure about a command, run `--help` first.
|
|
132
62
|
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
```bash
|
|
136
|
-
agent-browser tab list # List all tabs
|
|
137
|
-
agent-browser tab new # Open new tab
|
|
138
|
-
agent-browser tab close 2 # Close tab by index
|
|
139
|
-
agent-browser tab switch 0 # Switch to tab
|
|
140
|
-
agent-browser window new # Open new window
|
|
141
|
-
```
|
|
63
|
+
Global flags (`--session`, `--proxy`, `--state`, etc.) work at any position — before or after the subcommand.
|
|
142
64
|
|
|
143
|
-
|
|
65
|
+
## Basic Info Commands
|
|
144
66
|
|
|
145
67
|
```bash
|
|
146
|
-
agent-browser
|
|
147
|
-
agent-browser
|
|
68
|
+
agent-browser get title # Page title
|
|
69
|
+
agent-browser get url # Current URL
|
|
70
|
+
agent-browser get text @e1 # Element text
|
|
71
|
+
agent-browser get text body # All page text
|
|
72
|
+
agent-browser is visible @e1 # Visibility check
|
|
148
73
|
```
|
|
149
74
|
|
|
150
|
-
|
|
75
|
+
## Network Monitoring Pattern
|
|
151
76
|
|
|
152
|
-
|
|
153
|
-
agent-browser state save auth.json # Save cookies/localStorage/session
|
|
154
|
-
agent-browser state clear # Clear all state
|
|
155
|
-
agent-browser storage session dump # Dump session storage
|
|
156
|
-
agent-browser storage session load # Load session storage
|
|
157
|
-
agent-browser cookies set name value domain # Set cookie
|
|
158
|
-
agent-browser cookies export # Export all cookies
|
|
159
|
-
```
|
|
160
|
-
|
|
161
|
-
### Debugging
|
|
77
|
+
Request tracking activates on first use. After `open`, run `network requests` twice — once to activate, once after triggering requests:
|
|
162
78
|
|
|
163
79
|
```bash
|
|
164
|
-
agent-browser
|
|
165
|
-
agent-browser
|
|
166
|
-
agent-browser
|
|
167
|
-
agent-browser
|
|
168
|
-
agent-browser trace stop ./trace.json # Stop and save trace
|
|
169
|
-
```
|
|
170
|
-
|
|
171
|
-
### Session Management
|
|
172
|
-
|
|
173
|
-
```bash
|
|
174
|
-
agent-browser --session site1 open https://a.com # Named session
|
|
175
|
-
agent-browser --session site2 open https://b.com # Parallel session
|
|
176
|
-
agent-browser session list # List active sessions
|
|
177
|
-
agent-browser connect ws://localhost:9222 # Connect to remote CDP browser
|
|
178
|
-
agent-browser kill # Kill daemon process
|
|
179
|
-
agent-browser config # Show/edit config
|
|
180
|
-
agent-browser config [--json] # Config as JSON
|
|
80
|
+
agent-browser open https://example.com
|
|
81
|
+
agent-browser network requests # Activates tracking (may show hint)
|
|
82
|
+
agent-browser reload # Trigger requests
|
|
83
|
+
agent-browser network requests # Now shows captured requests
|
|
181
84
|
```
|
|
182
85
|
|
|
183
|
-
##
|
|
184
|
-
|
|
185
|
-
These flags work with most commands:
|
|
186
|
-
|
|
187
|
-
| Flag | Description |
|
|
188
|
-
| -------------------------- | ---------------------------------------------- |
|
|
189
|
-
| `--session <name>` | Named browser session |
|
|
190
|
-
| `--json` | JSON output format |
|
|
191
|
-
| `--headed` | Show visible browser window |
|
|
192
|
-
| `--cdp <url>` | Connect via Chrome DevTools Protocol directly |
|
|
193
|
-
| `-p/--provider` | Provider: ios, browserbase, kernel, browseruse |
|
|
194
|
-
| `--proxy <url>` | HTTP/SOCKS5 proxy |
|
|
195
|
-
| `--proxy-bypass <rules>` | Proxy bypass rules |
|
|
196
|
-
| `--headers 'K: V'` | Extra HTTP headers per request |
|
|
197
|
-
| `--state <path>` | Restore browser state from file |
|
|
198
|
-
| `--profile <path>` | Chrome profile directory |
|
|
199
|
-
| `--args "<args>"` | Extra Chromium launch arguments |
|
|
200
|
-
| `--user-agent <ua>` | Custom User-Agent string |
|
|
201
|
-
| `--executable-path <path>` | Browser binary path |
|
|
202
|
-
| `--extension <path>` | Load .crx Chrome extension |
|
|
203
|
-
| `--ignore-https-errors` | Ignore HTTPS certificate errors |
|
|
204
|
-
| `--allow-file-access` | Allow file:// URLs |
|
|
205
|
-
| `--timeout <ms>` | Global operation timeout |
|
|
206
|
-
| `--debug` | Verbose debug logging |
|
|
207
|
-
|
|
208
|
-
Examples:
|
|
209
|
-
|
|
210
|
-
```bash
|
|
211
|
-
agent-browser --proxy http://proxy:8080 open https://example.com
|
|
212
|
-
agent-browser --headed --debug open https://example.com
|
|
213
|
-
agent-browser --user-agent "MyBot/1.0" open https://example.com
|
|
214
|
-
```
|
|
86
|
+
## Capabilities
|
|
215
87
|
|
|
216
|
-
|
|
88
|
+
| Area | Key Commands | Deep Dive |
|
|
89
|
+
|------|-------------|-----------|
|
|
90
|
+
| Page Navigation & Interaction | `open`, `click`, `dblclick`, `type`, `fill` | See `agent-browser --help` |
|
|
91
|
+
| Snapshot & Element Inspection | `snapshot` | [snapshot-refs](references/snapshot-refs.md) |
|
|
92
|
+
| Finding Elements | `find by role`, `text`, `label` | [commands](references/commands.md) |
|
|
93
|
+
| Data Extraction | `eval` | [data-extraction](references/data-extraction.md) |
|
|
94
|
+
| Network Control | `request monitoring`, `API mocking`, `URL blocking` | [network-monitoring](references/network-monitoring.md) |
|
|
95
|
+
| Session & State | `connect`, `close`, `cookies`, `storage`, `tab` | [session-management](references/session-management.md) |
|
|
96
|
+
| Authentication | `login flows`, `OAuth`, `2FA` | [authentication](references/authentication.md) |
|
|
97
|
+
| Recording & Replay | `record`, `recorder` | [recorder](references/recorder.md) |
|
|
98
|
+
| Visual Remote Control (Viewer) | `viewer` | [viewer-mode](references/viewer-mode.md) |
|
|
99
|
+
| Mobile Remote Control | `touchpad gestures`, `input panel`, `IME/CJK` | [mobile-viewer](references/mobile-viewer.md) |
|
|
100
|
+
| iOS Simulator (Appium) | `native iOS automation via Xcode + Appium` | See `agent-browser -p ios --help` |
|
|
101
|
+
| Cloud Browser Providers | `browserbase`, `kernel`, `browseruse` | See `agent-browser --help` |
|
|
102
|
+
| Proxy & Network Config | `install` | [proxy-support](references/proxy-support.md) |
|
|
217
103
|
|
|
218
|
-
###
|
|
104
|
+
### Core Workflow Pattern
|
|
219
105
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
agent-browser fill @e2 "jane@example.com"
|
|
225
|
-
agent-browser select @e3 "California"
|
|
226
|
-
agent-browser check @e4
|
|
227
|
-
agent-browser click @e5
|
|
228
|
-
agent-browser wait --load networkidle
|
|
229
|
-
```
|
|
106
|
+
1. `open <url>` → navigate
|
|
107
|
+
2. `snapshot -i` → get element refs (`@e1`, `@e2`, ...)
|
|
108
|
+
3. `fill` / `click` / `select` → interact using refs
|
|
109
|
+
4. Re-`snapshot` after any page change (refs are invalidated)
|
|
230
110
|
|
|
231
|
-
###
|
|
111
|
+
### Session Isolation
|
|
232
112
|
|
|
233
|
-
|
|
234
|
-
# Login once and save state
|
|
235
|
-
agent-browser open https://app.example.com/login
|
|
236
|
-
agent-browser snapshot -i
|
|
237
|
-
agent-browser fill @e1 "$USERNAME"
|
|
238
|
-
agent-browser fill @e2 "$PASSWORD"
|
|
239
|
-
agent-browser click @e3
|
|
240
|
-
agent-browser wait --url "**/dashboard"
|
|
241
|
-
agent-browser state save auth.json
|
|
242
|
-
|
|
243
|
-
# Reuse in future sessions
|
|
244
|
-
agent-browser --state auth.json open https://app.example.com/dashboard
|
|
245
|
-
```
|
|
113
|
+
Refs live within a **session scope** (default: `default`). Multiple Bash processes sharing the same session share the same refs and browser state — one process navigating away invalidates another's refs.
|
|
246
114
|
|
|
247
|
-
|
|
115
|
+
**When running parallel tasks**, assign each a unique session:
|
|
248
116
|
|
|
249
117
|
```bash
|
|
250
|
-
agent-browser open https://
|
|
251
|
-
agent-browser
|
|
252
|
-
agent-browser get text @e5 # Specific element
|
|
253
|
-
agent-browser get text body > page.txt # All page text
|
|
254
|
-
agent-browser snapshot -i --json # JSON for parsing
|
|
255
|
-
agent-browser get text @e1 --json # Element as JSON
|
|
118
|
+
agent-browser --session task1 open https://site-a.com
|
|
119
|
+
agent-browser --session task2 open https://site-b.com
|
|
256
120
|
```
|
|
257
121
|
|
|
258
|
-
|
|
122
|
+
If ref errors occur unexpectedly, check whether another process is operating on the same session with `agent-browser session list`.
|
|
259
123
|
|
|
260
|
-
|
|
124
|
+
### Refs
|
|
261
125
|
|
|
262
|
-
|
|
263
|
-
agent-browser open "about:blank"
|
|
264
|
-
(agent-browser wait --request "api/users" --timeout 30000 > response.json) &
|
|
265
|
-
sleep 1
|
|
266
|
-
agent-browser open "https://example.com/user/profile"
|
|
267
|
-
wait $!
|
|
268
|
-
jq '.body' response.json
|
|
269
|
-
```
|
|
126
|
+
Refs (`@e1`, `@e2`) are **session-scoped** — valid across Bash processes within the same session, but invalidated by any page change (navigation, form submit, dynamic load). Always re-snapshot after DOM mutations. See [snapshot-refs.md](references/snapshot-refs.md).
|
|
270
127
|
|
|
271
|
-
|
|
128
|
+
When multiple elements share the same role+name, each gets a unique ref with a `[nth=N]` annotation. Just use the ref — the nth index is built in:
|
|
272
129
|
|
|
273
|
-
```bash
|
|
274
|
-
agent-browser network requests --filter "**/api/**"
|
|
275
|
-
agent-browser network route "**/api/users" --body '{"users": []}'
|
|
276
|
-
agent-browser network route "**/ads/**" --abort
|
|
277
|
-
agent-browser network unroute "**/api/users"
|
|
278
130
|
```
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
```bash
|
|
283
|
-
agent-browser --session site1 open https://site-a.com
|
|
284
|
-
agent-browser --session site2 open https://site-b.com
|
|
285
|
-
agent-browser --session site1 snapshot -i
|
|
286
|
-
agent-browser session list
|
|
131
|
+
- button "Submit" [ref=e1]
|
|
132
|
+
- button "Submit" [ref=e5] [nth=1] # Use @e5, no need to specify nth
|
|
287
133
|
```
|
|
288
134
|
|
|
289
|
-
###
|
|
135
|
+
### Iframes
|
|
290
136
|
|
|
291
137
|
```bash
|
|
292
|
-
agent-browser --
|
|
293
|
-
agent-browser --
|
|
294
|
-
agent-browser screenshot output.png
|
|
138
|
+
agent-browser snapshot --in-frame "#my-iframe" # Single iframe
|
|
139
|
+
agent-browser click @e1 --in-frame "#outer/inner" # Nested
|
|
295
140
|
```
|
|
296
141
|
|
|
297
|
-
###
|
|
298
|
-
|
|
299
|
-
Use `--in-frame` to operate inside iframes:
|
|
300
|
-
|
|
301
|
-
```bash
|
|
302
|
-
agent-browser snapshot --in-frame "#my-iframe"
|
|
303
|
-
agent-browser snapshot --in-frame "#outer/inner" # Nested path
|
|
304
|
-
agent-browser click @e1 --in-frame "#container/frame"
|
|
305
|
-
agent-browser fill #user "admin" --in-frame "#container/login-frame"
|
|
306
|
-
```
|
|
307
|
-
|
|
308
|
-
Frame path syntax: `#id-or-name`, `#index` (position), `#parent/child` (nested).
|
|
309
|
-
|
|
310
|
-
### Semantic Locators (Alternative to Refs)
|
|
311
|
-
|
|
312
|
-
When refs are unavailable, use semantic locators:
|
|
142
|
+
### Semantic Locators (No Refs Needed)
|
|
313
143
|
|
|
314
144
|
```bash
|
|
315
145
|
agent-browser find text "Sign In" click
|
|
316
146
|
agent-browser find label "Email" fill "user@test.com"
|
|
317
147
|
agent-browser find role button click --name "Submit"
|
|
318
|
-
agent-browser find placeholder "Search" type "query"
|
|
319
|
-
agent-browser find testid "submit-btn" click
|
|
320
|
-
```
|
|
321
|
-
|
|
322
|
-
### Proxy Configuration
|
|
323
|
-
|
|
324
|
-
```bash
|
|
325
|
-
agent-browser --proxy http://proxy:8080 open https://example.com
|
|
326
|
-
agent-browser --proxy socks5://proxy:1080 open https://example.com
|
|
327
|
-
agent-browser --proxy http://user:pass@proxy:8080 --proxy-bypass "localhost,*.internal" open https://example.com
|
|
328
|
-
```
|
|
329
|
-
|
|
330
|
-
## Advanced Features
|
|
331
|
-
|
|
332
|
-
### Recording & Replaying Workflows
|
|
333
|
-
|
|
334
|
-
For test automation and workflow capture:
|
|
335
|
-
|
|
336
|
-
```bash
|
|
337
|
-
agent-browser recorder start --session my-test
|
|
338
|
-
agent-browser open https://example.com/form
|
|
339
|
-
agent-browser snapshot -i
|
|
340
|
-
agent-browser fill @e1 "user@example.com"
|
|
341
|
-
agent-browser click @e3
|
|
342
|
-
agent-browser recorder stop --output test-workflow.yaml
|
|
343
|
-
agent-browser recorder replay test-workflow.yaml
|
|
344
|
-
```
|
|
345
|
-
|
|
346
|
-
See [recorder.md](references/recorder.md) for details.
|
|
347
|
-
|
|
348
|
-
### Human-like Mouse Movement
|
|
349
|
-
|
|
350
|
-
Simulate natural mouse trajectories via environment variable:
|
|
351
|
-
|
|
352
|
-
```bash
|
|
353
|
-
export AGENT_BROWSER_HUMAN=1 # Enable (default: arc path)
|
|
354
|
-
export AGENT_BROWSER_HUMAN=bezier # Bezier curve with overshoot
|
|
355
|
-
export AGENT_BROWSER_HUMAN=random # Random path with jitter
|
|
356
|
-
export AGENT_BROWSER_HUMAN=linear # Straight line (fastest)
|
|
357
|
-
|
|
358
|
-
agent-browser click @e1 # Uses human trajectory
|
|
359
|
-
agent-browser wait 3000 # Mouse wandering while waiting
|
|
360
|
-
unset AGENT_BROWSER_HUMAN # Disable
|
|
361
|
-
```
|
|
362
|
-
|
|
363
|
-
Features: continuous position tracking, acceleration curves, 4 trajectory types, auto-wandering on wait.
|
|
364
|
-
|
|
365
|
-
### Viewer / Streaming Mode
|
|
366
|
-
|
|
367
|
-
Real-time remote browser visualization with frame streaming over WebSocket.
|
|
368
|
-
|
|
369
|
-
```bash
|
|
370
|
-
# Start viewer after opening a page
|
|
371
|
-
agent-browser open https://example.com
|
|
372
|
-
agent-browser viewer # Opens viewer URL in browser
|
|
373
|
-
agent-browser viewer --json # Get connection details as JSON
|
|
374
|
-
```
|
|
375
|
-
|
|
376
|
-
**Architecture:** Browser -> Daemon (IPC) -> Standalone Server (:5005) -> Viewer (WebSocket)
|
|
377
|
-
|
|
378
|
-
**Element Crop Mode:** Stream can be cropped to a specific DOM element's bounds. Coordinates auto-map to element-local space.
|
|
379
|
-
|
|
380
|
-
See [viewer-mode.md](references/viewer-mode.md) for architecture details, troubleshooting, and element mode.
|
|
381
|
-
|
|
382
|
-
### Mobile Remote Control (Touch Devices)
|
|
383
|
-
|
|
384
|
-
When viewer is opened on a phone/tablet, it automatically enters **mobile mode** with touch-optimized UI:
|
|
385
|
-
|
|
386
|
-
- **Touchpad**: Bottom-area gesture surface (tap=click, drag=move cursor, long-press=drag, 2-finger=scroll)
|
|
387
|
-
- **Input Panel**: Tap remote input field -> local text input appears -> syncs to remote via `input_fill`
|
|
388
|
-
- **Virtual Keyboard Toolbar**: Tab, Arrows, Enter, Backspace, Escape
|
|
389
|
-
- **IME Support**: Chinese/Japanese composition (pinyin etc.) — intermediate input NOT sent to remote
|
|
390
|
-
- **DeviceMode**: Auto-detects device type, switches UI dynamically on resize/orientationchange/matchMedia
|
|
391
|
-
|
|
392
|
-
See [mobile-viewer.md](references/mobile-viewer.md) for touchpad gestures, input panel flow, DeviceMode architecture.
|
|
393
|
-
|
|
394
|
-
### iOS Simulator (Appium)
|
|
395
|
-
|
|
396
|
-
Native iOS automation via Xcode + Appium:
|
|
397
|
-
|
|
398
|
-
```bash
|
|
399
|
-
agent-browser device list # List simulators
|
|
400
|
-
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
|
|
401
|
-
agent-browser -p ios snapshot -i && agent-browser -p ios click @e1
|
|
402
|
-
agent-browser -p ios close # Shuts down simulator
|
|
403
|
-
```
|
|
404
|
-
|
|
405
|
-
Requires: macOS + Xcode + `npm install -g appium && appium driver install xcuitest`.
|
|
406
|
-
|
|
407
|
-
Note: Mobile viewer mode (above) works on ANY phone browser via web viewer — no simulator needed.
|
|
408
|
-
|
|
409
|
-
### Cloud Browser Providers
|
|
410
|
-
|
|
411
|
-
Connect to managed browser services:
|
|
412
|
-
|
|
413
|
-
```bash
|
|
414
|
-
BROWSERBASE_API_KEY=key agent-browser --provider browserbase open https://example.com
|
|
415
|
-
KERNEL_API_KEY=key agent-browser --provider kernel open https://example.com
|
|
416
|
-
BROWSERUSE_API_KEY=key agent-browser --provider browseruse open https://example.com
|
|
417
|
-
```
|
|
418
|
-
|
|
419
|
-
Useful for: geo-distributed testing, IP diversity, team sharing, parallel scaling.
|
|
420
|
-
|
|
421
|
-
## Ref Lifecycle (Important)
|
|
422
|
-
|
|
423
|
-
Refs (`@e1`, `@e2`) are invalidated when the page changes. Always re-snapshot after navigation, form submission, or dynamic content loading:
|
|
424
|
-
|
|
425
|
-
```bash
|
|
426
|
-
agent-browser click @e5 # Navigates to new page
|
|
427
|
-
agent-browser snapshot -i # MUST re-snapshot
|
|
428
|
-
agent-browser click @e1 # Use new refs
|
|
429
148
|
```
|
|
430
149
|
|
|
431
|
-
|
|
150
|
+
## Key Flags
|
|
151
|
+
|
|
152
|
+
- `--session <name>` — Isolated session (or AGENT_BROWSER_SESSION env)
|
|
153
|
+
- `--profile <path>` — Persistent browser profile
|
|
154
|
+
- `--state <path>` — Load storage state from JSON file
|
|
155
|
+
- `--headers <json>` — HTTP headers scoped to URL's origin
|
|
156
|
+
- `--executable-path <path>` — Custom browser executable
|
|
157
|
+
- `--extension <path>` — Load browser extensions (repeatable)
|
|
158
|
+
- `--args <args>` — Browser launch args
|
|
159
|
+
- `--user-agent <ua>` — Custom User-Agent
|
|
160
|
+
- `--proxy <server>` — Proxy server URL
|
|
161
|
+
- `--proxy-bypass <hosts>` — Bypass proxy for these hosts
|
|
162
|
+
- `--ignore-https-errors` — Ignore HTTPS certificate errors
|
|
163
|
+
- `--allow-file-access` — Allow file:// URLs to access local files
|
|
164
|
+
- `-p, --provider <name>` — Browser provider: ios, browserbase, kernel, browseruse
|
|
165
|
+
- `--device <name>` — iOS device name
|
|
166
|
+
- `--json` — JSON output
|
|
167
|
+
- `--full, -f` — Full page screenshot
|
|
168
|
+
- `--headed` — Show browser window
|
|
169
|
+
- `--cdp <port>` — Connect via CDP
|
|
170
|
+
- `--debug` — Debug output
|
|
171
|
+
- `--version, -V` — Show version
|
|
172
|
+
- `--help, -h` — Show this help
|
|
173
|
+
|
|
174
|
+
## Environment Variables
|
|
175
|
+
|
|
176
|
+
- `AGENT_BROWSER_SESSION` — Session name (default: "default")
|
|
177
|
+
- `AGENT_BROWSER_EXECUTABLE_PATH` — Custom browser executable path
|
|
178
|
+
- `AGENT_BROWSER_PROVIDER` — Browser provider (ios, browserbase, kernel, browseruse)
|
|
179
|
+
- `AGENT_BROWSER_IOS_DEVICE` — Default iOS device name
|
|
180
|
+
- `AGENT_BROWSER_IOS_UDID` — iOS device UDID
|
|
181
|
+
- `AGENT_BROWSER_STREAM_PORT` — Stream Server port (default: 5005)
|
|
182
|
+
- `AGENT_BROWSER_SOCKET_DIR` — Custom socket directory
|
|
183
|
+
- `AGENT_BROWSER_HOME` — Installation directory
|
|
184
|
+
- `AGENT_BROWSER_PROFILE` — Persistent browser profile path
|
|
185
|
+
- `AGENT_BROWSER_STATE` — Storage state JSON file path
|
|
186
|
+
- `AGENT_BROWSER_EXTENSIONS` — Browser extensions (comma-separated)
|
|
187
|
+
- `AGENT_BROWSER_ARGS` — Browser launch args
|
|
188
|
+
- `AGENT_BROWSER_USER_AGENT` — Custom User-Agent
|
|
189
|
+
- `AGENT_BROWSER_PROXY` — Proxy server URL
|
|
190
|
+
- `AGENT_BROWSER_PROXY_BYPASS` — Proxy bypass hosts
|
|
191
|
+
- `AGENT_BROWSER_IGNORE_HTTPS_ERRORS` — Set to "1" to ignore HTTPS errors
|
|
192
|
+
- `AGENT_BROWSER_ALLOW_FILE_ACCESS` — Set to "1" to allow file:// access
|
|
193
|
+
- `AGENT_BROWSER_HEADED` — Set to "1" for headed mode
|
|
194
|
+
- `AGENT_BROWSER_HUMAN` — Enable human-like mouse movement (1, bezier, arc, random, linear)
|
|
195
|
+
- `MESSAGE_BRIDGE_URL` — Message Bridge URL for 'ask' command
|
|
196
|
+
- `HTTP_PROXY` / `HTTPS_PROXY` — Proxy for Message Bridge requests
|
|
432
197
|
|
|
433
198
|
## Reference Docs
|
|
434
199
|
|
|
435
|
-
|
|
|
436
|
-
|
|
437
|
-
| [
|
|
438
|
-
| [
|
|
439
|
-
| [
|
|
440
|
-
| [
|
|
441
|
-
| [
|
|
442
|
-
| [
|
|
443
|
-
| [
|
|
444
|
-
| [
|
|
445
|
-
| [
|
|
446
|
-
| [
|
|
447
|
-
| [
|
|
200
|
+
| Doc | Content |
|
|
201
|
+
|-----|---------|
|
|
202
|
+
| [Complete Command Reference](references/commands.md) | All commands with options and examples |
|
|
203
|
+
| [Snapshot & Refs](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, shell scripts |
|
|
204
|
+
| [Data Extraction](references/data-extraction.md) | DOM scraping, JS eval, API interception, infinite scroll |
|
|
205
|
+
| [Session & State](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
|
|
206
|
+
| [Authentication](references/authentication.md) | Login flows, OAuth, 2FA, state reuse |
|
|
207
|
+
| [Network Control](references/network-monitoring.md) | Request monitoring, API mocking, URL blocking |
|
|
208
|
+
| [Recording & Replay](references/recorder.md) | Step recorder, video recording, trace |
|
|
209
|
+
| [Proxy Config](references/proxy-support.md) | HTTP/SOCKS5 proxy, geo-testing, rotating proxies |
|
|
210
|
+
| [Viewer / Streaming](references/viewer-mode.md) | Frame streaming, element crop, architecture |
|
|
211
|
+
| [Mobile Remote Control](references/mobile-viewer.md) | Touchpad, input panel, IME/CJK, DeviceMode |
|
|
212
|
+
| [Video Recording](references/video-recording.md) | WebM video capture for debugging |
|
|
File without changes
|