@dyyz1993/agent-browser 0.9.2 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/utils/parseCli.d.ts +1 -0
- package/dist/__tests__/utils/parseCli.d.ts.map +1 -1
- package/dist/__tests__/utils/parseCli.js +18 -10
- package/dist/__tests__/utils/parseCli.js.map +1 -1
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +63 -3
- package/dist/actions.js.map +1 -1
- package/dist/browser.d.ts +46 -2
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +343 -13
- package/dist/browser.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +8 -3
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/connection.d.ts.map +1 -1
- package/dist/cli/connection.js +39 -1
- package/dist/cli/connection.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +27 -20
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/output.d.ts.map +1 -1
- package/dist/cli/output.js +5 -0
- package/dist/cli/output.js.map +1 -1
- package/dist/cli.js +20 -0
- package/dist/cli.js.map +1 -1
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +147 -1
- package/dist/daemon.js.map +1 -1
- package/dist/message-bridge.d.ts.map +1 -1
- package/dist/message-bridge.js +22 -4
- package/dist/message-bridge.js.map +1 -1
- package/dist/openapi.d.ts +22 -0
- package/dist/openapi.d.ts.map +1 -0
- package/dist/openapi.js +382 -0
- package/dist/openapi.js.map +1 -0
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +18 -0
- package/dist/protocol.js.map +1 -1
- package/dist/recorder/inject.js +61 -134
- package/dist/stream-server-standalone.d.ts +10 -0
- package/dist/stream-server-standalone.d.ts.map +1 -1
- package/dist/stream-server-standalone.js +594 -74
- package/dist/stream-server-standalone.js.map +1 -1
- package/dist/stream-server.d.ts +67 -2
- package/dist/stream-server.d.ts.map +1 -1
- package/dist/stream-server.js +371 -51
- package/dist/stream-server.js.map +1 -1
- package/dist/swagger-ui.d.ts +6 -0
- package/dist/swagger-ui.d.ts.map +1 -0
- package/dist/swagger-ui.js +51 -0
- package/dist/swagger-ui.js.map +1 -0
- package/dist/test-live.d.ts +2 -0
- package/dist/test-live.d.ts.map +1 -0
- package/dist/test-live.js +333 -0
- package/dist/test-live.js.map +1 -0
- package/dist/types.d.ts +7 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/viewer-html.d.ts.map +1 -1
- package/dist/viewer-html.js +270 -58
- package/dist/viewer-html.js.map +1 -1
- package/dist/viewer-script.d.ts +20 -2
- package/dist/viewer-script.d.ts.map +1 -1
- package/dist/viewer-script.js +911 -154
- package/dist/viewer-script.js.map +1 -1
- package/package.json +1 -1
- package/scripts/postinstall.js +6 -32
- package/scripts/test-cli-help.sh +51 -0
- package/scripts/verify-form.sh +67 -0
- package/scripts/verify-login.sh +65 -0
- package/scripts/verify-recording.sh +80 -0
- package/scripts/verify-upload.sh +41 -0
- package/skills/agent-browser/SKILL.md +297 -160
- package/skills/agent-browser/references/commands.md +3 -0
- package/skills/agent-browser/references/mobile-viewer.md +188 -0
- package/skills/agent-browser/references/network-monitoring.md +232 -0
- package/skills/agent-browser/references/recorder.md +319 -0
- package/skills/agent-browser/references/viewer-mode.md +148 -0
- package/skills/agent-browser/templates/api-interception.sh +3 -1
- package/skills/agent-browser/templates/data-extraction.sh +8 -4
- package/skills/agent-browser/templates/form-automation.sh +18 -23
- package/skills/agent-browser/templates/network-intercept-crawl.sh +256 -0
- package/skills/agent-browser/templates/recorder-workflow.sh +51 -0
- package/skills/agent-browser/templates/viewer-remote.sh +41 -0
- package/dist/__tests__/test-iframe.d.ts +0 -2
- package/dist/__tests__/test-iframe.d.ts.map +0 -1
- package/dist/__tests__/test-iframe.js +0 -52
- package/dist/__tests__/test-iframe.js.map +0 -1
- package/dist/cli-new.d.ts +0 -3
- package/dist/cli-new.d.ts.map +0 -1
- package/dist/cli-new.js +0 -308
- package/dist/cli-new.js.map +0 -1
- package/dist/cli-old.d.ts +0 -3
- package/dist/cli-old.d.ts.map +0 -1
- package/dist/cli-old.js +0 -1101
- package/dist/cli-old.js.map +0 -1
- package/dist/recorder/binding.d.ts +0 -24
- package/dist/recorder/binding.d.ts.map +0 -1
- package/dist/recorder/binding.js +0 -215
- package/dist/recorder/binding.js.map +0 -1
- package/dist/recorder/index.d.ts +0 -4
- package/dist/recorder/index.d.ts.map +0 -1
- package/dist/recorder/index.js +0 -4
- package/dist/recorder/index.js.map +0 -1
- package/dist/recorder/recorder.d.ts +0 -19
- package/dist/recorder/recorder.d.ts.map +0 -1
- package/dist/recorder/recorder.js +0 -101
- package/dist/recorder/recorder.js.map +0 -1
- package/dist/recorder/store.d.ts +0 -22
- package/dist/recorder/store.d.ts.map +0 -1
- package/dist/recorder/store.js +0 -150
- package/dist/recorder/store.js.map +0 -1
- package/dist/recorder/types.d.ts +0 -73
- package/dist/recorder/types.d.ts.map +0 -1
- package/dist/recorder/types.js +0 -5
- package/dist/recorder/types.js.map +0 -1
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: agent-browser
|
|
3
|
-
description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction.
|
|
3
|
+
description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, viewer/streaming mode, mobile remote control, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", "view remote browser", "mobile browsing", or any task requiring programmatic web interaction.
|
|
4
4
|
allowed-tools: Bash(agent-browser:*)
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# Browser Automation with agent-browser
|
|
8
8
|
|
|
9
|
-
##
|
|
9
|
+
## Quick Start
|
|
10
10
|
|
|
11
11
|
Every browser automation follows this pattern:
|
|
12
12
|
|
|
@@ -27,112 +27,191 @@ agent-browser wait --load networkidle
|
|
|
27
27
|
agent-browser snapshot -i # Check result
|
|
28
28
|
```
|
|
29
29
|
|
|
30
|
-
##
|
|
30
|
+
## Essential Commands
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
### Navigation
|
|
33
33
|
|
|
34
34
|
```bash
|
|
35
|
-
|
|
36
|
-
agent-browser
|
|
35
|
+
agent-browser open <url> # Navigate (aliases: goto, navigate)
|
|
36
|
+
agent-browser back # Go back
|
|
37
|
+
agent-browser forward # Go forward
|
|
38
|
+
agent-browser reload # Reload page
|
|
39
|
+
agent-browser close # Close browser (aliases: quit, exit)
|
|
40
|
+
```
|
|
37
41
|
|
|
38
|
-
|
|
39
|
-
agent-browser snapshot --in-frame "#outer-frame/inner-frame"
|
|
42
|
+
### Element Interaction
|
|
40
43
|
|
|
41
|
-
|
|
42
|
-
agent-browser
|
|
43
|
-
agent-browser
|
|
44
|
-
agent-browser
|
|
45
|
-
agent-browser
|
|
46
|
-
agent-browser
|
|
44
|
+
```bash
|
|
45
|
+
agent-browser click @e1 # Click element
|
|
46
|
+
agent-browser dblclick @e1 # Double-click
|
|
47
|
+
agent-browser fill @e2 "text" # Clear and type text
|
|
48
|
+
agent-browser type @e2 "text" # Type without clearing
|
|
49
|
+
agent-browser select @e1 "option" # Select dropdown option
|
|
50
|
+
agent-browser check @e1 # Check checkbox
|
|
51
|
+
agent-browser uncheck @e1 # Uncheck checkbox
|
|
52
|
+
agent-browser press Enter # Press key (alias: key)
|
|
53
|
+
agent-browser keydown / keyup # Raw key down / up
|
|
54
|
+
agent-browser hover @e1 # Hover over element
|
|
55
|
+
agent-browser focus @e1 # Focus element
|
|
56
|
+
agent-browser drag @e1 @e2 # Drag from e1 to e2
|
|
57
|
+
agent-browser upload @e1 "/path" # Upload file
|
|
58
|
+
agent-browser download @e1 "/path" # Download resource
|
|
47
59
|
```
|
|
48
60
|
|
|
49
|
-
###
|
|
61
|
+
### Scrolling
|
|
50
62
|
|
|
51
|
-
|
|
52
|
-
-
|
|
53
|
-
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
Examples:
|
|
57
|
-
- `#my-iframe` - Single iframe
|
|
58
|
-
- `#0` - First iframe
|
|
59
|
-
- `#outer-iframe/login-frame` - Nested iframes by name
|
|
60
|
-
- `#0/1` - First iframe's second child
|
|
63
|
+
```bash
|
|
64
|
+
agent-browser scroll down 500 # Scroll pixels
|
|
65
|
+
agent-browser scrollintoview @e1 # Scroll element into view
|
|
66
|
+
```
|
|
61
67
|
|
|
62
|
-
|
|
68
|
+
### Snapshot & Inspection
|
|
63
69
|
|
|
64
70
|
```bash
|
|
65
|
-
# Navigation
|
|
66
|
-
agent-browser open <url> # Navigate (aliases: goto, navigate)
|
|
67
|
-
agent-browser close # Close browser
|
|
68
|
-
|
|
69
|
-
# Snapshot
|
|
70
71
|
agent-browser snapshot -i # Interactive elements with refs (recommended)
|
|
71
|
-
agent-browser snapshot -i -C # Include cursor-interactive elements
|
|
72
|
+
agent-browser snapshot -i -C # Include cursor-interactive elements
|
|
72
73
|
agent-browser snapshot -s "#selector" # Scope to CSS selector
|
|
73
74
|
agent-browser snapshot -s "body" --path # Include xpath and cssPath in refs
|
|
74
75
|
agent-browser snapshot -s "body" --attrs # Include element attributes in refs
|
|
76
|
+
agent-browser snapshot -i --json # JSON output for parsing
|
|
77
|
+
```
|
|
75
78
|
|
|
76
|
-
|
|
77
|
-
agent-browser click @e1 # Click element
|
|
78
|
-
agent-browser fill @e2 "text" # Clear and type text
|
|
79
|
-
agent-browser type @e2 "text" # Type without clearing
|
|
80
|
-
agent-browser select @e1 "option" # Select dropdown option
|
|
81
|
-
agent-browser check @e1 # Check checkbox
|
|
82
|
-
agent-browser press Enter # Press key
|
|
83
|
-
agent-browser scroll down 500 # Scroll page
|
|
79
|
+
### Getting Information
|
|
84
80
|
|
|
85
|
-
|
|
86
|
-
agent-browser get text @e1 # Get element text
|
|
81
|
+
```bash
|
|
82
|
+
agent-browser get text @e1 # Get element text content
|
|
87
83
|
agent-browser get url # Get current URL
|
|
88
84
|
agent-browser get title # Get page title
|
|
85
|
+
agent-browser get count ".item" # Count matching elements
|
|
86
|
+
agent-browser get box @e1 # Bounding box {x,y,width,height}
|
|
87
|
+
agent-browser get styles @e1 # Computed styles
|
|
88
|
+
agent-browser is visible @e1 # Visibility check
|
|
89
|
+
agent-browser is enabled @e1 # Enabled check
|
|
90
|
+
agent-browser is checked @e1 # Checked state
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### Waiting
|
|
89
94
|
|
|
90
|
-
|
|
91
|
-
agent-browser wait @e1 # Wait for element
|
|
95
|
+
```bash
|
|
96
|
+
agent-browser wait @e1 # Wait for element to appear
|
|
92
97
|
agent-browser wait --load networkidle # Wait for network idle
|
|
93
|
-
agent-browser wait --
|
|
94
|
-
agent-browser wait
|
|
98
|
+
agent-browser wait --load domcontentloaded # Wait for DOM ready
|
|
99
|
+
agent-browser wait --url "**/page" # Wait for URL pattern match
|
|
100
|
+
agent-browser wait --text "Hello" # Wait for text on page
|
|
101
|
+
agent-browser wait --fn "document.hidden === false" # Wait for JS expression
|
|
102
|
+
agent-browser wait --download # Wait for download to complete
|
|
103
|
+
agent-browser wait 2000 # Wait milliseconds (fixed delay)
|
|
104
|
+
agent-browser wait --request "api/data" # Wait for specific network request (background listener)
|
|
105
|
+
```
|
|
95
106
|
|
|
96
|
-
|
|
107
|
+
### Capture
|
|
108
|
+
|
|
109
|
+
```bash
|
|
97
110
|
agent-browser screenshot # Screenshot to temp dir
|
|
98
111
|
agent-browser screenshot --full # Full page screenshot
|
|
112
|
+
agent-browser screenshot output.png # Save to file
|
|
99
113
|
agent-browser pdf output.pdf # Save as PDF
|
|
100
114
|
```
|
|
101
115
|
|
|
102
|
-
|
|
116
|
+
### Network Monitoring
|
|
103
117
|
|
|
104
|
-
|
|
118
|
+
```bash
|
|
119
|
+
agent-browser network requests # View all network requests
|
|
120
|
+
agent-browser network requests --filter "**/api/**" # Filter by URL pattern
|
|
121
|
+
agent-browser network requests --clear # Clear request history
|
|
122
|
+
agent-browser network requests --capture-response # Capture response bodies
|
|
123
|
+
agent-browser network requests --capture-response --type json # Filter captured by content type
|
|
124
|
+
agent-browser network requests --output ./captures/ # Save captures to directory
|
|
125
|
+
agent-browser network route "**/api/**" --abort # Block requests
|
|
126
|
+
agent-browser network route "**/api/**" --body '{"users": []}' # Mock response
|
|
127
|
+
agent-browser network route "**/api/**" --status 404 # Mock status code
|
|
128
|
+
agent-browser network unroute "**/api/**" # Remove route
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
See [network-monitoring.md](references/network-monitoring.md) for advanced patterns.
|
|
132
|
+
|
|
133
|
+
### Tabs & Windows
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
agent-browser tab list # List all tabs
|
|
137
|
+
agent-browser tab new # Open new tab
|
|
138
|
+
agent-browser tab close 2 # Close tab by index
|
|
139
|
+
agent-browser tab switch 0 # Switch to tab
|
|
140
|
+
agent-browser window new # Open new window
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Dialogs & Alerts
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
agent-browser dialog accept # Accept alert/dialog
|
|
147
|
+
agent-browser dialog dismiss # Dismiss alert/dialog
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Browser State
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
agent-browser state save auth.json # Save cookies/localStorage/session
|
|
154
|
+
agent-browser state clear # Clear all state
|
|
155
|
+
agent-browser storage session dump # Dump session storage
|
|
156
|
+
agent-browser storage session load # Load session storage
|
|
157
|
+
agent-browser cookies set name value domain # Set cookie
|
|
158
|
+
agent-browser cookies export # Export all cookies
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Debugging
|
|
162
|
+
|
|
163
|
+
```bash
|
|
164
|
+
agent-browser console "1+1" # Evaluate JS in browser console
|
|
165
|
+
agent-browser errors # Show recent page errors
|
|
166
|
+
agent-browser highlight @e1 # Highlight element on page
|
|
167
|
+
agent-browser trace start # Start Chrome trace
|
|
168
|
+
agent-browser trace stop ./trace.json # Stop and save trace
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Session Management
|
|
105
172
|
|
|
106
173
|
```bash
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
#
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
export AGENT_BROWSER_HUMAN=linear # Straight line (fastest)
|
|
115
|
-
|
|
116
|
-
# All interactions will use human-like movement
|
|
117
|
-
agent-browser click @e1
|
|
118
|
-
agent-browser fill @e1 "text"
|
|
119
|
-
agent-browser type @e1 "text"
|
|
120
|
-
agent-browser hover @e1
|
|
121
|
-
agent-browser dblclick @e1
|
|
122
|
-
|
|
123
|
-
# Wait with mouse wandering (when human mode enabled)
|
|
124
|
-
agent-browser wait 3000 # Wanders mouse while waiting
|
|
125
|
-
|
|
126
|
-
# Disable human mode
|
|
127
|
-
unset AGENT_BROWSER_HUMAN
|
|
174
|
+
agent-browser --session site1 open https://a.com # Named session
|
|
175
|
+
agent-browser --session site2 open https://b.com # Parallel session
|
|
176
|
+
agent-browser session list # List active sessions
|
|
177
|
+
agent-browser connect ws://localhost:9222 # Connect to remote CDP browser
|
|
178
|
+
agent-browser kill # Kill daemon process
|
|
179
|
+
agent-browser config # Show/edit config
|
|
180
|
+
agent-browser config [--json] # Config as JSON
|
|
128
181
|
```
|
|
129
182
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
183
|
+
## Global Options
|
|
184
|
+
|
|
185
|
+
These flags work with most commands:
|
|
186
|
+
|
|
187
|
+
| Flag | Description |
|
|
188
|
+
| -------------------------- | ---------------------------------------------- |
|
|
189
|
+
| `--session <name>` | Named browser session |
|
|
190
|
+
| `--json` | JSON output format |
|
|
191
|
+
| `--headed` | Show visible browser window |
|
|
192
|
+
| `--cdp <url>` | Connect via Chrome DevTools Protocol directly |
|
|
193
|
+
| `-p/--provider` | Provider: ios, browserbase, kernel, browseruse |
|
|
194
|
+
| `--proxy <url>` | HTTP/SOCKS5 proxy |
|
|
195
|
+
| `--proxy-bypass <rules>` | Proxy bypass rules |
|
|
196
|
+
| `--headers 'K: V'` | Extra HTTP headers per request |
|
|
197
|
+
| `--state <path>` | Restore browser state from file |
|
|
198
|
+
| `--profile <path>` | Chrome profile directory |
|
|
199
|
+
| `--args "<args>"` | Extra Chromium launch arguments |
|
|
200
|
+
| `--user-agent <ua>` | Custom User-Agent string |
|
|
201
|
+
| `--executable-path <path>` | Browser binary path |
|
|
202
|
+
| `--extension <path>` | Load .crx Chrome extension |
|
|
203
|
+
| `--ignore-https-errors` | Ignore HTTPS certificate errors |
|
|
204
|
+
| `--allow-file-access` | Allow file:// URLs |
|
|
205
|
+
| `--timeout <ms>` | Global operation timeout |
|
|
206
|
+
| `--debug` | Verbose debug logging |
|
|
207
|
+
|
|
208
|
+
Examples:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
agent-browser --proxy http://proxy:8080 open https://example.com
|
|
212
|
+
agent-browser --headed --debug open https://example.com
|
|
213
|
+
agent-browser --user-agent "MyBot/1.0" open https://example.com
|
|
214
|
+
```
|
|
136
215
|
|
|
137
216
|
## Common Patterns
|
|
138
217
|
|
|
@@ -161,7 +240,7 @@ agent-browser click @e3
|
|
|
161
240
|
agent-browser wait --url "**/dashboard"
|
|
162
241
|
agent-browser state save auth.json
|
|
163
242
|
|
|
164
|
-
# Reuse in future sessions
|
|
243
|
+
# Reuse in future sessions
|
|
165
244
|
agent-browser --state auth.json open https://app.example.com/dashboard
|
|
166
245
|
```
|
|
167
246
|
|
|
@@ -170,46 +249,32 @@ agent-browser --state auth.json open https://app.example.com/dashboard
|
|
|
170
249
|
```bash
|
|
171
250
|
agent-browser open https://example.com/products
|
|
172
251
|
agent-browser snapshot -i
|
|
173
|
-
agent-browser get text @e5 #
|
|
174
|
-
agent-browser get text body > page.txt #
|
|
175
|
-
|
|
176
|
-
#
|
|
177
|
-
agent-browser snapshot -i --json
|
|
178
|
-
agent-browser get text @e1 --json
|
|
252
|
+
agent-browser get text @e5 # Specific element
|
|
253
|
+
agent-browser get text body > page.txt # All page text
|
|
254
|
+
agent-browser snapshot -i --json # JSON for parsing
|
|
255
|
+
agent-browser get text @e1 --json # Element as JSON
|
|
179
256
|
```
|
|
180
257
|
|
|
181
|
-
### API Interception
|
|
258
|
+
### API Interception (Passive Capture)
|
|
182
259
|
|
|
183
|
-
|
|
260
|
+
Capture API responses without making direct requests:
|
|
184
261
|
|
|
185
262
|
```bash
|
|
186
|
-
# 1. Open blank page first
|
|
187
263
|
agent-browser open "about:blank"
|
|
188
|
-
|
|
189
|
-
# 2. Start request listener in background
|
|
190
264
|
(agent-browser wait --request "api/users" --timeout 30000 > response.json) &
|
|
191
|
-
WAIT_PID=$!
|
|
192
265
|
sleep 1
|
|
193
|
-
|
|
194
|
-
# 3. Navigate to trigger the API call
|
|
195
266
|
agent-browser open "https://example.com/user/profile"
|
|
196
|
-
|
|
197
|
-
# 4. Wait for response
|
|
198
|
-
wait $WAIT_PID
|
|
199
|
-
|
|
200
|
-
# 5. Process captured data
|
|
267
|
+
wait $!
|
|
201
268
|
jq '.body' response.json
|
|
202
269
|
```
|
|
203
270
|
|
|
204
|
-
|
|
271
|
+
### Network Monitoring & API Mocking
|
|
272
|
+
|
|
205
273
|
```bash
|
|
206
|
-
agent-browser
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
agent-browser
|
|
210
|
-
sleep 5
|
|
211
|
-
wait
|
|
212
|
-
jq '.body.aweme_list[:10] | map({id, desc, stats})' /tmp/douyin.json
|
|
274
|
+
agent-browser network requests --filter "**/api/**"
|
|
275
|
+
agent-browser network route "**/api/users" --body '{"users": []}'
|
|
276
|
+
agent-browser network route "**/ads/**" --abort
|
|
277
|
+
agent-browser network unroute "**/api/users"
|
|
213
278
|
```
|
|
214
279
|
|
|
215
280
|
### Parallel Sessions
|
|
@@ -217,94 +282,166 @@ jq '.body.aweme_list[:10] | map({id, desc, stats})' /tmp/douyin.json
|
|
|
217
282
|
```bash
|
|
218
283
|
agent-browser --session site1 open https://site-a.com
|
|
219
284
|
agent-browser --session site2 open https://site-b.com
|
|
220
|
-
|
|
221
285
|
agent-browser --session site1 snapshot -i
|
|
222
|
-
agent-browser --session site2 snapshot -i
|
|
223
|
-
|
|
224
286
|
agent-browser session list
|
|
225
287
|
```
|
|
226
288
|
|
|
227
|
-
###
|
|
289
|
+
### Local Files (PDFs, HTML)
|
|
228
290
|
|
|
229
291
|
```bash
|
|
230
|
-
agent-browser --
|
|
231
|
-
agent-browser
|
|
232
|
-
agent-browser
|
|
292
|
+
agent-browser --allow-file-access open file:///path/to/doc.pdf
|
|
293
|
+
agent-browser --allow-file-access open file:///path/to/page.html
|
|
294
|
+
agent-browser screenshot output.png
|
|
233
295
|
```
|
|
234
296
|
|
|
235
|
-
###
|
|
297
|
+
### Working with Iframes
|
|
298
|
+
|
|
299
|
+
Use `--in-frame` to operate inside iframes:
|
|
236
300
|
|
|
237
301
|
```bash
|
|
238
|
-
|
|
239
|
-
agent-browser --
|
|
240
|
-
agent-browser --
|
|
241
|
-
agent-browser
|
|
302
|
+
agent-browser snapshot --in-frame "#my-iframe"
|
|
303
|
+
agent-browser snapshot --in-frame "#outer/inner" # Nested path
|
|
304
|
+
agent-browser click @e1 --in-frame "#container/frame"
|
|
305
|
+
agent-browser fill "#user" "admin" --in-frame "#container/login-frame"
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
Frame path syntax: `#id-or-name`, `#index` (position), `#parent/child` (nested).
|
|
309
|
+
|
|
310
|
+
### Semantic Locators (Alternative to Refs)
|
|
311
|
+
|
|
312
|
+
When refs are unavailable, use semantic locators:
|
|
313
|
+
|
|
314
|
+
```bash
|
|
315
|
+
agent-browser find text "Sign In" click
|
|
316
|
+
agent-browser find label "Email" fill "user@test.com"
|
|
317
|
+
agent-browser find role button click --name "Submit"
|
|
318
|
+
agent-browser find placeholder "Search" type "query"
|
|
319
|
+
agent-browser find testid "submit-btn" click
|
|
242
320
|
```
|
|
243
321
|
|
|
244
|
-
###
|
|
322
|
+
### Proxy Configuration
|
|
245
323
|
|
|
246
324
|
```bash
|
|
247
|
-
|
|
248
|
-
agent-browser
|
|
325
|
+
agent-browser --proxy http://proxy:8080 open https://example.com
|
|
326
|
+
agent-browser --proxy socks5://proxy:1080 open https://example.com
|
|
327
|
+
agent-browser --proxy http://user:pass@proxy:8080 --proxy-bypass "localhost,*.internal" open https://example.com
|
|
328
|
+
```
|
|
249
329
|
|
|
250
|
-
|
|
251
|
-
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
|
|
330
|
+
## Advanced Features
|
|
252
331
|
|
|
253
|
-
|
|
254
|
-
agent-browser -p ios snapshot -i
|
|
255
|
-
agent-browser -p ios click @e1 # Click/tap element
|
|
256
|
-
agent-browser -p ios fill @e2 "text"
|
|
257
|
-
agent-browser -p ios scroll down 500 # Scroll gesture
|
|
332
|
+
### Recording & Replaying Workflows
|
|
258
333
|
|
|
259
|
-
|
|
260
|
-
agent-browser -p ios screenshot mobile.png
|
|
334
|
+
For test automation and workflow capture:
|
|
261
335
|
|
|
262
|
-
|
|
263
|
-
agent-browser
|
|
336
|
+
```bash
|
|
337
|
+
agent-browser recorder start --session my-test
|
|
338
|
+
agent-browser open https://example.com/form
|
|
339
|
+
agent-browser snapshot -i
|
|
340
|
+
agent-browser fill @e1 "user@example.com"
|
|
341
|
+
agent-browser click @e3
|
|
342
|
+
agent-browser recorder stop --output test-workflow.yaml
|
|
343
|
+
agent-browser recorder replay test-workflow.yaml
|
|
264
344
|
```
|
|
265
345
|
|
|
266
|
-
|
|
346
|
+
See [recorder.md](references/recorder.md) for details.
|
|
267
347
|
|
|
268
|
-
|
|
348
|
+
### Human-like Mouse Movement
|
|
269
349
|
|
|
270
|
-
|
|
350
|
+
Simulate natural mouse trajectories via environment variable:
|
|
271
351
|
|
|
272
|
-
|
|
352
|
+
```bash
|
|
353
|
+
export AGENT_BROWSER_HUMAN=1 # Enable (default: arc path)
|
|
354
|
+
export AGENT_BROWSER_HUMAN=bezier # Bezier curve with overshoot
|
|
355
|
+
export AGENT_BROWSER_HUMAN=random # Random path with jitter
|
|
356
|
+
export AGENT_BROWSER_HUMAN=linear # Straight line (fastest)
|
|
357
|
+
|
|
358
|
+
agent-browser click @e1 # Uses human trajectory
|
|
359
|
+
agent-browser wait 3000 # Mouse wandering while waiting
|
|
360
|
+
unset AGENT_BROWSER_HUMAN # Disable
|
|
361
|
+
```
|
|
273
362
|
|
|
274
|
-
|
|
363
|
+
Features: continuous position tracking, acceleration curves, 4 trajectory types, auto-wandering on wait.
|
|
275
364
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
-
|
|
365
|
+
### Viewer / Streaming Mode
|
|
366
|
+
|
|
367
|
+
Real-time remote browser visualization with frame streaming over WebSocket.
|
|
279
368
|
|
|
280
369
|
```bash
|
|
281
|
-
|
|
282
|
-
agent-browser
|
|
283
|
-
agent-browser
|
|
370
|
+
# Start viewer after opening a page
|
|
371
|
+
agent-browser open https://example.com
|
|
372
|
+
agent-browser viewer # Opens viewer URL in browser
|
|
373
|
+
agent-browser viewer --json # Get connection details as JSON
|
|
284
374
|
```
|
|
285
375
|
|
|
286
|
-
**
|
|
376
|
+
**Architecture:** Browser -> Daemon (IPC) -> Standalone Server (:5005) -> Viewer (WebSocket)
|
|
377
|
+
|
|
378
|
+
**Element Crop Mode:** Stream can be cropped to a specific DOM element's bounds. Coordinates auto-map to element-local space.
|
|
379
|
+
|
|
380
|
+
See [viewer-mode.md](references/viewer-mode.md) for architecture details, troubleshooting, and element mode.
|
|
381
|
+
|
|
382
|
+
### Mobile Remote Control (Touch Devices)
|
|
383
|
+
|
|
384
|
+
When the viewer is opened on a phone/tablet, it automatically enters **mobile mode** with touch-optimized UI:
|
|
385
|
+
|
|
386
|
+
- **Touchpad**: Bottom-area gesture surface (tap=click, drag=move cursor, long-press=drag, 2-finger=scroll)
|
|
387
|
+
- **Input Panel**: Tap remote input field -> local text input appears -> syncs to remote via `input_fill`
|
|
388
|
+
- **Virtual Keyboard Toolbar**: Tab, Arrows, Enter, Backspace, Escape
|
|
389
|
+
- **IME Support**: Chinese/Japanese composition (pinyin etc.) — intermediate input NOT sent to remote
|
|
390
|
+
- **DeviceMode**: Auto-detects device type, switches UI dynamically on resize/orientationchange/matchMedia
|
|
287
391
|
|
|
288
|
-
|
|
392
|
+
See [mobile-viewer.md](references/mobile-viewer.md) for touchpad gestures, input panel flow, and DeviceMode architecture.
|
|
289
393
|
|
|
290
|
-
|
|
394
|
+
### iOS Simulator (Appium)
|
|
395
|
+
|
|
396
|
+
Native iOS automation via Xcode + Appium:
|
|
291
397
|
|
|
292
398
|
```bash
|
|
293
|
-
agent-browser
|
|
294
|
-
agent-browser
|
|
295
|
-
agent-browser
|
|
296
|
-
agent-browser
|
|
297
|
-
|
|
399
|
+
agent-browser device list # List simulators
|
|
400
|
+
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
|
|
401
|
+
agent-browser -p ios snapshot -i && agent-browser -p ios click @e1
|
|
402
|
+
agent-browser -p ios close # Shuts down simulator
|
|
403
|
+
```
|
|
404
|
+
|
|
405
|
+
Requires: macOS + Xcode + `npm install -g appium && appium driver install xcuitest`.
|
|
406
|
+
|
|
407
|
+
Note: Mobile viewer mode (above) works on ANY phone browser via web viewer — no simulator needed.
|
|
408
|
+
|
|
409
|
+
### Cloud Browser Providers
|
|
410
|
+
|
|
411
|
+
Connect to managed browser services:
|
|
412
|
+
|
|
413
|
+
```bash
|
|
414
|
+
BROWSERBASE_API_KEY=key agent-browser --provider browserbase open https://example.com
|
|
415
|
+
KERNEL_API_KEY=key agent-browser --provider kernel open https://example.com
|
|
416
|
+
BROWSERUSE_API_KEY=key agent-browser --provider browseruse open https://example.com
|
|
417
|
+
```
|
|
418
|
+
|
|
419
|
+
Useful for: geo-distributed testing, IP diversity, team sharing, parallel scaling.
|
|
420
|
+
|
|
421
|
+
## Ref Lifecycle (Important)
|
|
422
|
+
|
|
423
|
+
Refs (`@e1`, `@e2`) are invalidated when the page changes. Always re-snapshot after navigation, form submission, or dynamic content loading:
|
|
424
|
+
|
|
425
|
+
```bash
|
|
426
|
+
agent-browser click @e5 # Navigates to new page
|
|
427
|
+
agent-browser snapshot -i # MUST re-snapshot
|
|
428
|
+
agent-browser click @e1 # Use new refs
|
|
298
429
|
```
|
|
299
430
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
|
305
|
-
|
|
|
306
|
-
| [
|
|
307
|
-
| [
|
|
308
|
-
| [
|
|
309
|
-
| [
|
|
310
|
-
| [
|
|
431
|
+
Refs are session-specific. For shell scripts, use semantic locators or CSS selectors instead. See [snapshot-refs.md](references/snapshot-refs.md).
|
|
432
|
+
|
|
433
|
+
## Reference Docs
|
|
434
|
+
|
|
435
|
+
| Reference | Content |
|
|
436
|
+
| --------------------------------------------------------- | ------------------------------------------------------------- |
|
|
437
|
+
| [commands.md](references/commands.md) | Complete command reference with all options |
|
|
438
|
+
| [data-extraction.md](references/data-extraction.md) | DOM, JS variables, API interception, infinite scroll, iframe |
|
|
439
|
+
| [snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, shell script conversion |
|
|
440
|
+
| [session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
|
|
441
|
+
| [authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
|
|
442
|
+
| [video-recording.md](references/video-recording.md) | Video recording for debugging |
|
|
443
|
+
| [recorder.md](references/recorder.md) | Action recording & replay for test automation |
|
|
444
|
+
| [proxy-support.md](references/proxy-support.md) | Proxy config, geo-testing, rotating proxies |
|
|
445
|
+
| [network-monitoring.md](references/network-monitoring.md) | Request monitoring, API mocking, request blocking |
|
|
446
|
+
| [viewer-mode.md](references/viewer-mode.md) | Streaming viewer, element crop, architecture, troubleshooting |
|
|
447
|
+
| [mobile-viewer.md](references/mobile-viewer.md) | Touchpad, input panel, IME/CJK support, DeviceMode |
|
|
@@ -363,6 +363,7 @@ agent-browser --full ... # Full page screenshot (-f)
|
|
|
363
363
|
agent-browser --cdp <port> ... # Connect via Chrome DevTools Protocol
|
|
364
364
|
agent-browser -p <provider> ... # Cloud browser provider (--provider)
|
|
365
365
|
agent-browser --proxy <url> ... # Use proxy server
|
|
366
|
+
agent-browser --proxy-bypass <list> # Comma-separated bypass list for proxy
|
|
366
367
|
agent-browser --headers <json> ... # HTTP headers scoped to URL's origin
|
|
367
368
|
agent-browser --executable-path <p> # Custom browser executable
|
|
368
369
|
agent-browser --extension <path> ... # Load browser extension (repeatable)
|
|
@@ -468,4 +469,6 @@ AGENT_BROWSER_PROVIDER="browserbase" # Cloud browser provider
|
|
|
468
469
|
AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port
|
|
469
470
|
AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location
|
|
470
471
|
AGENT_BROWSER_HUMAN="1" # Enable human-like mouse movement (1, bezier, arc, random, linear)
|
|
472
|
+
AGENT_BROWSER_PROXY="http://proxy:8080" # Proxy server URL
|
|
473
|
+
AGENT_BROWSER_PROXY_BYPASS="localhost,*" # Proxy bypass hosts
|
|
471
474
|
```
|