agent-browser 0.18.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +131 -57
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/bin/agent-browser-darwin-x64 +0 -0
- package/bin/agent-browser-linux-arm64 +0 -0
- package/bin/agent-browser-linux-x64 +0 -0
- package/bin/agent-browser-win32-x64.exe +0 -0
- package/dist/actions.js +45 -44
- package/dist/actions.js.map +1 -1
- package/dist/browser.d.ts +38 -1
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +152 -0
- package/dist/browser.js.map +1 -1
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +0 -2
- package/dist/daemon.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +3 -1
- package/dist/protocol.js.map +1 -1
- package/dist/types.d.ts +3 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +11 -2
- package/skills/agent-browser/SKILL.md +28 -14
package/README.md
CHANGED
|
@@ -115,6 +115,8 @@ agent-browser drag <src> <tgt> # Drag and drop
|
|
|
115
115
|
agent-browser upload <sel> <files> # Upload files
|
|
116
116
|
agent-browser screenshot [path] # Take screenshot (--full for full page, saves to a temporary directory if no path)
|
|
117
117
|
agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
|
|
118
|
+
agent-browser screenshot --screenshot-dir ./shots # Save to custom directory
|
|
119
|
+
agent-browser screenshot --screenshot-format jpeg --screenshot-quality 80
|
|
118
120
|
agent-browser pdf <path> # Save as PDF
|
|
119
121
|
agent-browser snapshot # Accessibility tree with refs (best for AI)
|
|
120
122
|
agent-browser eval <js> # Run JavaScript (-b for base64, --stdin for piped input)
|
|
@@ -165,6 +167,7 @@ agent-browser find nth <n> <sel> <action> [value] # Nth match
|
|
|
165
167
|
**Options:** `--name <name>` (filter role by accessible name), `--exact` (require exact text match)
|
|
166
168
|
|
|
167
169
|
**Examples:**
|
|
170
|
+
|
|
168
171
|
```bash
|
|
169
172
|
agent-browser find role button click --name "Submit"
|
|
170
173
|
agent-browser find text "Sign In" click
|
|
@@ -178,14 +181,27 @@ agent-browser find nth 2 "a" text
|
|
|
178
181
|
```bash
|
|
179
182
|
agent-browser wait <selector> # Wait for element to be visible
|
|
180
183
|
agent-browser wait <ms> # Wait for time (milliseconds)
|
|
181
|
-
agent-browser wait --text "Welcome" # Wait for text to appear
|
|
184
|
+
agent-browser wait --text "Welcome" # Wait for text to appear (substring match)
|
|
182
185
|
agent-browser wait --url "**/dash" # Wait for URL pattern
|
|
183
186
|
agent-browser wait --load networkidle # Wait for load state
|
|
184
187
|
agent-browser wait --fn "window.ready === true" # Wait for JS condition
|
|
188
|
+
|
|
189
|
+
# Wait for text/element to disappear
|
|
190
|
+
agent-browser wait --fn "!document.body.innerText.includes('Loading...')"
|
|
191
|
+
agent-browser wait "#spinner" --state hidden
|
|
185
192
|
```
|
|
186
193
|
|
|
187
194
|
**Load states:** `load`, `domcontentloaded`, `networkidle`
|
|
188
195
|
|
|
196
|
+
### Clipboard
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
agent-browser clipboard read # Read text from clipboard
|
|
200
|
+
agent-browser clipboard write "Hello, World!" # Write text to clipboard
|
|
201
|
+
agent-browser clipboard copy # Copy current selection (Ctrl+C, or Cmd+C on macOS)
|
|
202
|
+
agent-browser clipboard paste # Paste from clipboard (Ctrl+V, or Cmd+V on macOS)
|
|
203
|
+
```
|
|
204
|
+
|
|
189
205
|
### Mouse Control
|
|
190
206
|
|
|
191
207
|
```bash
|
|
@@ -375,6 +391,7 @@ agent-browser session
|
|
|
375
391
|
```
|
|
376
392
|
|
|
377
393
|
Each session has its own:
|
|
394
|
+
|
|
378
395
|
- Browser instance
|
|
379
396
|
- Cookies and storage
|
|
380
397
|
- Navigation history
|
|
@@ -396,6 +413,7 @@ AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
|
|
|
396
413
|
```
|
|
397
414
|
|
|
398
415
|
The profile directory stores:
|
|
416
|
+
|
|
399
417
|
- Cookies and localStorage
|
|
400
418
|
- IndexedDB data
|
|
401
419
|
- Service workers
|
|
@@ -432,10 +450,10 @@ export AGENT_BROWSER_ENCRYPTION_KEY=<64-char-hex-key>
|
|
|
432
450
|
agent-browser --session-name secure open example.com
|
|
433
451
|
```
|
|
434
452
|
|
|
435
|
-
| Variable
|
|
436
|
-
|
|
437
|
-
| `AGENT_BROWSER_SESSION_NAME`
|
|
438
|
-
| `AGENT_BROWSER_ENCRYPTION_KEY`
|
|
453
|
+
| Variable | Description |
|
|
454
|
+
| --------------------------------- | -------------------------------------------------- |
|
|
455
|
+
| `AGENT_BROWSER_SESSION_NAME` | Auto-save/load state persistence name |
|
|
456
|
+
| `AGENT_BROWSER_ENCRYPTION_KEY` | 64-char hex key for AES-256-GCM encryption |
|
|
439
457
|
| `AGENT_BROWSER_STATE_EXPIRE_DAYS` | Auto-delete states older than N days (default: 30) |
|
|
440
458
|
|
|
441
459
|
## Security
|
|
@@ -449,14 +467,14 @@ agent-browser includes security features for safe AI agent deployments. All feat
|
|
|
449
467
|
- **Action Confirmation** -- Require explicit approval for sensitive action categories: `--confirm-actions eval,download`
|
|
450
468
|
- **Output Length Limits** -- Prevent context flooding: `--max-output 50000`
|
|
451
469
|
|
|
452
|
-
| Variable
|
|
453
|
-
|
|
454
|
-
| `AGENT_BROWSER_CONTENT_BOUNDARIES`
|
|
455
|
-
| `AGENT_BROWSER_MAX_OUTPUT`
|
|
456
|
-
| `AGENT_BROWSER_ALLOWED_DOMAINS`
|
|
457
|
-
| `AGENT_BROWSER_ACTION_POLICY`
|
|
458
|
-
| `AGENT_BROWSER_CONFIRM_ACTIONS`
|
|
459
|
-
| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts
|
|
470
|
+
| Variable | Description |
|
|
471
|
+
| ----------------------------------- | ---------------------------------------- |
|
|
472
|
+
| `AGENT_BROWSER_CONTENT_BOUNDARIES` | Wrap page output in boundary markers |
|
|
473
|
+
| `AGENT_BROWSER_MAX_OUTPUT` | Max characters for page output |
|
|
474
|
+
| `AGENT_BROWSER_ALLOWED_DOMAINS` | Comma-separated allowed domain patterns |
|
|
475
|
+
| `AGENT_BROWSER_ACTION_POLICY` | Path to action policy JSON file |
|
|
476
|
+
| `AGENT_BROWSER_CONFIRM_ACTIONS` | Action categories requiring confirmation |
|
|
477
|
+
| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
|
|
460
478
|
|
|
461
479
|
See [Security documentation](https://agent-browser.dev/security) for details.
|
|
462
480
|
|
|
@@ -474,13 +492,13 @@ agent-browser snapshot -s "#main" # Scope to CSS selector
|
|
|
474
492
|
agent-browser snapshot -i -c -d 5 # Combine options
|
|
475
493
|
```
|
|
476
494
|
|
|
477
|
-
| Option
|
|
478
|
-
|
|
479
|
-
| `-i, --interactive`
|
|
480
|
-
| `-C, --cursor`
|
|
481
|
-
| `-c, --compact`
|
|
482
|
-
| `-d, --depth <n>`
|
|
483
|
-
| `-s, --selector <sel>` | Scope to CSS selector
|
|
495
|
+
| Option | Description |
|
|
496
|
+
| ---------------------- | ----------------------------------------------------------------------- |
|
|
497
|
+
| `-i, --interactive` | Only show interactive elements (buttons, links, inputs) |
|
|
498
|
+
| `-C, --cursor` | Include cursor-interactive elements (cursor:pointer, onclick, tabindex) |
|
|
499
|
+
| `-c, --compact` | Remove empty structural elements |
|
|
500
|
+
| `-d, --depth <n>` | Limit tree depth |
|
|
501
|
+
| `-s, --selector <sel>` | Scope to CSS selector |
|
|
484
502
|
|
|
485
503
|
The `-C` flag is useful for modern web apps that use custom clickable elements (divs, spans) instead of standard buttons/links.
|
|
486
504
|
|
|
@@ -529,6 +547,9 @@ This is useful for multimodal AI models that can reason about visual layout, unl
|
|
|
529
547
|
| `--json` | JSON output (for agents) |
|
|
530
548
|
| `--full, -f` | Full page screenshot |
|
|
531
549
|
| `--annotate` | Annotated screenshot with numbered element labels (or `AGENT_BROWSER_ANNOTATE` env) |
|
|
550
|
+
| `--screenshot-dir <path>` | Default screenshot output directory (or `AGENT_BROWSER_SCREENSHOT_DIR` env) |
|
|
551
|
+
| `--screenshot-quality <n>` | JPEG quality 0-100 (or `AGENT_BROWSER_SCREENSHOT_QUALITY` env) |
|
|
552
|
+
| `--screenshot-format <fmt>` | Screenshot format: `png`, `jpeg` (or `AGENT_BROWSER_SCREENSHOT_FORMAT` env) |
|
|
532
553
|
| `--headed` | Show browser window (not headless) (or `AGENT_BROWSER_HEADED` env) |
|
|
533
554
|
| `--cdp <port\|url>` | Connect via Chrome DevTools Protocol (port or WebSocket URL) |
|
|
534
555
|
| `--auto-connect` | Auto-discover and connect to running Chrome (or `AGENT_BROWSER_AUTO_CONNECT` env) |
|
|
@@ -596,8 +617,8 @@ export AGENT_BROWSER_DEFAULT_TIMEOUT=45000
|
|
|
596
617
|
|
|
597
618
|
> **Note:** Setting this above 30000 (30s) may cause EAGAIN errors on slow operations because the CLI's read timeout will expire before Playwright responds. The CLI retries transient errors automatically, but response times will increase.
|
|
598
619
|
|
|
599
|
-
| Variable
|
|
600
|
-
|
|
620
|
+
| Variable | Description |
|
|
621
|
+
| ------------------------------- | ------------------------------------------------- |
|
|
601
622
|
| `AGENT_BROWSER_DEFAULT_TIMEOUT` | Default Playwright timeout in ms (default: 25000) |
|
|
602
623
|
|
|
603
624
|
## Selectors
|
|
@@ -623,6 +644,7 @@ agent-browser hover @e4 # Hover the link
|
|
|
623
644
|
```
|
|
624
645
|
|
|
625
646
|
**Why use refs?**
|
|
647
|
+
|
|
626
648
|
- **Deterministic**: Ref points to exact element from snapshot
|
|
627
649
|
- **Fast**: No DOM re-query needed
|
|
628
650
|
- **AI-friendly**: Snapshot + ref workflow is optimal for LLMs
|
|
@@ -723,6 +745,7 @@ agent-browser open other-site.com
|
|
|
723
745
|
```
|
|
724
746
|
|
|
725
747
|
This is useful for:
|
|
748
|
+
|
|
726
749
|
- **Skipping login flows** - Authenticate via headers instead of UI
|
|
727
750
|
- **Switching users** - Start new sessions with different auth tokens
|
|
728
751
|
- **API testing** - Access protected endpoints directly
|
|
@@ -744,6 +767,7 @@ agent-browser set headers '{"X-Custom-Header": "value"}'
|
|
|
744
767
|
## Custom Browser Executable
|
|
745
768
|
|
|
746
769
|
Use a custom browser executable instead of the bundled Chromium. This is useful for:
|
|
770
|
+
|
|
747
771
|
- **Serverless deployment**: Use lightweight Chromium builds like `@sparticuz/chromium` (~50MB vs ~684MB)
|
|
748
772
|
- **System browsers**: Use an existing Chrome/Chromium installation
|
|
749
773
|
- **Custom builds**: Use modified browser builds
|
|
@@ -804,6 +828,7 @@ agent-browser screenshot report.png
|
|
|
804
828
|
```
|
|
805
829
|
|
|
806
830
|
The `--allow-file-access` flag adds Chromium flags (`--allow-file-access-from-files`, `--allow-file-access`) that allow `file://` URLs to:
|
|
831
|
+
|
|
807
832
|
- Load and render local files
|
|
808
833
|
- Access other local files via JavaScript (XHR, fetch)
|
|
809
834
|
- Load local resources (images, scripts, stylesheets)
|
|
@@ -831,10 +856,12 @@ agent-browser --cdp "wss://your-browser-service.com/cdp?token=..." snapshot
|
|
|
831
856
|
```
|
|
832
857
|
|
|
833
858
|
The `--cdp` flag accepts either:
|
|
859
|
+
|
|
834
860
|
- A port number (e.g., `9222`) for local connections via `http://localhost:{port}`
|
|
835
861
|
- A full WebSocket URL (e.g., `wss://...` or `ws://...`) for remote browser services
|
|
836
862
|
|
|
837
863
|
This enables control of:
|
|
864
|
+
|
|
838
865
|
- Electron apps
|
|
839
866
|
- Chrome/Chromium instances with remote debugging
|
|
840
867
|
- WebView2 applications
|
|
@@ -854,10 +881,12 @@ AGENT_BROWSER_AUTO_CONNECT=1 agent-browser snapshot
|
|
|
854
881
|
```
|
|
855
882
|
|
|
856
883
|
Auto-connect discovers Chrome by:
|
|
884
|
+
|
|
857
885
|
1. Reading Chrome's `DevToolsActivePort` file from the default user data directory
|
|
858
886
|
2. Falling back to probing common debugging ports (9222, 9229)
|
|
859
887
|
|
|
860
888
|
This is useful when:
|
|
889
|
+
|
|
861
890
|
- Chrome 144+ has remote debugging enabled via `chrome://inspect/#remote-debugging` (which uses a dynamic port)
|
|
862
891
|
- You want a zero-configuration connection to your existing browser
|
|
863
892
|
- You don't want to track which port Chrome is using
|
|
@@ -881,6 +910,7 @@ This starts a WebSocket server on the specified port that streams the browser vi
|
|
|
881
910
|
Connect to `ws://localhost:9223` to receive frames and send input:
|
|
882
911
|
|
|
883
912
|
**Receive frames:**
|
|
913
|
+
|
|
884
914
|
```json
|
|
885
915
|
{
|
|
886
916
|
"type": "frame",
|
|
@@ -897,6 +927,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
897
927
|
```
|
|
898
928
|
|
|
899
929
|
**Send mouse events:**
|
|
930
|
+
|
|
900
931
|
```json
|
|
901
932
|
{
|
|
902
933
|
"type": "input_mouse",
|
|
@@ -909,6 +940,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
909
940
|
```
|
|
910
941
|
|
|
911
942
|
**Send keyboard events:**
|
|
943
|
+
|
|
912
944
|
```json
|
|
913
945
|
{
|
|
914
946
|
"type": "input_keyboard",
|
|
@@ -919,6 +951,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
919
951
|
```
|
|
920
952
|
|
|
921
953
|
**Send touch events:**
|
|
954
|
+
|
|
922
955
|
```json
|
|
923
956
|
{
|
|
924
957
|
"type": "input_touch",
|
|
@@ -939,16 +972,19 @@ await browser.launch({ headless: true });
|
|
|
939
972
|
await browser.navigate('https://example.com');
|
|
940
973
|
|
|
941
974
|
// Start screencast
|
|
942
|
-
await browser.startScreencast(
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
975
|
+
await browser.startScreencast(
|
|
976
|
+
(frame) => {
|
|
977
|
+
// frame.data is base64-encoded image
|
|
978
|
+
// frame.metadata contains viewport info
|
|
979
|
+
console.log('Frame received:', frame.metadata.deviceWidth, 'x', frame.metadata.deviceHeight);
|
|
980
|
+
},
|
|
981
|
+
{
|
|
982
|
+
format: 'jpeg',
|
|
983
|
+
quality: 80,
|
|
984
|
+
maxWidth: 1280,
|
|
985
|
+
maxHeight: 720,
|
|
986
|
+
}
|
|
987
|
+
);
|
|
952
988
|
|
|
953
989
|
// Inject mouse events
|
|
954
990
|
await browser.injectMouseEvent({
|
|
@@ -1000,18 +1036,18 @@ agent-browser open example.com
|
|
|
1000
1036
|
Or add to your config file (`agent-browser.json`):
|
|
1001
1037
|
|
|
1002
1038
|
```json
|
|
1003
|
-
{"native": true}
|
|
1039
|
+
{ "native": true }
|
|
1004
1040
|
```
|
|
1005
1041
|
|
|
1006
1042
|
### What's Different
|
|
1007
1043
|
|
|
1008
|
-
|
|
|
1009
|
-
|
|
1010
|
-
| **Runtime**
|
|
1011
|
-
| **Protocol**
|
|
1012
|
-
| **Install size**
|
|
1013
|
-
| **Browser support** | Chromium, Firefox, WebKit
|
|
1014
|
-
| **Stability**
|
|
1044
|
+
| | Default (Node.js) | Native (`--native`) |
|
|
1045
|
+
| ------------------- | --------------------------- | -------------------------------- |
|
|
1046
|
+
| **Runtime** | Node.js + Playwright | Pure Rust binary |
|
|
1047
|
+
| **Protocol** | Playwright protocol | Direct CDP / WebDriver |
|
|
1048
|
+
| **Install size** | Larger (Node.js + npm deps) | Smaller (single binary) |
|
|
1049
|
+
| **Browser support** | Chromium, Firefox, WebKit | Chromium, Safari (via WebDriver) |
|
|
1050
|
+
| **Stability** | Stable | Experimental |
|
|
1015
1051
|
|
|
1016
1052
|
### Known Limitations
|
|
1017
1053
|
|
|
@@ -1021,13 +1057,13 @@ Or add to your config file (`agent-browser.json`):
|
|
|
1021
1057
|
|
|
1022
1058
|
## Platforms
|
|
1023
1059
|
|
|
1024
|
-
| Platform
|
|
1025
|
-
|
|
1026
|
-
| macOS ARM64 | Native Rust | Node.js
|
|
1027
|
-
| macOS x64
|
|
1028
|
-
| Linux ARM64 | Native Rust | Node.js
|
|
1029
|
-
| Linux x64
|
|
1030
|
-
| Windows x64 | Native Rust | Node.js
|
|
1060
|
+
| Platform | Binary | Fallback |
|
|
1061
|
+
| ----------- | ----------- | -------- |
|
|
1062
|
+
| macOS ARM64 | Native Rust | Node.js |
|
|
1063
|
+
| macOS x64 | Native Rust | Node.js |
|
|
1064
|
+
| Linux ARM64 | Native Rust | Node.js |
|
|
1065
|
+
| Linux x64 | Native Rust | Node.js |
|
|
1066
|
+
| Windows x64 | Native Rust | Node.js |
|
|
1031
1067
|
|
|
1032
1068
|
## Usage with AI Agents
|
|
1033
1069
|
|
|
@@ -1071,6 +1107,7 @@ For more consistent results, add to your project or global instructions file:
|
|
|
1071
1107
|
Use `agent-browser` for web automation. Run `agent-browser --help` for all commands.
|
|
1072
1108
|
|
|
1073
1109
|
Core workflow:
|
|
1110
|
+
|
|
1074
1111
|
1. `agent-browser open <url>` - Navigate to page
|
|
1075
1112
|
2. `agent-browser snapshot -i` - Get interactive elements with refs (@e1, @e2)
|
|
1076
1113
|
3. `agent-browser click @e1` / `fill @e2 "text"` - Interact using refs
|
|
@@ -1122,11 +1159,11 @@ export AGENT_BROWSER_IOS_DEVICE="iPhone 16 Pro"
|
|
|
1122
1159
|
agent-browser open https://example.com
|
|
1123
1160
|
```
|
|
1124
1161
|
|
|
1125
|
-
| Variable
|
|
1126
|
-
|
|
1127
|
-
| `AGENT_BROWSER_PROVIDER`
|
|
1162
|
+
| Variable | Description |
|
|
1163
|
+
| -------------------------- | ----------------------------------------------- |
|
|
1164
|
+
| `AGENT_BROWSER_PROVIDER` | Set to `ios` to enable iOS mode |
|
|
1128
1165
|
| `AGENT_BROWSER_IOS_DEVICE` | Device name (e.g., "iPhone 16 Pro", "iPad Pro") |
|
|
1129
|
-
| `AGENT_BROWSER_IOS_UDID`
|
|
1166
|
+
| `AGENT_BROWSER_IOS_UDID` | Device UDID (alternative to device name) |
|
|
1130
1167
|
|
|
1131
1168
|
**Supported devices:** All iOS Simulators available in Xcode (iPhones, iPads), plus real iOS devices.
|
|
1132
1169
|
|
|
@@ -1137,6 +1174,7 @@ agent-browser open https://example.com
|
|
|
1137
1174
|
Appium also supports real iOS devices connected via USB. This requires additional one-time setup:
|
|
1138
1175
|
|
|
1139
1176
|
**1. Get your device UDID:**
|
|
1177
|
+
|
|
1140
1178
|
```bash
|
|
1141
1179
|
xcrun xctrace list devices
|
|
1142
1180
|
# or
|
|
@@ -1144,6 +1182,7 @@ system_profiler SPUSBDataType | grep -A 5 "iPhone\|iPad"
|
|
|
1144
1182
|
```
|
|
1145
1183
|
|
|
1146
1184
|
**2. Sign WebDriverAgent (one-time):**
|
|
1185
|
+
|
|
1147
1186
|
```bash
|
|
1148
1187
|
# Open the WebDriverAgent Xcode project
|
|
1149
1188
|
cd ~/.appium/node_modules/appium-xcuitest-driver/node_modules/appium-webdriveragent
|
|
@@ -1151,12 +1190,14 @@ open WebDriverAgent.xcodeproj
|
|
|
1151
1190
|
```
|
|
1152
1191
|
|
|
1153
1192
|
In Xcode:
|
|
1193
|
+
|
|
1154
1194
|
- Select the `WebDriverAgentRunner` target
|
|
1155
1195
|
- Go to Signing & Capabilities
|
|
1156
1196
|
- Select your Team (requires Apple Developer account, free tier works)
|
|
1157
1197
|
- Let Xcode manage signing automatically
|
|
1158
1198
|
|
|
1159
1199
|
**3. Use with agent-browser:**
|
|
1200
|
+
|
|
1160
1201
|
```bash
|
|
1161
1202
|
# Connect device via USB, then:
|
|
1162
1203
|
agent-browser -p ios --device "<DEVICE_UDID>" open https://example.com
|
|
@@ -1166,11 +1207,44 @@ agent-browser -p ios --device "John's iPhone" open https://example.com
|
|
|
1166
1207
|
```
|
|
1167
1208
|
|
|
1168
1209
|
**Real device notes:**
|
|
1210
|
+
|
|
1169
1211
|
- First run installs WebDriverAgent to the device (may require Trust prompt)
|
|
1170
1212
|
- Device must be unlocked and connected via USB
|
|
1171
1213
|
- Slightly slower initial connection than simulator
|
|
1172
1214
|
- Tests against real Safari performance and behavior
|
|
1173
1215
|
|
|
1216
|
+
### Browserless
|
|
1217
|
+
|
|
1218
|
+
[Browserless](https://browserless.io) provides cloud browser infrastructure with a Sessions API. Use it when running agent-browser in environments where a local browser isn't available.
|
|
1219
|
+
|
|
1220
|
+
To enable Browserless, use the `-p` flag:
|
|
1221
|
+
|
|
1222
|
+
```bash
|
|
1223
|
+
export BROWSERLESS_API_KEY="your-api-token"
|
|
1224
|
+
agent-browser -p browserless open https://example.com
|
|
1225
|
+
```
|
|
1226
|
+
|
|
1227
|
+
Or use environment variables for CI/scripts:
|
|
1228
|
+
|
|
1229
|
+
```bash
|
|
1230
|
+
export AGENT_BROWSER_PROVIDER=browserless
|
|
1231
|
+
export BROWSERLESS_API_KEY="your-api-token"
|
|
1232
|
+
agent-browser open https://example.com
|
|
1233
|
+
```
|
|
1234
|
+
|
|
1235
|
+
Optional configuration via environment variables:
|
|
1236
|
+
|
|
1237
|
+
| Variable | Description | Default |
|
|
1238
|
+
| -------------------------- | ------------------------------------------------ | --------------------------------------- |
|
|
1239
|
+
| `BROWSERLESS_API_URL` | Base API URL (for custom regions or self-hosted) | `https://production-sfo.browserless.io` |
|
|
1240
|
+
| `BROWSERLESS_BROWSER_TYPE` | Type of browser to use (`chromium` or `chrome`) | `chromium` |
|
|
1241
|
+
| `BROWSERLESS_TTL` | Session TTL in milliseconds | `300000` |
|
|
1242
|
+
| `BROWSERLESS_STEALTH` | Enable stealth mode (`true`/`false`) | `true` |
|
|
1243
|
+
|
|
1244
|
+
When enabled, agent-browser connects to a Browserless cloud session instead of launching a local browser. All commands work identically.
|
|
1245
|
+
|
|
1246
|
+
Get your API token from the [Browserless Dashboard](https://browserless.io).
|
|
1247
|
+
|
|
1174
1248
|
### Browserbase
|
|
1175
1249
|
|
|
1176
1250
|
[Browserbase](https://browserbase.com) provides remote browser infrastructure to make deployment of agentic browsing agents easy. Use it when running the agent-browser CLI in an environment where a local browser isn't feasible.
|
|
@@ -1238,12 +1312,12 @@ agent-browser open https://example.com
|
|
|
1238
1312
|
|
|
1239
1313
|
Optional configuration via environment variables:
|
|
1240
1314
|
|
|
1241
|
-
| Variable
|
|
1242
|
-
|
|
1243
|
-
| `KERNEL_HEADLESS`
|
|
1244
|
-
| `KERNEL_STEALTH`
|
|
1245
|
-
| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds
|
|
1246
|
-
| `KERNEL_PROFILE_NAME`
|
|
1315
|
+
| Variable | Description | Default |
|
|
1316
|
+
| ------------------------ | -------------------------------------------------------------------------------- | ------- |
|
|
1317
|
+
| `KERNEL_HEADLESS` | Run browser in headless mode (`true`/`false`) | `false` |
|
|
1318
|
+
| `KERNEL_STEALTH` | Enable stealth mode to avoid bot detection (`true`/`false`) | `true` |
|
|
1319
|
+
| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds | `300` |
|
|
1320
|
+
| `KERNEL_PROFILE_NAME` | Browser profile name for persistent cookies/logins (created if it doesn't exist) | (none) |
|
|
1247
1321
|
|
|
1248
1322
|
When enabled, agent-browser connects to a Kernel cloud session instead of launching a local browser. All commands work identically.
|
|
1249
1323
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/dist/actions.js
CHANGED
|
@@ -411,19 +411,11 @@ async function handleLaunch(command, browser) {
|
|
|
411
411
|
return successResponse(command.id, { launched: true });
|
|
412
412
|
}
|
|
413
413
|
async function handleNavigate(command, browser) {
|
|
414
|
-
browser.
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
if (command.headers && Object.keys(command.headers).length > 0) {
|
|
418
|
-
await browser.setScopedHeaders(command.url, command.headers);
|
|
419
|
-
}
|
|
420
|
-
await page.goto(command.url, {
|
|
421
|
-
waitUntil: command.waitUntil ?? 'load',
|
|
422
|
-
});
|
|
423
|
-
return successResponse(command.id, {
|
|
424
|
-
url: page.url(),
|
|
425
|
-
title: await page.title(),
|
|
414
|
+
const result = await browser.navigate(command.url, {
|
|
415
|
+
headers: command.headers,
|
|
416
|
+
waitUntil: command.waitUntil,
|
|
426
417
|
});
|
|
418
|
+
return successResponse(command.id, result);
|
|
427
419
|
}
|
|
428
420
|
async function handleClick(command, browser) {
|
|
429
421
|
// Support both refs (@e1) and regular selectors
|
|
@@ -513,7 +505,7 @@ async function handleScreenshot(command, browser) {
|
|
|
513
505
|
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
|
514
506
|
const random = Math.random().toString(36).substring(2, 8);
|
|
515
507
|
const filename = `screenshot-${timestamp}-${random}.${ext}`;
|
|
516
|
-
const screenshotDir = path.join(getAppDir(), 'tmp', 'screenshots');
|
|
508
|
+
const screenshotDir = command.screenshotDir ?? path.join(getAppDir(), 'tmp', 'screenshots');
|
|
517
509
|
mkdirSync(screenshotDir, { recursive: true });
|
|
518
510
|
savePath = path.join(screenshotDir, filename);
|
|
519
511
|
}
|
|
@@ -696,7 +688,10 @@ async function handleEvaluate(command, browser) {
|
|
|
696
688
|
}
|
|
697
689
|
async function handleWait(command, browser) {
|
|
698
690
|
const page = browser.getPage();
|
|
699
|
-
if (command.
|
|
691
|
+
if (command.text) {
|
|
692
|
+
await page.waitForFunction(`(document.body.innerText || '').includes(${JSON.stringify(command.text)})`, { timeout: command.timeout });
|
|
693
|
+
}
|
|
694
|
+
else if (command.selector) {
|
|
700
695
|
await page.waitForSelector(command.selector, {
|
|
701
696
|
state: command.state ?? 'visible',
|
|
702
697
|
timeout: command.timeout,
|
|
@@ -706,7 +701,6 @@ async function handleWait(command, browser) {
|
|
|
706
701
|
await page.waitForTimeout(command.timeout);
|
|
707
702
|
}
|
|
708
703
|
else {
|
|
709
|
-
// Default: wait for load state
|
|
710
704
|
await page.waitForLoadState('load');
|
|
711
705
|
}
|
|
712
706
|
return successResponse(command.id, { waited: true });
|
|
@@ -772,7 +766,8 @@ async function handleContent(command, browser) {
|
|
|
772
766
|
const page = browser.getPage();
|
|
773
767
|
let html;
|
|
774
768
|
if (command.selector) {
|
|
775
|
-
|
|
769
|
+
const locator = browser.getLocator(command.selector);
|
|
770
|
+
html = await locator.innerHTML();
|
|
776
771
|
}
|
|
777
772
|
else {
|
|
778
773
|
html = await page.content();
|
|
@@ -1240,13 +1235,13 @@ async function handleIsChecked(command, browser) {
|
|
|
1240
1235
|
return successResponse(command.id, { checked });
|
|
1241
1236
|
}
|
|
1242
1237
|
async function handleCount(command, browser) {
|
|
1243
|
-
const
|
|
1244
|
-
const count = await
|
|
1238
|
+
const locator = browser.getLocator(command.selector);
|
|
1239
|
+
const count = await locator.count();
|
|
1245
1240
|
return successResponse(command.id, { count });
|
|
1246
1241
|
}
|
|
1247
1242
|
async function handleBoundingBox(command, browser) {
|
|
1248
|
-
const
|
|
1249
|
-
const box = await
|
|
1243
|
+
const locator = browser.getLocator(command.selector);
|
|
1244
|
+
const box = await locator.boundingBox();
|
|
1250
1245
|
return successResponse(command.id, { box });
|
|
1251
1246
|
}
|
|
1252
1247
|
async function handleStyles(command, browser) {
|
|
@@ -1365,8 +1360,6 @@ async function handleStateLoad(command, browser) {
|
|
|
1365
1360
|
return errorResponse(command.id, `State file not found: ${command.path}`);
|
|
1366
1361
|
}
|
|
1367
1362
|
await browser.launch({
|
|
1368
|
-
id: command.id,
|
|
1369
|
-
action: 'launch',
|
|
1370
1363
|
headless: true,
|
|
1371
1364
|
autoStateFilePath: command.path,
|
|
1372
1365
|
});
|
|
@@ -1534,7 +1527,7 @@ async function handleKeyboard(command, browser) {
|
|
|
1534
1527
|
async function handleWheel(command, browser) {
|
|
1535
1528
|
const page = browser.getPage();
|
|
1536
1529
|
if (command.selector) {
|
|
1537
|
-
const element =
|
|
1530
|
+
const element = browser.getLocator(command.selector);
|
|
1538
1531
|
await element.hover();
|
|
1539
1532
|
}
|
|
1540
1533
|
await page.mouse.wheel(command.deltaX ?? 0, command.deltaY ?? 0);
|
|
@@ -1549,41 +1542,50 @@ async function handleClipboard(command, browser) {
|
|
|
1549
1542
|
const page = browser.getPage();
|
|
1550
1543
|
switch (command.operation) {
|
|
1551
1544
|
case 'copy':
|
|
1552
|
-
await page.keyboard.press('
|
|
1545
|
+
await page.keyboard.press('ControlOrMeta+c');
|
|
1553
1546
|
return successResponse(command.id, { copied: true });
|
|
1554
1547
|
case 'paste':
|
|
1555
|
-
await page.keyboard.press('
|
|
1548
|
+
await page.keyboard.press('ControlOrMeta+v');
|
|
1556
1549
|
return successResponse(command.id, { pasted: true });
|
|
1557
|
-
case 'read':
|
|
1550
|
+
case 'read': {
|
|
1558
1551
|
const text = await page.evaluate('navigator.clipboard.readText()');
|
|
1559
1552
|
return successResponse(command.id, { text });
|
|
1553
|
+
}
|
|
1554
|
+
case 'write': {
|
|
1555
|
+
if (!command.text) {
|
|
1556
|
+
return errorResponse(command.id, "Missing 'text' parameter for clipboard write");
|
|
1557
|
+
}
|
|
1558
|
+
await page.evaluate(`navigator.clipboard.writeText(${JSON.stringify(command.text)})`);
|
|
1559
|
+
return successResponse(command.id, { written: command.text });
|
|
1560
|
+
}
|
|
1560
1561
|
default:
|
|
1561
1562
|
return errorResponse(command.id, 'Unknown clipboard operation');
|
|
1562
1563
|
}
|
|
1563
1564
|
}
|
|
1564
1565
|
async function handleHighlight(command, browser) {
|
|
1565
|
-
const
|
|
1566
|
-
await
|
|
1566
|
+
const locator = browser.getLocator(command.selector);
|
|
1567
|
+
await locator.highlight();
|
|
1567
1568
|
return successResponse(command.id, { highlighted: true });
|
|
1568
1569
|
}
|
|
1569
1570
|
async function handleClear(command, browser) {
|
|
1570
|
-
const
|
|
1571
|
-
await
|
|
1571
|
+
const locator = browser.getLocator(command.selector);
|
|
1572
|
+
await locator.clear();
|
|
1572
1573
|
return successResponse(command.id, { cleared: true });
|
|
1573
1574
|
}
|
|
1574
1575
|
async function handleSelectAll(command, browser) {
|
|
1575
|
-
const
|
|
1576
|
-
await
|
|
1576
|
+
const locator = browser.getLocator(command.selector);
|
|
1577
|
+
await locator.selectText();
|
|
1577
1578
|
return successResponse(command.id, { selected: true });
|
|
1578
1579
|
}
|
|
1579
1580
|
async function handleInnerText(command, browser) {
|
|
1580
|
-
const
|
|
1581
|
-
const text = await
|
|
1581
|
+
const locator = browser.getLocator(command.selector);
|
|
1582
|
+
const text = await locator.innerText();
|
|
1582
1583
|
return successResponse(command.id, { text });
|
|
1583
1584
|
}
|
|
1584
1585
|
async function handleInnerHtml(command, browser) {
|
|
1585
1586
|
const page = browser.getPage();
|
|
1586
|
-
const
|
|
1587
|
+
const locator = browser.getLocator(command.selector);
|
|
1588
|
+
const html = await locator.innerHTML();
|
|
1587
1589
|
return successResponse(command.id, { html, origin: page.url() });
|
|
1588
1590
|
}
|
|
1589
1591
|
async function handleInputValue(command, browser) {
|
|
@@ -1593,13 +1595,13 @@ async function handleInputValue(command, browser) {
|
|
|
1593
1595
|
return successResponse(command.id, { value, origin: page.url() });
|
|
1594
1596
|
}
|
|
1595
1597
|
async function handleSetValue(command, browser) {
|
|
1596
|
-
const
|
|
1597
|
-
await
|
|
1598
|
+
const locator = browser.getLocator(command.selector);
|
|
1599
|
+
await locator.fill(command.value);
|
|
1598
1600
|
return successResponse(command.id, { set: true });
|
|
1599
1601
|
}
|
|
1600
1602
|
async function handleDispatch(command, browser) {
|
|
1601
|
-
const
|
|
1602
|
-
await
|
|
1603
|
+
const locator = browser.getLocator(command.selector);
|
|
1604
|
+
await locator.dispatchEvent(command.event, command.eventInit);
|
|
1603
1605
|
return successResponse(command.id, { dispatched: command.event });
|
|
1604
1606
|
}
|
|
1605
1607
|
async function handleEvalHandle(command, browser) {
|
|
@@ -1705,8 +1707,7 @@ async function handleGetByTestId(command, browser) {
|
|
|
1705
1707
|
}
|
|
1706
1708
|
}
|
|
1707
1709
|
async function handleNth(command, browser) {
|
|
1708
|
-
const
|
|
1709
|
-
const base = page.locator(command.selector);
|
|
1710
|
+
const base = browser.getLocator(command.selector);
|
|
1710
1711
|
const locator = command.index === -1 ? base.last() : base.nth(command.index);
|
|
1711
1712
|
switch (command.subaction) {
|
|
1712
1713
|
case 'click':
|
|
@@ -1816,8 +1817,8 @@ async function handleInsertText(command, browser) {
|
|
|
1816
1817
|
return successResponse(command.id, { inserted: true });
|
|
1817
1818
|
}
|
|
1818
1819
|
async function handleMultiSelect(command, browser) {
|
|
1819
|
-
const
|
|
1820
|
-
const selected = await
|
|
1820
|
+
const locator = browser.getLocator(command.selector);
|
|
1821
|
+
const selected = await locator.selectOption(command.values);
|
|
1821
1822
|
return successResponse(command.id, { selected });
|
|
1822
1823
|
}
|
|
1823
1824
|
async function handleWaitForDownload(command, browser) {
|
|
@@ -1965,7 +1966,7 @@ async function handleDiffScreenshot(command, browser) {
|
|
|
1965
1966
|
const page = browser.getPage();
|
|
1966
1967
|
let screenshotBuffer;
|
|
1967
1968
|
if (command.selector) {
|
|
1968
|
-
const locator = browser.
|
|
1969
|
+
const locator = browser.getLocator(command.selector);
|
|
1969
1970
|
screenshotBuffer = await locator.screenshot({ type: 'png' });
|
|
1970
1971
|
}
|
|
1971
1972
|
else {
|