agent-browser 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +150 -122
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/bin/agent-browser-darwin-x64 +0 -0
- package/bin/agent-browser-linux-arm64 +0 -0
- package/bin/agent-browser-linux-x64 +0 -0
- package/bin/agent-browser-win32-x64.exe +0 -0
- package/package.json +6 -40
- package/scripts/postinstall.js +12 -15
- package/skills/agent-browser/SKILL.md +37 -32
- package/skills/electron/SKILL.md +5 -5
- package/dist/action-policy.d.ts +0 -14
- package/dist/action-policy.d.ts.map +0 -1
- package/dist/action-policy.js +0 -253
- package/dist/action-policy.js.map +0 -1
- package/dist/actions.d.ts +0 -18
- package/dist/actions.d.ts.map +0 -1
- package/dist/actions.js +0 -2120
- package/dist/actions.js.map +0 -1
- package/dist/auth-cli.d.ts +0 -2
- package/dist/auth-cli.d.ts.map +0 -1
- package/dist/auth-cli.js +0 -97
- package/dist/auth-cli.js.map +0 -1
- package/dist/auth-vault.d.ts +0 -36
- package/dist/auth-vault.d.ts.map +0 -1
- package/dist/auth-vault.js +0 -125
- package/dist/auth-vault.js.map +0 -1
- package/dist/browser.d.ts +0 -592
- package/dist/browser.d.ts.map +0 -1
- package/dist/browser.js +0 -2190
- package/dist/browser.js.map +0 -1
- package/dist/confirmation.d.ts +0 -8
- package/dist/confirmation.d.ts.map +0 -1
- package/dist/confirmation.js +0 -30
- package/dist/confirmation.js.map +0 -1
- package/dist/daemon.d.ts +0 -71
- package/dist/daemon.d.ts.map +0 -1
- package/dist/daemon.js +0 -671
- package/dist/daemon.js.map +0 -1
- package/dist/diff.d.ts +0 -18
- package/dist/diff.d.ts.map +0 -1
- package/dist/diff.js +0 -271
- package/dist/diff.js.map +0 -1
- package/dist/domain-filter.d.ts +0 -28
- package/dist/domain-filter.d.ts.map +0 -1
- package/dist/domain-filter.js +0 -149
- package/dist/domain-filter.js.map +0 -1
- package/dist/encryption.d.ts +0 -73
- package/dist/encryption.d.ts.map +0 -1
- package/dist/encryption.js +0 -171
- package/dist/encryption.js.map +0 -1
- package/dist/inspect-server.d.ts +0 -26
- package/dist/inspect-server.d.ts.map +0 -1
- package/dist/inspect-server.js +0 -218
- package/dist/inspect-server.js.map +0 -1
- package/dist/ios-actions.d.ts +0 -11
- package/dist/ios-actions.d.ts.map +0 -1
- package/dist/ios-actions.js +0 -228
- package/dist/ios-actions.js.map +0 -1
- package/dist/ios-manager.d.ts +0 -266
- package/dist/ios-manager.d.ts.map +0 -1
- package/dist/ios-manager.js +0 -1073
- package/dist/ios-manager.js.map +0 -1
- package/dist/protocol.d.ts +0 -28
- package/dist/protocol.d.ts.map +0 -1
- package/dist/protocol.js +0 -986
- package/dist/protocol.js.map +0 -1
- package/dist/snapshot.d.ts +0 -67
- package/dist/snapshot.d.ts.map +0 -1
- package/dist/snapshot.js +0 -514
- package/dist/snapshot.js.map +0 -1
- package/dist/state-utils.d.ts +0 -77
- package/dist/state-utils.d.ts.map +0 -1
- package/dist/state-utils.js +0 -178
- package/dist/state-utils.js.map +0 -1
- package/dist/stream-server.d.ts +0 -117
- package/dist/stream-server.d.ts.map +0 -1
- package/dist/stream-server.js +0 -309
- package/dist/stream-server.js.map +0 -1
- package/dist/types.d.ts +0 -925
- package/dist/types.d.ts.map +0 -1
- package/dist/types.js +0 -2
- package/dist/types.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,51 +1,41 @@
|
|
|
1
1
|
# agent-browser
|
|
2
2
|
|
|
3
|
-
Headless browser automation CLI for AI agents. Fast Rust CLI
|
|
3
|
+
Headless browser automation CLI for AI agents. Fast native Rust CLI.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
7
|
### Global Installation (recommended)
|
|
8
8
|
|
|
9
|
-
Installs the native Rust binary
|
|
9
|
+
Installs the native Rust binary:
|
|
10
10
|
|
|
11
11
|
```bash
|
|
12
12
|
npm install -g agent-browser
|
|
13
|
-
agent-browser install # Download
|
|
13
|
+
agent-browser install # Download Chrome from Chrome for Testing (first time only)
|
|
14
14
|
```
|
|
15
15
|
|
|
16
|
-
This is the fastest option -- commands run through the native Rust CLI directly with sub-millisecond parsing overhead.
|
|
17
|
-
|
|
18
|
-
### Quick Start (no install)
|
|
19
|
-
|
|
20
|
-
Run directly with `npx` if you want to try it without installing globally:
|
|
21
|
-
|
|
22
|
-
```bash
|
|
23
|
-
npx agent-browser install # Download Chromium (first time only)
|
|
24
|
-
npx agent-browser open example.com
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
> **Note:** `npx` routes through Node.js before reaching the Rust CLI, so it is noticeably slower than a global install. For regular use, install globally.
|
|
28
|
-
|
|
29
16
|
### Project Installation (local dependency)
|
|
30
17
|
|
|
31
18
|
For projects that want to pin the version in `package.json`:
|
|
32
19
|
|
|
33
20
|
```bash
|
|
34
21
|
npm install agent-browser
|
|
35
|
-
|
|
22
|
+
agent-browser install
|
|
36
23
|
```
|
|
37
24
|
|
|
38
|
-
Then use via `
|
|
25
|
+
Then use via `package.json` scripts or by invoking `agent-browser` directly.
|
|
26
|
+
|
|
27
|
+
### Homebrew (macOS)
|
|
39
28
|
|
|
40
29
|
```bash
|
|
41
|
-
|
|
30
|
+
brew install agent-browser
|
|
31
|
+
agent-browser install # Download Chrome from Chrome for Testing (first time only)
|
|
42
32
|
```
|
|
43
33
|
|
|
44
|
-
###
|
|
34
|
+
### Cargo (Rust)
|
|
45
35
|
|
|
46
36
|
```bash
|
|
47
|
-
|
|
48
|
-
agent-browser install # Download
|
|
37
|
+
cargo install agent-browser
|
|
38
|
+
agent-browser install # Download Chrome from Chrome for Testing (first time only)
|
|
49
39
|
```
|
|
50
40
|
|
|
51
41
|
### From Source
|
|
@@ -66,9 +56,13 @@ On Linux, install system dependencies:
|
|
|
66
56
|
|
|
67
57
|
```bash
|
|
68
58
|
agent-browser install --with-deps
|
|
69
|
-
# or manually: npx playwright install-deps chromium
|
|
70
59
|
```
|
|
71
60
|
|
|
61
|
+
### Requirements
|
|
62
|
+
|
|
63
|
+
- **Chrome** - Run `agent-browser install` to download Chrome from [Chrome for Testing](https://developer.chrome.com/blog/chrome-for-testing/) (Google's official automation channel). No Playwright or Node.js required for the daemon.
|
|
64
|
+
- **Rust** - Only needed when building from source (see From Source above).
|
|
65
|
+
|
|
72
66
|
## Quick Start
|
|
73
67
|
|
|
74
68
|
```bash
|
|
@@ -115,6 +109,8 @@ agent-browser drag <src> <tgt> # Drag and drop
|
|
|
115
109
|
agent-browser upload <sel> <files> # Upload files
|
|
116
110
|
agent-browser screenshot [path] # Take screenshot (--full for full page, saves to a temporary directory if no path)
|
|
117
111
|
agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
|
|
112
|
+
agent-browser screenshot --screenshot-dir ./shots # Save to custom directory
|
|
113
|
+
agent-browser screenshot --screenshot-format jpeg --screenshot-quality 80
|
|
118
114
|
agent-browser pdf <path> # Save as PDF
|
|
119
115
|
agent-browser snapshot # Accessibility tree with refs (best for AI)
|
|
120
116
|
agent-browser eval <js> # Run JavaScript (-b for base64, --stdin for piped input)
|
|
@@ -165,6 +161,7 @@ agent-browser find nth <n> <sel> <action> [value] # Nth match
|
|
|
165
161
|
**Options:** `--name <name>` (filter role by accessible name), `--exact` (require exact text match)
|
|
166
162
|
|
|
167
163
|
**Examples:**
|
|
164
|
+
|
|
168
165
|
```bash
|
|
169
166
|
agent-browser find role button click --name "Submit"
|
|
170
167
|
agent-browser find text "Sign In" click
|
|
@@ -178,14 +175,27 @@ agent-browser find nth 2 "a" text
|
|
|
178
175
|
```bash
|
|
179
176
|
agent-browser wait <selector> # Wait for element to be visible
|
|
180
177
|
agent-browser wait <ms> # Wait for time (milliseconds)
|
|
181
|
-
agent-browser wait --text "Welcome" # Wait for text to appear
|
|
178
|
+
agent-browser wait --text "Welcome" # Wait for text to appear (substring match)
|
|
182
179
|
agent-browser wait --url "**/dash" # Wait for URL pattern
|
|
183
180
|
agent-browser wait --load networkidle # Wait for load state
|
|
184
181
|
agent-browser wait --fn "window.ready === true" # Wait for JS condition
|
|
182
|
+
|
|
183
|
+
# Wait for text/element to disappear
|
|
184
|
+
agent-browser wait --fn "!document.body.innerText.includes('Loading...')"
|
|
185
|
+
agent-browser wait "#spinner" --state hidden
|
|
185
186
|
```
|
|
186
187
|
|
|
187
188
|
**Load states:** `load`, `domcontentloaded`, `networkidle`
|
|
188
189
|
|
|
190
|
+
### Clipboard
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
agent-browser clipboard read # Read text from clipboard
|
|
194
|
+
agent-browser clipboard write "Hello, World!" # Write text to clipboard
|
|
195
|
+
agent-browser clipboard copy # Copy current selection (Ctrl+C)
|
|
196
|
+
agent-browser clipboard paste # Paste from clipboard (Ctrl+V)
|
|
197
|
+
```
|
|
198
|
+
|
|
189
199
|
### Mouse Control
|
|
190
200
|
|
|
191
201
|
```bash
|
|
@@ -306,7 +316,7 @@ agent-browser reload # Reload page
|
|
|
306
316
|
### Setup
|
|
307
317
|
|
|
308
318
|
```bash
|
|
309
|
-
agent-browser install # Download
|
|
319
|
+
agent-browser install # Download Chrome from Chrome for Testing (Google's official automation channel)
|
|
310
320
|
agent-browser install --with-deps # Also install system deps (Linux)
|
|
311
321
|
```
|
|
312
322
|
|
|
@@ -375,6 +385,7 @@ agent-browser session
|
|
|
375
385
|
```
|
|
376
386
|
|
|
377
387
|
Each session has its own:
|
|
388
|
+
|
|
378
389
|
- Browser instance
|
|
379
390
|
- Cookies and storage
|
|
380
391
|
- Navigation history
|
|
@@ -396,6 +407,7 @@ AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
|
|
|
396
407
|
```
|
|
397
408
|
|
|
398
409
|
The profile directory stores:
|
|
410
|
+
|
|
399
411
|
- Cookies and localStorage
|
|
400
412
|
- IndexedDB data
|
|
401
413
|
- Service workers
|
|
@@ -432,10 +444,10 @@ export AGENT_BROWSER_ENCRYPTION_KEY=<64-char-hex-key>
|
|
|
432
444
|
agent-browser --session-name secure open example.com
|
|
433
445
|
```
|
|
434
446
|
|
|
435
|
-
| Variable
|
|
436
|
-
|
|
437
|
-
| `AGENT_BROWSER_SESSION_NAME`
|
|
438
|
-
| `AGENT_BROWSER_ENCRYPTION_KEY`
|
|
447
|
+
| Variable | Description |
|
|
448
|
+
| --------------------------------- | -------------------------------------------------- |
|
|
449
|
+
| `AGENT_BROWSER_SESSION_NAME` | Auto-save/load state persistence name |
|
|
450
|
+
| `AGENT_BROWSER_ENCRYPTION_KEY` | 64-char hex key for AES-256-GCM encryption |
|
|
439
451
|
| `AGENT_BROWSER_STATE_EXPIRE_DAYS` | Auto-delete states older than N days (default: 30) |
|
|
440
452
|
|
|
441
453
|
## Security
|
|
@@ -449,14 +461,14 @@ agent-browser includes security features for safe AI agent deployments. All feat
|
|
|
449
461
|
- **Action Confirmation** -- Require explicit approval for sensitive action categories: `--confirm-actions eval,download`
|
|
450
462
|
- **Output Length Limits** -- Prevent context flooding: `--max-output 50000`
|
|
451
463
|
|
|
452
|
-
| Variable
|
|
453
|
-
|
|
454
|
-
| `AGENT_BROWSER_CONTENT_BOUNDARIES`
|
|
455
|
-
| `AGENT_BROWSER_MAX_OUTPUT`
|
|
456
|
-
| `AGENT_BROWSER_ALLOWED_DOMAINS`
|
|
457
|
-
| `AGENT_BROWSER_ACTION_POLICY`
|
|
458
|
-
| `AGENT_BROWSER_CONFIRM_ACTIONS`
|
|
459
|
-
| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
|
|
464
|
+
| Variable | Description |
|
|
465
|
+
| ----------------------------------- | ---------------------------------------- |
|
|
466
|
+
| `AGENT_BROWSER_CONTENT_BOUNDARIES` | Wrap page output in boundary markers |
|
|
467
|
+
| `AGENT_BROWSER_MAX_OUTPUT` | Max characters for page output |
|
|
468
|
+
| `AGENT_BROWSER_ALLOWED_DOMAINS` | Comma-separated allowed domain patterns |
|
|
469
|
+
| `AGENT_BROWSER_ACTION_POLICY` | Path to action policy JSON file |
|
|
470
|
+
| `AGENT_BROWSER_CONFIRM_ACTIONS` | Action categories requiring confirmation |
|
|
471
|
+
| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
|
|
460
472
|
|
|
461
473
|
See [Security documentation](https://agent-browser.dev/security) for details.
|
|
462
474
|
|
|
@@ -474,13 +486,13 @@ agent-browser snapshot -s "#main" # Scope to CSS selector
|
|
|
474
486
|
agent-browser snapshot -i -c -d 5 # Combine options
|
|
475
487
|
```
|
|
476
488
|
|
|
477
|
-
| Option
|
|
478
|
-
|
|
479
|
-
| `-i, --interactive`
|
|
480
|
-
| `-C, --cursor`
|
|
481
|
-
| `-c, --compact`
|
|
482
|
-
| `-d, --depth <n>`
|
|
483
|
-
| `-s, --selector <sel>` | Scope to CSS selector |
|
|
489
|
+
| Option | Description |
|
|
490
|
+
| ---------------------- | ----------------------------------------------------------------------- |
|
|
491
|
+
| `-i, --interactive` | Only show interactive elements (buttons, links, inputs) |
|
|
492
|
+
| `-C, --cursor` | Include cursor-interactive elements (cursor:pointer, onclick, tabindex) |
|
|
493
|
+
| `-c, --compact` | Remove empty structural elements |
|
|
494
|
+
| `-d, --depth <n>` | Limit tree depth |
|
|
495
|
+
| `-s, --selector <sel>` | Scope to CSS selector |
|
|
484
496
|
|
|
485
497
|
The `-C` flag is useful for modern web apps that use custom clickable elements (divs, spans) instead of standard buttons/links.
|
|
486
498
|
|
|
@@ -488,7 +500,7 @@ The `-C` flag is useful for modern web apps that use custom clickable elements (
|
|
|
488
500
|
|
|
489
501
|
The `--annotate` flag overlays numbered labels on interactive elements in the screenshot. Each label `[N]` corresponds to ref `@eN`, so the same refs work for both visual and text-based workflows.
|
|
490
502
|
|
|
491
|
-
|
|
503
|
+
Annotated screenshots are supported on the CDP-backed browser path (Chrome/Lightpanda). The Safari/WebDriver backend does not yet support `--annotate`.
|
|
492
504
|
|
|
493
505
|
```bash
|
|
494
506
|
agent-browser screenshot --annotate
|
|
@@ -529,6 +541,9 @@ This is useful for multimodal AI models that can reason about visual layout, unl
|
|
|
529
541
|
| `--json` | JSON output (for agents) |
|
|
530
542
|
| `--full, -f` | Full page screenshot |
|
|
531
543
|
| `--annotate` | Annotated screenshot with numbered element labels (or `AGENT_BROWSER_ANNOTATE` env) |
|
|
544
|
+
| `--screenshot-dir <path>` | Default screenshot output directory (or `AGENT_BROWSER_SCREENSHOT_DIR` env) |
|
|
545
|
+
| `--screenshot-quality <n>` | JPEG quality 0-100 (or `AGENT_BROWSER_SCREENSHOT_QUALITY` env) |
|
|
546
|
+
| `--screenshot-format <fmt>` | Screenshot format: `png`, `jpeg` (or `AGENT_BROWSER_SCREENSHOT_FORMAT` env) |
|
|
532
547
|
| `--headed` | Show browser window (not headless) (or `AGENT_BROWSER_HEADED` env) |
|
|
533
548
|
| `--cdp <port\|url>` | Connect via Chrome DevTools Protocol (port or WebSocket URL) |
|
|
534
549
|
| `--auto-connect` | Auto-discover and connect to running Chrome (or `AGENT_BROWSER_AUTO_CONNECT` env) |
|
|
@@ -540,8 +555,7 @@ This is useful for multimodal AI models that can reason about visual layout, unl
|
|
|
540
555
|
| `--action-policy <path>` | Path to action policy JSON file (or `AGENT_BROWSER_ACTION_POLICY` env) |
|
|
541
556
|
| `--confirm-actions <list>` | Action categories requiring confirmation (or `AGENT_BROWSER_CONFIRM_ACTIONS` env) |
|
|
542
557
|
| `--confirm-interactive` | Interactive confirmation prompts; auto-denies if stdin is not a TTY (or `AGENT_BROWSER_CONFIRM_INTERACTIVE` env) |
|
|
543
|
-
| `--engine <name>` | Browser engine: `chrome` (default), `lightpanda
|
|
544
|
-
| `--native` | [Experimental] Use native Rust daemon instead of Node.js (or `AGENT_BROWSER_NATIVE` env) |
|
|
558
|
+
| `--engine <name>` | Browser engine: `chrome` (default), `lightpanda` (or `AGENT_BROWSER_ENGINE` env) |
|
|
545
559
|
| `--config <path>` | Use a custom config file (or `AGENT_BROWSER_CONFIG` env) |
|
|
546
560
|
| `--debug` | Debug output |
|
|
547
561
|
|
|
@@ -585,7 +599,7 @@ Auto-discovered config files that are missing are silently ignored. If `--config
|
|
|
585
599
|
|
|
586
600
|
## Default Timeout
|
|
587
601
|
|
|
588
|
-
The default
|
|
602
|
+
The default timeout for standard operations (clicks, waits, fills, etc.) is 25 seconds. This is intentionally below the CLI's 30-second IPC read timeout so that the daemon returns a proper error instead of the CLI timing out with EAGAIN.
|
|
589
603
|
|
|
590
604
|
Override the default timeout via environment variable:
|
|
591
605
|
|
|
@@ -594,11 +608,11 @@ Override the default timeout via environment variable:
|
|
|
594
608
|
export AGENT_BROWSER_DEFAULT_TIMEOUT=45000
|
|
595
609
|
```
|
|
596
610
|
|
|
597
|
-
> **Note:** Setting this above 30000 (30s) may cause EAGAIN errors on slow operations because the CLI's read timeout will expire before
|
|
611
|
+
> **Note:** Setting this above 30000 (30s) may cause EAGAIN errors on slow operations because the CLI's read timeout will expire before the daemon responds. The CLI retries transient errors automatically, but response times will increase.
|
|
598
612
|
|
|
599
|
-
| Variable
|
|
600
|
-
|
|
601
|
-
| `AGENT_BROWSER_DEFAULT_TIMEOUT` | Default
|
|
613
|
+
| Variable | Description |
|
|
614
|
+
| ------------------------------- | ---------------------------------------- |
|
|
615
|
+
| `AGENT_BROWSER_DEFAULT_TIMEOUT` | Default operation timeout in ms (default: 25000) |
|
|
602
616
|
|
|
603
617
|
## Selectors
|
|
604
618
|
|
|
@@ -623,6 +637,7 @@ agent-browser hover @e4 # Hover the link
|
|
|
623
637
|
```
|
|
624
638
|
|
|
625
639
|
**Why use refs?**
|
|
640
|
+
|
|
626
641
|
- **Deterministic**: Ref points to exact element from snapshot
|
|
627
642
|
- **Fast**: No DOM re-query needed
|
|
628
643
|
- **AI-friendly**: Snapshot + ref workflow is optimal for LLMs
|
|
@@ -723,6 +738,7 @@ agent-browser open other-site.com
|
|
|
723
738
|
```
|
|
724
739
|
|
|
725
740
|
This is useful for:
|
|
741
|
+
|
|
726
742
|
- **Skipping login flows** - Authenticate via headers instead of UI
|
|
727
743
|
- **Switching users** - Start new sessions with different auth tokens
|
|
728
744
|
- **API testing** - Access protected endpoints directly
|
|
@@ -744,6 +760,7 @@ agent-browser set headers '{"X-Custom-Header": "value"}'
|
|
|
744
760
|
## Custom Browser Executable
|
|
745
761
|
|
|
746
762
|
Use a custom browser executable instead of the bundled Chromium. This is useful for:
|
|
763
|
+
|
|
747
764
|
- **Serverless deployment**: Use lightweight Chromium builds like `@sparticuz/chromium` (~50MB vs ~684MB)
|
|
748
765
|
- **System browsers**: Use an existing Chrome/Chromium installation
|
|
749
766
|
- **Custom builds**: Use modified browser builds
|
|
@@ -804,6 +821,7 @@ agent-browser screenshot report.png
|
|
|
804
821
|
```
|
|
805
822
|
|
|
806
823
|
The `--allow-file-access` flag adds Chromium flags (`--allow-file-access-from-files`, `--allow-file-access`) that allow `file://` URLs to:
|
|
824
|
+
|
|
807
825
|
- Load and render local files
|
|
808
826
|
- Access other local files via JavaScript (XHR, fetch)
|
|
809
827
|
- Load local resources (images, scripts, stylesheets)
|
|
@@ -831,10 +849,12 @@ agent-browser --cdp "wss://your-browser-service.com/cdp?token=..." snapshot
|
|
|
831
849
|
```
|
|
832
850
|
|
|
833
851
|
The `--cdp` flag accepts either:
|
|
852
|
+
|
|
834
853
|
- A port number (e.g., `9222`) for local connections via `http://localhost:{port}`
|
|
835
854
|
- A full WebSocket URL (e.g., `wss://...` or `ws://...`) for remote browser services
|
|
836
855
|
|
|
837
856
|
This enables control of:
|
|
857
|
+
|
|
838
858
|
- Electron apps
|
|
839
859
|
- Chrome/Chromium instances with remote debugging
|
|
840
860
|
- WebView2 applications
|
|
@@ -854,10 +874,12 @@ AGENT_BROWSER_AUTO_CONNECT=1 agent-browser snapshot
|
|
|
854
874
|
```
|
|
855
875
|
|
|
856
876
|
Auto-connect discovers Chrome by:
|
|
877
|
+
|
|
857
878
|
1. Reading Chrome's `DevToolsActivePort` file from the default user data directory
|
|
858
879
|
2. Falling back to probing common debugging ports (9222, 9229)
|
|
859
880
|
|
|
860
881
|
This is useful when:
|
|
882
|
+
|
|
861
883
|
- Chrome 144+ has remote debugging enabled via `chrome://inspect/#remote-debugging` (which uses a dynamic port)
|
|
862
884
|
- You want a zero-configuration connection to your existing browser
|
|
863
885
|
- You don't want to track which port Chrome is using
|
|
@@ -881,6 +903,7 @@ This starts a WebSocket server on the specified port that streams the browser vi
|
|
|
881
903
|
Connect to `ws://localhost:9223` to receive frames and send input:
|
|
882
904
|
|
|
883
905
|
**Receive frames:**
|
|
906
|
+
|
|
884
907
|
```json
|
|
885
908
|
{
|
|
886
909
|
"type": "frame",
|
|
@@ -897,6 +920,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
897
920
|
```
|
|
898
921
|
|
|
899
922
|
**Send mouse events:**
|
|
923
|
+
|
|
900
924
|
```json
|
|
901
925
|
{
|
|
902
926
|
"type": "input_mouse",
|
|
@@ -909,6 +933,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
909
933
|
```
|
|
910
934
|
|
|
911
935
|
**Send keyboard events:**
|
|
936
|
+
|
|
912
937
|
```json
|
|
913
938
|
{
|
|
914
939
|
"type": "input_keyboard",
|
|
@@ -919,6 +944,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
919
944
|
```
|
|
920
945
|
|
|
921
946
|
**Send touch events:**
|
|
947
|
+
|
|
922
948
|
```json
|
|
923
949
|
{
|
|
924
950
|
"type": "input_touch",
|
|
@@ -939,16 +965,19 @@ await browser.launch({ headless: true });
|
|
|
939
965
|
await browser.navigate('https://example.com');
|
|
940
966
|
|
|
941
967
|
// Start screencast
|
|
942
|
-
await browser.startScreencast(
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
968
|
+
await browser.startScreencast(
|
|
969
|
+
(frame) => {
|
|
970
|
+
// frame.data is base64-encoded image
|
|
971
|
+
// frame.metadata contains viewport info
|
|
972
|
+
console.log('Frame received:', frame.metadata.deviceWidth, 'x', frame.metadata.deviceHeight);
|
|
973
|
+
},
|
|
974
|
+
{
|
|
975
|
+
format: 'jpeg',
|
|
976
|
+
quality: 80,
|
|
977
|
+
maxWidth: 1280,
|
|
978
|
+
maxHeight: 720,
|
|
979
|
+
}
|
|
980
|
+
);
|
|
952
981
|
|
|
953
982
|
// Inject mouse events
|
|
954
983
|
await browser.injectMouseEvent({
|
|
@@ -973,61 +1002,22 @@ await browser.stopScreencast();
|
|
|
973
1002
|
|
|
974
1003
|
agent-browser uses a client-daemon architecture:
|
|
975
1004
|
|
|
976
|
-
1. **Rust CLI**
|
|
977
|
-
2. **
|
|
978
|
-
3. **Native Daemon** (experimental, `--native`) - Pure Rust daemon using direct CDP, no Node.js required
|
|
979
|
-
4. **Fallback** - If native binary unavailable, uses Node.js directly
|
|
980
|
-
|
|
981
|
-
The daemon starts automatically on first command and persists between commands for fast subsequent operations.
|
|
1005
|
+
1. **Rust CLI** - Parses commands, communicates with daemon
|
|
1006
|
+
2. **Rust Daemon** - Pure Rust daemon using direct CDP, no Node.js required
|
|
982
1007
|
|
|
983
|
-
|
|
1008
|
+
The daemon starts automatically on first command and persists between commands for fast subsequent operations. To auto-shutdown the daemon after a period of inactivity, set `AGENT_BROWSER_IDLE_TIMEOUT_MS` (value in milliseconds). When set, the daemon closes the browser and exits after receiving no commands for the specified duration.
|
|
984
1009
|
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
The native daemon is a pure Rust implementation that communicates with Chrome directly via CDP, eliminating the Node.js and Playwright dependencies. It is currently **experimental** and opt-in.
|
|
988
|
-
|
|
989
|
-
### Enabling Native Mode
|
|
990
|
-
|
|
991
|
-
```bash
|
|
992
|
-
# Via flag
|
|
993
|
-
agent-browser --native open example.com
|
|
994
|
-
|
|
995
|
-
# Via environment variable (recommended for persistent use)
|
|
996
|
-
export AGENT_BROWSER_NATIVE=1
|
|
997
|
-
agent-browser open example.com
|
|
998
|
-
```
|
|
999
|
-
|
|
1000
|
-
Or add to your config file (`agent-browser.json`):
|
|
1001
|
-
|
|
1002
|
-
```json
|
|
1003
|
-
{"native": true}
|
|
1004
|
-
```
|
|
1005
|
-
|
|
1006
|
-
### What's Different
|
|
1007
|
-
|
|
1008
|
-
| | Default (Node.js) | Native (`--native`) |
|
|
1009
|
-
|---|---|---|
|
|
1010
|
-
| **Runtime** | Node.js + Playwright | Pure Rust binary |
|
|
1011
|
-
| **Protocol** | Playwright protocol | Direct CDP / WebDriver |
|
|
1012
|
-
| **Install size** | Larger (Node.js + npm deps) | Smaller (single binary) |
|
|
1013
|
-
| **Browser support** | Chromium, Firefox, WebKit | Chromium, Safari (via WebDriver) |
|
|
1014
|
-
| **Stability** | Stable | Experimental |
|
|
1015
|
-
|
|
1016
|
-
### Known Limitations
|
|
1017
|
-
|
|
1018
|
-
- Firefox and WebKit are not yet supported (Chromium and Safari only)
|
|
1019
|
-
- Some Playwright-specific features (tracing format, HAR export) are not available
|
|
1020
|
-
- The native daemon and Node.js daemon share the same session socket, so you cannot run both simultaneously for the same session. Use `agent-browser close` before switching modes.
|
|
1010
|
+
**Browser Engine:** Uses Chrome (from Chrome for Testing) by default. The `--engine` flag selects between `chrome` and `lightpanda`. Supported browsers: Chromium/Chrome (via CDP) and Safari (via WebDriver for iOS).
|
|
1021
1011
|
|
|
1022
1012
|
## Platforms
|
|
1023
1013
|
|
|
1024
|
-
| Platform
|
|
1025
|
-
|
|
1026
|
-
| macOS ARM64 | Native Rust |
|
|
1027
|
-
| macOS x64
|
|
1028
|
-
| Linux ARM64 | Native Rust |
|
|
1029
|
-
| Linux x64
|
|
1030
|
-
| Windows x64 | Native Rust |
|
|
1014
|
+
| Platform | Binary |
|
|
1015
|
+
| ----------- | ----------- |
|
|
1016
|
+
| macOS ARM64 | Native Rust |
|
|
1017
|
+
| macOS x64 | Native Rust |
|
|
1018
|
+
| Linux ARM64 | Native Rust |
|
|
1019
|
+
| Linux x64 | Native Rust |
|
|
1020
|
+
| Windows x64 | Native Rust |
|
|
1031
1021
|
|
|
1032
1022
|
## Usage with AI Agents
|
|
1033
1023
|
|
|
@@ -1071,6 +1061,7 @@ For more consistent results, add to your project or global instructions file:
|
|
|
1071
1061
|
Use `agent-browser` for web automation. Run `agent-browser --help` for all commands.
|
|
1072
1062
|
|
|
1073
1063
|
Core workflow:
|
|
1064
|
+
|
|
1074
1065
|
1. `agent-browser open <url>` - Navigate to page
|
|
1075
1066
|
2. `agent-browser snapshot -i` - Get interactive elements with refs (@e1, @e2)
|
|
1076
1067
|
3. `agent-browser click @e1` / `fill @e2 "text"` - Interact using refs
|
|
@@ -1122,11 +1113,11 @@ export AGENT_BROWSER_IOS_DEVICE="iPhone 16 Pro"
|
|
|
1122
1113
|
agent-browser open https://example.com
|
|
1123
1114
|
```
|
|
1124
1115
|
|
|
1125
|
-
| Variable
|
|
1126
|
-
|
|
1127
|
-
| `AGENT_BROWSER_PROVIDER`
|
|
1116
|
+
| Variable | Description |
|
|
1117
|
+
| -------------------------- | ----------------------------------------------- |
|
|
1118
|
+
| `AGENT_BROWSER_PROVIDER` | Set to `ios` to enable iOS mode |
|
|
1128
1119
|
| `AGENT_BROWSER_IOS_DEVICE` | Device name (e.g., "iPhone 16 Pro", "iPad Pro") |
|
|
1129
|
-
| `AGENT_BROWSER_IOS_UDID`
|
|
1120
|
+
| `AGENT_BROWSER_IOS_UDID` | Device UDID (alternative to device name) |
|
|
1130
1121
|
|
|
1131
1122
|
**Supported devices:** All iOS Simulators available in Xcode (iPhones, iPads), plus real iOS devices.
|
|
1132
1123
|
|
|
@@ -1137,6 +1128,7 @@ agent-browser open https://example.com
|
|
|
1137
1128
|
Appium also supports real iOS devices connected via USB. This requires additional one-time setup:
|
|
1138
1129
|
|
|
1139
1130
|
**1. Get your device UDID:**
|
|
1131
|
+
|
|
1140
1132
|
```bash
|
|
1141
1133
|
xcrun xctrace list devices
|
|
1142
1134
|
# or
|
|
@@ -1144,6 +1136,7 @@ system_profiler SPUSBDataType | grep -A 5 "iPhone\|iPad"
|
|
|
1144
1136
|
```
|
|
1145
1137
|
|
|
1146
1138
|
**2. Sign WebDriverAgent (one-time):**
|
|
1139
|
+
|
|
1147
1140
|
```bash
|
|
1148
1141
|
# Open the WebDriverAgent Xcode project
|
|
1149
1142
|
cd ~/.appium/node_modules/appium-xcuitest-driver/node_modules/appium-webdriveragent
|
|
@@ -1151,12 +1144,14 @@ open WebDriverAgent.xcodeproj
|
|
|
1151
1144
|
```
|
|
1152
1145
|
|
|
1153
1146
|
In Xcode:
|
|
1147
|
+
|
|
1154
1148
|
- Select the `WebDriverAgentRunner` target
|
|
1155
1149
|
- Go to Signing & Capabilities
|
|
1156
1150
|
- Select your Team (requires Apple Developer account, free tier works)
|
|
1157
1151
|
- Let Xcode manage signing automatically
|
|
1158
1152
|
|
|
1159
1153
|
**3. Use with agent-browser:**
|
|
1154
|
+
|
|
1160
1155
|
```bash
|
|
1161
1156
|
# Connect device via USB, then:
|
|
1162
1157
|
agent-browser -p ios --device "<DEVICE_UDID>" open https://example.com
|
|
@@ -1166,11 +1161,44 @@ agent-browser -p ios --device "John's iPhone" open https://example.com
|
|
|
1166
1161
|
```
|
|
1167
1162
|
|
|
1168
1163
|
**Real device notes:**
|
|
1164
|
+
|
|
1169
1165
|
- First run installs WebDriverAgent to the device (may require Trust prompt)
|
|
1170
1166
|
- Device must be unlocked and connected via USB
|
|
1171
1167
|
- Slightly slower initial connection than simulator
|
|
1172
1168
|
- Tests against real Safari performance and behavior
|
|
1173
1169
|
|
|
1170
|
+
### Browserless
|
|
1171
|
+
|
|
1172
|
+
[Browserless](https://browserless.io) provides cloud browser infrastructure with a Sessions API. Use it when running agent-browser in environments where a local browser isn't available.
|
|
1173
|
+
|
|
1174
|
+
To enable Browserless, use the `-p` flag:
|
|
1175
|
+
|
|
1176
|
+
```bash
|
|
1177
|
+
export BROWSERLESS_API_KEY="your-api-token"
|
|
1178
|
+
agent-browser -p browserless open https://example.com
|
|
1179
|
+
```
|
|
1180
|
+
|
|
1181
|
+
Or use environment variables for CI/scripts:
|
|
1182
|
+
|
|
1183
|
+
```bash
|
|
1184
|
+
export AGENT_BROWSER_PROVIDER=browserless
|
|
1185
|
+
export BROWSERLESS_API_KEY="your-api-token"
|
|
1186
|
+
agent-browser open https://example.com
|
|
1187
|
+
```
|
|
1188
|
+
|
|
1189
|
+
Optional configuration via environment variables:
|
|
1190
|
+
|
|
1191
|
+
| Variable | Description | Default |
|
|
1192
|
+
| -------------------------- | ------------------------------------------------ | --------------------------------------- |
|
|
1193
|
+
| `BROWSERLESS_API_URL` | Base API URL (for custom regions or self-hosted) | `https://production-sfo.browserless.io` |
|
|
1194
|
+
| `BROWSERLESS_BROWSER_TYPE` | Type of browser to use (chromium or chrome) | chromium |
|
|
1195
|
+
| `BROWSERLESS_TTL` | Session TTL in milliseconds | `300000` |
|
|
1196
|
+
| `BROWSERLESS_STEALTH` | Enable stealth mode (`true`/`false`) | `true` |
|
|
1197
|
+
|
|
1198
|
+
When enabled, agent-browser connects to a Browserless cloud session instead of launching a local browser. All commands work identically.
|
|
1199
|
+
|
|
1200
|
+
Get your API token from the [Browserless Dashboard](https://browserless.io).
|
|
1201
|
+
|
|
1174
1202
|
### Browserbase
|
|
1175
1203
|
|
|
1176
1204
|
[Browserbase](https://browserbase.com) provides remote browser infrastructure to make deployment of agentic browsing agents easy. Use it when running the agent-browser CLI in an environment where a local browser isn't feasible.
|
|
@@ -1238,12 +1266,12 @@ agent-browser open https://example.com
|
|
|
1238
1266
|
|
|
1239
1267
|
Optional configuration via environment variables:
|
|
1240
1268
|
|
|
1241
|
-
| Variable
|
|
1242
|
-
|
|
1243
|
-
| `KERNEL_HEADLESS`
|
|
1244
|
-
| `KERNEL_STEALTH`
|
|
1245
|
-
| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds
|
|
1246
|
-
| `KERNEL_PROFILE_NAME`
|
|
1269
|
+
| Variable | Description | Default |
|
|
1270
|
+
| ------------------------ | -------------------------------------------------------------------------------- | ------- |
|
|
1271
|
+
| `KERNEL_HEADLESS` | Run browser in headless mode (`true`/`false`) | `false` |
|
|
1272
|
+
| `KERNEL_STEALTH` | Enable stealth mode to avoid bot detection (`true`/`false`) | `true` |
|
|
1273
|
+
| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds | `300` |
|
|
1274
|
+
| `KERNEL_PROFILE_NAME` | Browser profile name for persistent cookies/logins (created if it doesn't exist) | (none) |
|
|
1247
1275
|
|
|
1248
1276
|
When enabled, agent-browser connects to a Kernel cloud session instead of launching a local browser. All commands work identically.
|
|
1249
1277
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-browser",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.20.0",
|
|
4
4
|
"description": "Headless browser automation CLI for AI agents",
|
|
5
5
|
"type": "module",
|
|
6
|
-
"main": "dist/daemon.js",
|
|
7
6
|
"files": [
|
|
8
|
-
"dist",
|
|
9
7
|
"bin",
|
|
10
8
|
"scripts",
|
|
11
9
|
"skills"
|
|
@@ -17,7 +15,8 @@
|
|
|
17
15
|
"browser",
|
|
18
16
|
"automation",
|
|
19
17
|
"headless",
|
|
20
|
-
"
|
|
18
|
+
"chrome",
|
|
19
|
+
"cdp",
|
|
21
20
|
"cli",
|
|
22
21
|
"agent"
|
|
23
22
|
],
|
|
@@ -30,55 +29,22 @@
|
|
|
30
29
|
"url": "https://github.com/vercel-labs/agent-browser/issues"
|
|
31
30
|
},
|
|
32
31
|
"homepage": "https://github.com/vercel-labs/agent-browser#readme",
|
|
33
|
-
"dependencies": {
|
|
34
|
-
"node-simctl": "^7.4.0",
|
|
35
|
-
"playwright-core": "^1.57.0",
|
|
36
|
-
"webdriverio": "^9.15.0",
|
|
37
|
-
"ws": "^8.19.0",
|
|
38
|
-
"zod": "^3.22.4"
|
|
39
|
-
},
|
|
40
32
|
"devDependencies": {
|
|
41
|
-
"@
|
|
42
|
-
"@changesets/cli": "^2.29.8",
|
|
43
|
-
"@types/node": "^20.10.0",
|
|
44
|
-
"@types/ws": "^8.18.1",
|
|
45
|
-
"husky": "^9.1.7",
|
|
46
|
-
"lint-staged": "^15.2.11",
|
|
47
|
-
"playwright": "^1.57.0",
|
|
48
|
-
"prettier": "^3.7.4",
|
|
49
|
-
"tsx": "^4.6.0",
|
|
50
|
-
"typescript": "^5.3.0",
|
|
51
|
-
"vitest": "^4.0.16"
|
|
52
|
-
},
|
|
53
|
-
"lint-staged": {
|
|
54
|
-
"src/**/*.ts": "prettier --write"
|
|
33
|
+
"@changesets/cli": "^2.29.8"
|
|
55
34
|
},
|
|
56
35
|
"scripts": {
|
|
57
36
|
"version:sync": "node scripts/sync-version.js",
|
|
58
37
|
"version": "npm run version:sync && git add cli/Cargo.toml",
|
|
59
|
-
"build": "tsc",
|
|
60
38
|
"build:native": "npm run version:sync && cargo build --release --manifest-path cli/Cargo.toml && node scripts/copy-native.js",
|
|
61
39
|
"build:linux": "npm run version:sync && docker compose -f docker/docker-compose.yml run --rm build-linux",
|
|
62
40
|
"build:macos": "npm run version:sync && (cargo build --release --manifest-path cli/Cargo.toml --target aarch64-apple-darwin & cargo build --release --manifest-path cli/Cargo.toml --target x86_64-apple-darwin & wait) && cp cli/target/aarch64-apple-darwin/release/agent-browser bin/agent-browser-darwin-arm64 && cp cli/target/x86_64-apple-darwin/release/agent-browser bin/agent-browser-darwin-x64",
|
|
63
41
|
"build:windows": "npm run version:sync && docker compose -f docker/docker-compose.yml run --rm build-windows",
|
|
64
42
|
"build:all-platforms": "npm run version:sync && (npm run build:linux & npm run build:windows & wait) && npm run build:macos",
|
|
65
43
|
"build:docker": "docker build -t agent-browser-builder -f docker/Dockerfile.build .",
|
|
66
|
-
"release": "npm run version:sync && npm run build
|
|
67
|
-
"start": "node dist/daemon.js",
|
|
68
|
-
"dev": "tsx src/daemon.ts",
|
|
69
|
-
"typecheck": "tsc --noEmit",
|
|
70
|
-
"format": "prettier --write 'src/**/*.ts'",
|
|
71
|
-
"format:check": "prettier --check 'src/**/*.ts'",
|
|
72
|
-
"test": "vitest run",
|
|
73
|
-
"test:watch": "vitest",
|
|
74
|
-
"test:e2e:dogfood": "vitest run test/e2e/dogfood.eval.ts",
|
|
75
|
-
"bench": "pnpm build && tsx test/benchmarks/run.ts",
|
|
76
|
-
"bench:node": "pnpm build && tsx test/benchmarks/run.ts --node-only",
|
|
77
|
-
"bench:native": "pnpm build && tsx test/benchmarks/run.ts --native-only",
|
|
78
|
-
"bench:engine": "pnpm build && tsx test/benchmarks/run.ts --engine",
|
|
44
|
+
"release": "npm run version:sync && npm run build:all-platforms && npm publish",
|
|
79
45
|
"postinstall": "node scripts/postinstall.js",
|
|
80
46
|
"changeset": "changeset",
|
|
81
47
|
"ci:version": "changeset version && pnpm run version:sync && pnpm install --no-frozen-lockfile",
|
|
82
|
-
"ci:publish": "pnpm run version:sync &&
|
|
48
|
+
"ci:publish": "pnpm run version:sync && changeset publish"
|
|
83
49
|
}
|
|
84
50
|
}
|