agent-browser 0.17.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +194 -62
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/bin/agent-browser-darwin-x64 +0 -0
- package/bin/agent-browser-linux-arm64 +0 -0
- package/bin/agent-browser-linux-x64 +0 -0
- package/bin/agent-browser-win32-x64.exe +0 -0
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +108 -45
- package/dist/actions.js.map +1 -1
- package/dist/browser.d.ts +50 -4
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +245 -38
- package/dist/browser.js.map +1 -1
- package/dist/daemon.d.ts +5 -0
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +62 -3
- package/dist/daemon.js.map +1 -1
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +5 -0
- package/dist/index.js.map +1 -0
- package/dist/inspect-server.d.ts +26 -0
- package/dist/inspect-server.d.ts.map +1 -0
- package/dist/inspect-server.js +218 -0
- package/dist/inspect-server.js.map +1 -0
- package/dist/protocol.d.ts +3 -1
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +7 -3
- package/dist/protocol.js.map +1 -1
- package/dist/types.d.ts +10 -2
- package/dist/types.d.ts.map +1 -1
- package/package.json +11 -2
- package/skills/agent-browser/SKILL.md +116 -14
- package/skills/agent-browser/references/authentication.md +101 -0
- package/skills/agent-browser/references/commands.md +2 -0
- package/skills/vercel-sandbox/SKILL.md +280 -0
package/README.md
CHANGED
|
@@ -115,6 +115,8 @@ agent-browser drag <src> <tgt> # Drag and drop
|
|
|
115
115
|
agent-browser upload <sel> <files> # Upload files
|
|
116
116
|
agent-browser screenshot [path] # Take screenshot (--full for full page, saves to a temporary directory if no path)
|
|
117
117
|
agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
|
|
118
|
+
agent-browser screenshot --screenshot-dir ./shots # Save to custom directory
|
|
119
|
+
agent-browser screenshot --screenshot-format jpeg --screenshot-quality 80
|
|
118
120
|
agent-browser pdf <path> # Save as PDF
|
|
119
121
|
agent-browser snapshot # Accessibility tree with refs (best for AI)
|
|
120
122
|
agent-browser eval <js> # Run JavaScript (-b for base64, --stdin for piped input)
|
|
@@ -131,6 +133,7 @@ agent-browser get value <sel> # Get input value
|
|
|
131
133
|
agent-browser get attr <sel> <attr> # Get attribute
|
|
132
134
|
agent-browser get title # Get page title
|
|
133
135
|
agent-browser get url # Get current URL
|
|
136
|
+
agent-browser get cdp-url # Get CDP WebSocket URL (for DevTools, debugging)
|
|
134
137
|
agent-browser get count <sel> # Count matching elements
|
|
135
138
|
agent-browser get box <sel> # Get bounding box
|
|
136
139
|
agent-browser get styles <sel> # Get computed styles
|
|
@@ -164,6 +167,7 @@ agent-browser find nth <n> <sel> <action> [value] # Nth match
|
|
|
164
167
|
**Options:** `--name <name>` (filter role by accessible name), `--exact` (require exact text match)
|
|
165
168
|
|
|
166
169
|
**Examples:**
|
|
170
|
+
|
|
167
171
|
```bash
|
|
168
172
|
agent-browser find role button click --name "Submit"
|
|
169
173
|
agent-browser find text "Sign In" click
|
|
@@ -177,14 +181,27 @@ agent-browser find nth 2 "a" text
|
|
|
177
181
|
```bash
|
|
178
182
|
agent-browser wait <selector> # Wait for element to be visible
|
|
179
183
|
agent-browser wait <ms> # Wait for time (milliseconds)
|
|
180
|
-
agent-browser wait --text "Welcome" # Wait for text to appear
|
|
184
|
+
agent-browser wait --text "Welcome" # Wait for text to appear (substring match)
|
|
181
185
|
agent-browser wait --url "**/dash" # Wait for URL pattern
|
|
182
186
|
agent-browser wait --load networkidle # Wait for load state
|
|
183
187
|
agent-browser wait --fn "window.ready === true" # Wait for JS condition
|
|
188
|
+
|
|
189
|
+
# Wait for text/element to disappear
|
|
190
|
+
agent-browser wait --fn "!document.body.innerText.includes('Loading...')"
|
|
191
|
+
agent-browser wait "#spinner" --state hidden
|
|
184
192
|
```
|
|
185
193
|
|
|
186
194
|
**Load states:** `load`, `domcontentloaded`, `networkidle`
|
|
187
195
|
|
|
196
|
+
### Clipboard
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
agent-browser clipboard read # Read text from clipboard
|
|
200
|
+
agent-browser clipboard write "Hello, World!" # Write text to clipboard
|
|
201
|
+
agent-browser clipboard copy # Copy current selection (Ctrl+C)
|
|
202
|
+
agent-browser clipboard paste # Paste from clipboard (Ctrl+V)
|
|
203
|
+
```
|
|
204
|
+
|
|
188
205
|
### Mouse Control
|
|
189
206
|
|
|
190
207
|
```bash
|
|
@@ -283,6 +300,7 @@ agent-browser console --clear # Clear console
|
|
|
283
300
|
agent-browser errors # View page errors (uncaught JavaScript exceptions)
|
|
284
301
|
agent-browser errors --clear # Clear errors
|
|
285
302
|
agent-browser highlight <sel> # Highlight element
|
|
303
|
+
agent-browser inspect # Open Chrome DevTools for the active page
|
|
286
304
|
agent-browser state save <path> # Save auth state
|
|
287
305
|
agent-browser state load <path> # Load auth state
|
|
288
306
|
agent-browser state list # List saved state files
|
|
@@ -308,6 +326,47 @@ agent-browser install # Download Chromium browser
|
|
|
308
326
|
agent-browser install --with-deps # Also install system deps (Linux)
|
|
309
327
|
```
|
|
310
328
|
|
|
329
|
+
## Authentication
|
|
330
|
+
|
|
331
|
+
agent-browser provides multiple ways to persist login sessions so you don't re-authenticate every run.
|
|
332
|
+
|
|
333
|
+
### Quick summary
|
|
334
|
+
|
|
335
|
+
| Approach | Best for | Flag / Env |
|
|
336
|
+
|----------|----------|------------|
|
|
337
|
+
| **Persistent profile** | Full browser state (cookies, IndexedDB, service workers, cache) across restarts | `--profile <path>` / `AGENT_BROWSER_PROFILE` |
|
|
338
|
+
| **Session persistence** | Auto-save/restore cookies + localStorage by name | `--session-name <name>` / `AGENT_BROWSER_SESSION_NAME` |
|
|
339
|
+
| **Import from your browser** | Grab auth from a Chrome session you already logged into | `--auto-connect` + `state save` |
|
|
340
|
+
| **State file** | Load a previously saved state JSON on launch | `--state <path>` / `AGENT_BROWSER_STATE` |
|
|
341
|
+
| **Auth vault** | Store credentials locally (encrypted), login by name | `auth save` / `auth login` |
|
|
342
|
+
|
|
343
|
+
### Import auth from your browser
|
|
344
|
+
|
|
345
|
+
If you are already logged in to a site in Chrome, you can grab that auth state and reuse it:
|
|
346
|
+
|
|
347
|
+
```bash
|
|
348
|
+
# 1. Launch Chrome with remote debugging enabled
|
|
349
|
+
# macOS:
|
|
350
|
+
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222
|
|
351
|
+
# Or use --auto-connect to discover an already-running Chrome
|
|
352
|
+
|
|
353
|
+
# 2. Connect and save the authenticated state
|
|
354
|
+
agent-browser --auto-connect state save ./my-auth.json
|
|
355
|
+
|
|
356
|
+
# 3. Use the saved auth in future sessions
|
|
357
|
+
agent-browser --state ./my-auth.json open https://app.example.com/dashboard
|
|
358
|
+
|
|
359
|
+
# 4. Or use --session-name for automatic persistence
|
|
360
|
+
agent-browser --session-name myapp state load ./my-auth.json
|
|
361
|
+
# From now on, --session-name myapp auto-saves/restores this state
|
|
362
|
+
```
|
|
363
|
+
|
|
364
|
+
> **Security notes:**
|
|
365
|
+
> - `--remote-debugging-port` exposes full browser control on localhost. Any local process can connect. Only use on trusted machines and close Chrome when done.
|
|
366
|
+
> - State files contain session tokens in plaintext. Add them to `.gitignore` and delete when no longer needed. For encryption at rest, set `AGENT_BROWSER_ENCRYPTION_KEY` (see [State Encryption](#state-encryption)).
|
|
367
|
+
|
|
368
|
+
For full details on login flows, OAuth, 2FA, cookie-based auth, and the auth vault, see the [Authentication](docs/src/app/sessions/page.mdx) docs.
|
|
369
|
+
|
|
311
370
|
## Sessions
|
|
312
371
|
|
|
313
372
|
Run multiple isolated browser instances:
|
|
@@ -332,6 +391,7 @@ agent-browser session
|
|
|
332
391
|
```
|
|
333
392
|
|
|
334
393
|
Each session has its own:
|
|
394
|
+
|
|
335
395
|
- Browser instance
|
|
336
396
|
- Cookies and storage
|
|
337
397
|
- Navigation history
|
|
@@ -353,6 +413,7 @@ AGENT_BROWSER_PROFILE=~/.myapp-profile agent-browser open myapp.com
|
|
|
353
413
|
```
|
|
354
414
|
|
|
355
415
|
The profile directory stores:
|
|
416
|
+
|
|
356
417
|
- Cookies and localStorage
|
|
357
418
|
- IndexedDB data
|
|
358
419
|
- Service workers
|
|
@@ -389,10 +450,10 @@ export AGENT_BROWSER_ENCRYPTION_KEY=<64-char-hex-key>
|
|
|
389
450
|
agent-browser --session-name secure open example.com
|
|
390
451
|
```
|
|
391
452
|
|
|
392
|
-
| Variable
|
|
393
|
-
|
|
394
|
-
| `AGENT_BROWSER_SESSION_NAME`
|
|
395
|
-
| `AGENT_BROWSER_ENCRYPTION_KEY`
|
|
453
|
+
| Variable | Description |
|
|
454
|
+
| --------------------------------- | -------------------------------------------------- |
|
|
455
|
+
| `AGENT_BROWSER_SESSION_NAME` | Auto-save/load state persistence name |
|
|
456
|
+
| `AGENT_BROWSER_ENCRYPTION_KEY` | 64-char hex key for AES-256-GCM encryption |
|
|
396
457
|
| `AGENT_BROWSER_STATE_EXPIRE_DAYS` | Auto-delete states older than N days (default: 30) |
|
|
397
458
|
|
|
398
459
|
## Security
|
|
@@ -406,16 +467,16 @@ agent-browser includes security features for safe AI agent deployments. All feat
|
|
|
406
467
|
- **Action Confirmation** -- Require explicit approval for sensitive action categories: `--confirm-actions eval,download`
|
|
407
468
|
- **Output Length Limits** -- Prevent context flooding: `--max-output 50000`
|
|
408
469
|
|
|
409
|
-
| Variable
|
|
410
|
-
|
|
411
|
-
| `AGENT_BROWSER_CONTENT_BOUNDARIES`
|
|
412
|
-
| `AGENT_BROWSER_MAX_OUTPUT`
|
|
413
|
-
| `AGENT_BROWSER_ALLOWED_DOMAINS`
|
|
414
|
-
| `AGENT_BROWSER_ACTION_POLICY`
|
|
415
|
-
| `AGENT_BROWSER_CONFIRM_ACTIONS`
|
|
416
|
-
| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts
|
|
470
|
+
| Variable | Description |
|
|
471
|
+
| ----------------------------------- | ---------------------------------------- |
|
|
472
|
+
| `AGENT_BROWSER_CONTENT_BOUNDARIES` | Wrap page output in boundary markers |
|
|
473
|
+
| `AGENT_BROWSER_MAX_OUTPUT` | Max characters for page output |
|
|
474
|
+
| `AGENT_BROWSER_ALLOWED_DOMAINS` | Comma-separated allowed domain patterns |
|
|
475
|
+
| `AGENT_BROWSER_ACTION_POLICY` | Path to action policy JSON file |
|
|
476
|
+
| `AGENT_BROWSER_CONFIRM_ACTIONS` | Action categories requiring confirmation |
|
|
477
|
+
| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
|
|
417
478
|
|
|
418
|
-
See [Security documentation](https://agent-browser.
|
|
479
|
+
See [Security documentation](https://agent-browser.dev/security) for details.
|
|
419
480
|
|
|
420
481
|
## Snapshot Options
|
|
421
482
|
|
|
@@ -431,13 +492,13 @@ agent-browser snapshot -s "#main" # Scope to CSS selector
|
|
|
431
492
|
agent-browser snapshot -i -c -d 5 # Combine options
|
|
432
493
|
```
|
|
433
494
|
|
|
434
|
-
| Option
|
|
435
|
-
|
|
436
|
-
| `-i, --interactive`
|
|
437
|
-
| `-C, --cursor`
|
|
438
|
-
| `-c, --compact`
|
|
439
|
-
| `-d, --depth <n>`
|
|
440
|
-
| `-s, --selector <sel>` | Scope to CSS selector
|
|
495
|
+
| Option | Description |
|
|
496
|
+
| ---------------------- | ----------------------------------------------------------------------- |
|
|
497
|
+
| `-i, --interactive` | Only show interactive elements (buttons, links, inputs) |
|
|
498
|
+
| `-C, --cursor` | Include cursor-interactive elements (cursor:pointer, onclick, tabindex) |
|
|
499
|
+
| `-c, --compact` | Remove empty structural elements |
|
|
500
|
+
| `-d, --depth <n>` | Limit tree depth |
|
|
501
|
+
| `-s, --selector <sel>` | Scope to CSS selector |
|
|
441
502
|
|
|
442
503
|
The `-C` flag is useful for modern web apps that use custom clickable elements (divs, spans) instead of standard buttons/links.
|
|
443
504
|
|
|
@@ -445,6 +506,8 @@ The `-C` flag is useful for modern web apps that use custom clickable elements (
|
|
|
445
506
|
|
|
446
507
|
The `--annotate` flag overlays numbered labels on interactive elements in the screenshot. Each label `[N]` corresponds to ref `@eN`, so the same refs work for both visual and text-based workflows.
|
|
447
508
|
|
|
509
|
+
In native mode, annotated screenshots are supported on the CDP-backed browser path (`--native` with Chromium/Lightpanda). The Safari/WebDriver backend does not yet support `--annotate`.
|
|
510
|
+
|
|
448
511
|
```bash
|
|
449
512
|
agent-browser screenshot --annotate
|
|
450
513
|
# -> Screenshot saved to /tmp/screenshot-2026-02-17T12-00-00-abc123.png
|
|
@@ -484,6 +547,9 @@ This is useful for multimodal AI models that can reason about visual layout, unl
|
|
|
484
547
|
| `--json` | JSON output (for agents) |
|
|
485
548
|
| `--full, -f` | Full page screenshot |
|
|
486
549
|
| `--annotate` | Annotated screenshot with numbered element labels (or `AGENT_BROWSER_ANNOTATE` env) |
|
|
550
|
+
| `--screenshot-dir <path>` | Default screenshot output directory (or `AGENT_BROWSER_SCREENSHOT_DIR` env) |
|
|
551
|
+
| `--screenshot-quality <n>` | JPEG quality 0-100 (or `AGENT_BROWSER_SCREENSHOT_QUALITY` env) |
|
|
552
|
+
| `--screenshot-format <fmt>` | Screenshot format: `png`, `jpeg` (or `AGENT_BROWSER_SCREENSHOT_FORMAT` env) |
|
|
487
553
|
| `--headed` | Show browser window (not headless) (or `AGENT_BROWSER_HEADED` env) |
|
|
488
554
|
| `--cdp <port\|url>` | Connect via Chrome DevTools Protocol (port or WebSocket URL) |
|
|
489
555
|
| `--auto-connect` | Auto-discover and connect to running Chrome (or `AGENT_BROWSER_AUTO_CONNECT` env) |
|
|
@@ -551,8 +617,8 @@ export AGENT_BROWSER_DEFAULT_TIMEOUT=45000
|
|
|
551
617
|
|
|
552
618
|
> **Note:** Setting this above 30000 (30s) may cause EAGAIN errors on slow operations because the CLI's read timeout will expire before Playwright responds. The CLI retries transient errors automatically, but response times will increase.
|
|
553
619
|
|
|
554
|
-
| Variable
|
|
555
|
-
|
|
620
|
+
| Variable | Description |
|
|
621
|
+
| ------------------------------- | ------------------------------------------------- |
|
|
556
622
|
| `AGENT_BROWSER_DEFAULT_TIMEOUT` | Default Playwright timeout in ms (default: 25000) |
|
|
557
623
|
|
|
558
624
|
## Selectors
|
|
@@ -578,6 +644,7 @@ agent-browser hover @e4 # Hover the link
|
|
|
578
644
|
```
|
|
579
645
|
|
|
580
646
|
**Why use refs?**
|
|
647
|
+
|
|
581
648
|
- **Deterministic**: Ref points to exact element from snapshot
|
|
582
649
|
- **Fast**: No DOM re-query needed
|
|
583
650
|
- **AI-friendly**: Snapshot + ref workflow is optimal for LLMs
|
|
@@ -678,6 +745,7 @@ agent-browser open other-site.com
|
|
|
678
745
|
```
|
|
679
746
|
|
|
680
747
|
This is useful for:
|
|
748
|
+
|
|
681
749
|
- **Skipping login flows** - Authenticate via headers instead of UI
|
|
682
750
|
- **Switching users** - Start new sessions with different auth tokens
|
|
683
751
|
- **API testing** - Access protected endpoints directly
|
|
@@ -699,6 +767,7 @@ agent-browser set headers '{"X-Custom-Header": "value"}'
|
|
|
699
767
|
## Custom Browser Executable
|
|
700
768
|
|
|
701
769
|
Use a custom browser executable instead of the bundled Chromium. This is useful for:
|
|
770
|
+
|
|
702
771
|
- **Serverless deployment**: Use lightweight Chromium builds like `@sparticuz/chromium` (~50MB vs ~684MB)
|
|
703
772
|
- **System browsers**: Use an existing Chrome/Chromium installation
|
|
704
773
|
- **Custom builds**: Use modified browser builds
|
|
@@ -713,7 +782,22 @@ agent-browser --executable-path /path/to/chromium open example.com
|
|
|
713
782
|
AGENT_BROWSER_EXECUTABLE_PATH=/path/to/chromium agent-browser open example.com
|
|
714
783
|
```
|
|
715
784
|
|
|
716
|
-
### Serverless
|
|
785
|
+
### Serverless (Vercel)
|
|
786
|
+
|
|
787
|
+
Run agent-browser + Chrome in an ephemeral Vercel Sandbox microVM. No external server needed:
|
|
788
|
+
|
|
789
|
+
```typescript
|
|
790
|
+
import { Sandbox } from "@vercel/sandbox";
|
|
791
|
+
|
|
792
|
+
const sandbox = await Sandbox.create({ runtime: "node24" });
|
|
793
|
+
await sandbox.runCommand("agent-browser", ["open", "https://example.com"]);
|
|
794
|
+
const result = await sandbox.runCommand("agent-browser", ["screenshot", "--json"]);
|
|
795
|
+
await sandbox.stop();
|
|
796
|
+
```
|
|
797
|
+
|
|
798
|
+
See the [environments example](examples/environments/) for a working demo with a UI and deploy-to-Vercel button.
|
|
799
|
+
|
|
800
|
+
### Serverless (AWS Lambda)
|
|
717
801
|
|
|
718
802
|
```typescript
|
|
719
803
|
import chromium from '@sparticuz/chromium';
|
|
@@ -744,6 +828,7 @@ agent-browser screenshot report.png
|
|
|
744
828
|
```
|
|
745
829
|
|
|
746
830
|
The `--allow-file-access` flag adds Chromium flags (`--allow-file-access-from-files`, `--allow-file-access`) that allow `file://` URLs to:
|
|
831
|
+
|
|
747
832
|
- Load and render local files
|
|
748
833
|
- Access other local files via JavaScript (XHR, fetch)
|
|
749
834
|
- Load local resources (images, scripts, stylesheets)
|
|
@@ -771,10 +856,12 @@ agent-browser --cdp "wss://your-browser-service.com/cdp?token=..." snapshot
|
|
|
771
856
|
```
|
|
772
857
|
|
|
773
858
|
The `--cdp` flag accepts either:
|
|
859
|
+
|
|
774
860
|
- A port number (e.g., `9222`) for local connections via `http://localhost:{port}`
|
|
775
861
|
- A full WebSocket URL (e.g., `wss://...` or `ws://...`) for remote browser services
|
|
776
862
|
|
|
777
863
|
This enables control of:
|
|
864
|
+
|
|
778
865
|
- Electron apps
|
|
779
866
|
- Chrome/Chromium instances with remote debugging
|
|
780
867
|
- WebView2 applications
|
|
@@ -794,10 +881,12 @@ AGENT_BROWSER_AUTO_CONNECT=1 agent-browser snapshot
|
|
|
794
881
|
```
|
|
795
882
|
|
|
796
883
|
Auto-connect discovers Chrome by:
|
|
884
|
+
|
|
797
885
|
1. Reading Chrome's `DevToolsActivePort` file from the default user data directory
|
|
798
886
|
2. Falling back to probing common debugging ports (9222, 9229)
|
|
799
887
|
|
|
800
888
|
This is useful when:
|
|
889
|
+
|
|
801
890
|
- Chrome 144+ has remote debugging enabled via `chrome://inspect/#remote-debugging` (which uses a dynamic port)
|
|
802
891
|
- You want a zero-configuration connection to your existing browser
|
|
803
892
|
- You don't want to track which port Chrome is using
|
|
@@ -821,6 +910,7 @@ This starts a WebSocket server on the specified port that streams the browser vi
|
|
|
821
910
|
Connect to `ws://localhost:9223` to receive frames and send input:
|
|
822
911
|
|
|
823
912
|
**Receive frames:**
|
|
913
|
+
|
|
824
914
|
```json
|
|
825
915
|
{
|
|
826
916
|
"type": "frame",
|
|
@@ -837,6 +927,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
837
927
|
```
|
|
838
928
|
|
|
839
929
|
**Send mouse events:**
|
|
930
|
+
|
|
840
931
|
```json
|
|
841
932
|
{
|
|
842
933
|
"type": "input_mouse",
|
|
@@ -849,6 +940,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
849
940
|
```
|
|
850
941
|
|
|
851
942
|
**Send keyboard events:**
|
|
943
|
+
|
|
852
944
|
```json
|
|
853
945
|
{
|
|
854
946
|
"type": "input_keyboard",
|
|
@@ -859,6 +951,7 @@ Connect to `ws://localhost:9223` to receive frames and send input:
|
|
|
859
951
|
```
|
|
860
952
|
|
|
861
953
|
**Send touch events:**
|
|
954
|
+
|
|
862
955
|
```json
|
|
863
956
|
{
|
|
864
957
|
"type": "input_touch",
|
|
@@ -879,16 +972,19 @@ await browser.launch({ headless: true });
|
|
|
879
972
|
await browser.navigate('https://example.com');
|
|
880
973
|
|
|
881
974
|
// Start screencast
|
|
882
|
-
await browser.startScreencast(
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
975
|
+
await browser.startScreencast(
|
|
976
|
+
(frame) => {
|
|
977
|
+
// frame.data is base64-encoded image
|
|
978
|
+
// frame.metadata contains viewport info
|
|
979
|
+
console.log('Frame received:', frame.metadata.deviceWidth, 'x', frame.metadata.deviceHeight);
|
|
980
|
+
},
|
|
981
|
+
{
|
|
982
|
+
format: 'jpeg',
|
|
983
|
+
quality: 80,
|
|
984
|
+
maxWidth: 1280,
|
|
985
|
+
maxHeight: 720,
|
|
986
|
+
}
|
|
987
|
+
);
|
|
892
988
|
|
|
893
989
|
// Inject mouse events
|
|
894
990
|
await browser.injectMouseEvent({
|
|
@@ -940,18 +1036,18 @@ agent-browser open example.com
|
|
|
940
1036
|
Or add to your config file (`agent-browser.json`):
|
|
941
1037
|
|
|
942
1038
|
```json
|
|
943
|
-
{"native": true}
|
|
1039
|
+
{ "native": true }
|
|
944
1040
|
```
|
|
945
1041
|
|
|
946
1042
|
### What's Different
|
|
947
1043
|
|
|
948
|
-
|
|
|
949
|
-
|
|
950
|
-
| **Runtime**
|
|
951
|
-
| **Protocol**
|
|
952
|
-
| **Install size**
|
|
953
|
-
| **Browser support** | Chromium, Firefox, WebKit
|
|
954
|
-
| **Stability**
|
|
1044
|
+
| | Default (Node.js) | Native (`--native`) |
|
|
1045
|
+
| ------------------- | --------------------------- | -------------------------------- |
|
|
1046
|
+
| **Runtime** | Node.js + Playwright | Pure Rust binary |
|
|
1047
|
+
| **Protocol** | Playwright protocol | Direct CDP / WebDriver |
|
|
1048
|
+
| **Install size** | Larger (Node.js + npm deps) | Smaller (single binary) |
|
|
1049
|
+
| **Browser support** | Chromium, Firefox, WebKit | Chromium, Safari (via WebDriver) |
|
|
1050
|
+
| **Stability** | Stable | Experimental |
|
|
955
1051
|
|
|
956
1052
|
### Known Limitations
|
|
957
1053
|
|
|
@@ -961,13 +1057,13 @@ Or add to your config file (`agent-browser.json`):
|
|
|
961
1057
|
|
|
962
1058
|
## Platforms
|
|
963
1059
|
|
|
964
|
-
| Platform
|
|
965
|
-
|
|
966
|
-
| macOS ARM64 | Native Rust | Node.js
|
|
967
|
-
| macOS x64
|
|
968
|
-
| Linux ARM64 | Native Rust | Node.js
|
|
969
|
-
| Linux x64
|
|
970
|
-
| Windows x64 | Native Rust | Node.js
|
|
1060
|
+
| Platform | Binary | Fallback |
|
|
1061
|
+
| ----------- | ----------- | -------- |
|
|
1062
|
+
| macOS ARM64 | Native Rust | Node.js |
|
|
1063
|
+
| macOS x64 | Native Rust | Node.js |
|
|
1064
|
+
| Linux ARM64 | Native Rust | Node.js |
|
|
1065
|
+
| Linux x64 | Native Rust | Node.js |
|
|
1066
|
+
| Windows x64 | Native Rust | Node.js |
|
|
971
1067
|
|
|
972
1068
|
## Usage with AI Agents
|
|
973
1069
|
|
|
@@ -1011,6 +1107,7 @@ For more consistent results, add to your project or global instructions file:
|
|
|
1011
1107
|
Use `agent-browser` for web automation. Run `agent-browser --help` for all commands.
|
|
1012
1108
|
|
|
1013
1109
|
Core workflow:
|
|
1110
|
+
|
|
1014
1111
|
1. `agent-browser open <url>` - Navigate to page
|
|
1015
1112
|
2. `agent-browser snapshot -i` - Get interactive elements with refs (@e1, @e2)
|
|
1016
1113
|
3. `agent-browser click @e1` / `fill @e2 "text"` - Interact using refs
|
|
@@ -1062,11 +1159,11 @@ export AGENT_BROWSER_IOS_DEVICE="iPhone 16 Pro"
|
|
|
1062
1159
|
agent-browser open https://example.com
|
|
1063
1160
|
```
|
|
1064
1161
|
|
|
1065
|
-
| Variable
|
|
1066
|
-
|
|
1067
|
-
| `AGENT_BROWSER_PROVIDER`
|
|
1162
|
+
| Variable | Description |
|
|
1163
|
+
| -------------------------- | ----------------------------------------------- |
|
|
1164
|
+
| `AGENT_BROWSER_PROVIDER` | Set to `ios` to enable iOS mode |
|
|
1068
1165
|
| `AGENT_BROWSER_IOS_DEVICE` | Device name (e.g., "iPhone 16 Pro", "iPad Pro") |
|
|
1069
|
-
| `AGENT_BROWSER_IOS_UDID`
|
|
1166
|
+
| `AGENT_BROWSER_IOS_UDID` | Device UDID (alternative to device name) |
|
|
1070
1167
|
|
|
1071
1168
|
**Supported devices:** All iOS Simulators available in Xcode (iPhones, iPads), plus real iOS devices.
|
|
1072
1169
|
|
|
@@ -1077,6 +1174,7 @@ agent-browser open https://example.com
|
|
|
1077
1174
|
Appium also supports real iOS devices connected via USB. This requires additional one-time setup:
|
|
1078
1175
|
|
|
1079
1176
|
**1. Get your device UDID:**
|
|
1177
|
+
|
|
1080
1178
|
```bash
|
|
1081
1179
|
xcrun xctrace list devices
|
|
1082
1180
|
# or
|
|
@@ -1084,6 +1182,7 @@ system_profiler SPUSBDataType | grep -A 5 "iPhone\|iPad"
|
|
|
1084
1182
|
```
|
|
1085
1183
|
|
|
1086
1184
|
**2. Sign WebDriverAgent (one-time):**
|
|
1185
|
+
|
|
1087
1186
|
```bash
|
|
1088
1187
|
# Open the WebDriverAgent Xcode project
|
|
1089
1188
|
cd ~/.appium/node_modules/appium-xcuitest-driver/node_modules/appium-webdriveragent
|
|
@@ -1091,12 +1190,14 @@ open WebDriverAgent.xcodeproj
|
|
|
1091
1190
|
```
|
|
1092
1191
|
|
|
1093
1192
|
In Xcode:
|
|
1193
|
+
|
|
1094
1194
|
- Select the `WebDriverAgentRunner` target
|
|
1095
1195
|
- Go to Signing & Capabilities
|
|
1096
1196
|
- Select your Team (requires Apple Developer account, free tier works)
|
|
1097
1197
|
- Let Xcode manage signing automatically
|
|
1098
1198
|
|
|
1099
1199
|
**3. Use with agent-browser:**
|
|
1200
|
+
|
|
1100
1201
|
```bash
|
|
1101
1202
|
# Connect device via USB, then:
|
|
1102
1203
|
agent-browser -p ios --device "<DEVICE_UDID>" open https://example.com
|
|
@@ -1106,11 +1207,44 @@ agent-browser -p ios --device "John's iPhone" open https://example.com
|
|
|
1106
1207
|
```
|
|
1107
1208
|
|
|
1108
1209
|
**Real device notes:**
|
|
1210
|
+
|
|
1109
1211
|
- First run installs WebDriverAgent to the device (may require Trust prompt)
|
|
1110
1212
|
- Device must be unlocked and connected via USB
|
|
1111
1213
|
- Slightly slower initial connection than simulator
|
|
1112
1214
|
- Tests against real Safari performance and behavior
|
|
1113
1215
|
|
|
1216
|
+
### Browserless
|
|
1217
|
+
|
|
1218
|
+
[Browserless](https://browserless.io) provides cloud browser infrastructure with a Sessions API. Use it when running agent-browser in environments where a local browser isn't available.
|
|
1219
|
+
|
|
1220
|
+
To enable Browserless, use the `-p` flag:
|
|
1221
|
+
|
|
1222
|
+
```bash
|
|
1223
|
+
export BROWSERLESS_API_KEY="your-api-token"
|
|
1224
|
+
agent-browser -p browserless open https://example.com
|
|
1225
|
+
```
|
|
1226
|
+
|
|
1227
|
+
Or use environment variables for CI/scripts:
|
|
1228
|
+
|
|
1229
|
+
```bash
|
|
1230
|
+
export AGENT_BROWSER_PROVIDER=browserless
|
|
1231
|
+
export BROWSERLESS_API_KEY="your-api-token"
|
|
1232
|
+
agent-browser open https://example.com
|
|
1233
|
+
```
|
|
1234
|
+
|
|
1235
|
+
Optional configuration via environment variables:
|
|
1236
|
+
|
|
1237
|
+
| Variable | Description | Default |
|
|
1238
|
+
| -------------------------- | ------------------------------------------------ | --------------------------------------- |
|
|
1239
|
+
| `BROWSERLESS_API_URL` | Base API URL (for custom regions or self-hosted) | `https://production-sfo.browserless.io` |
|
|
1240
|
+
| `BROWSERLESS_BROWSER_TYPE` | Type of browser to use (`chromium` or `chrome`)  | `chromium`                              |
|
|
1241
|
+
| `BROWSERLESS_TTL` | Session TTL in milliseconds | `300000` |
|
|
1242
|
+
| `BROWSERLESS_STEALTH` | Enable stealth mode (`true`/`false`) | `true` |
|
|
1243
|
+
|
|
1244
|
+
When enabled, agent-browser connects to a Browserless cloud session instead of launching a local browser. All commands work identically.
|
|
1245
|
+
|
|
1246
|
+
Get your API token from the [Browserless Dashboard](https://browserless.io).
|
|
1247
|
+
|
|
1114
1248
|
### Browserbase
|
|
1115
1249
|
|
|
1116
1250
|
[Browserbase](https://browserbase.com) provides remote browser infrastructure that makes it easy to deploy browsing agents. Use it when running the agent-browser CLI in an environment where a local browser isn't feasible.
|
|
@@ -1119,7 +1253,6 @@ To enable Browserbase, use the `-p` flag:
|
|
|
1119
1253
|
|
|
1120
1254
|
```bash
|
|
1121
1255
|
export BROWSERBASE_API_KEY="your-api-key"
|
|
1122
|
-
export BROWSERBASE_PROJECT_ID="your-project-id"
|
|
1123
1256
|
agent-browser -p browserbase open https://example.com
|
|
1124
1257
|
```
|
|
1125
1258
|
|
|
@@ -1128,13 +1261,12 @@ Or use environment variables for CI/scripts:
|
|
|
1128
1261
|
```bash
|
|
1129
1262
|
export AGENT_BROWSER_PROVIDER=browserbase
|
|
1130
1263
|
export BROWSERBASE_API_KEY="your-api-key"
|
|
1131
|
-
export BROWSERBASE_PROJECT_ID="your-project-id"
|
|
1132
1264
|
agent-browser open https://example.com
|
|
1133
1265
|
```
|
|
1134
1266
|
|
|
1135
1267
|
When enabled, agent-browser connects to a Browserbase session instead of launching a local browser. All commands work identically.
|
|
1136
1268
|
|
|
1137
|
-
Get your API key
|
|
1269
|
+
Get your API key from the [Browserbase Dashboard](https://browserbase.com/overview).
|
|
1138
1270
|
|
|
1139
1271
|
### Browser Use
|
|
1140
1272
|
|
|
@@ -1180,12 +1312,12 @@ agent-browser open https://example.com
|
|
|
1180
1312
|
|
|
1181
1313
|
Optional configuration via environment variables:
|
|
1182
1314
|
|
|
1183
|
-
| Variable
|
|
1184
|
-
|
|
1185
|
-
| `KERNEL_HEADLESS`
|
|
1186
|
-
| `KERNEL_STEALTH`
|
|
1187
|
-
| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds
|
|
1188
|
-
| `KERNEL_PROFILE_NAME`
|
|
1315
|
+
| Variable | Description | Default |
|
|
1316
|
+
| ------------------------ | -------------------------------------------------------------------------------- | ------- |
|
|
1317
|
+
| `KERNEL_HEADLESS` | Run browser in headless mode (`true`/`false`) | `false` |
|
|
1318
|
+
| `KERNEL_STEALTH` | Enable stealth mode to avoid bot detection (`true`/`false`) | `true` |
|
|
1319
|
+
| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds | `300` |
|
|
1320
|
+
| `KERNEL_PROFILE_NAME` | Browser profile name for persistent cookies/logins (created if it doesn't exist) | (none) |
|
|
1189
1321
|
|
|
1190
1322
|
When enabled, agent-browser connects to a Kernel cloud session instead of launching a local browser. All commands work identically.
|
|
1191
1323
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/dist/actions.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAqBpE,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAsIT,MAAM,YAAY,CAAC;AAQpB;;;GAGG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,CAAC,CAAC,KAAK,EAAE,eAAe,KAAK,IAAI,CAAC,GAAG,IAAI,GAClD,IAAI,CAEN;AAQD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,GAAG,KAAK,CAqDzE;AAKD,wBAAgB,gBAAgB,IAAI,IAAI,CAuBvC;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CA6CjG"}
|