agent-browser 0.17.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +64 -6
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/bin/agent-browser-darwin-x64 +0 -0
- package/bin/agent-browser-linux-arm64 +0 -0
- package/bin/agent-browser-linux-x64 +0 -0
- package/bin/agent-browser-win32-x64.exe +0 -0
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +84 -4
- package/dist/actions.js.map +1 -1
- package/dist/browser.d.ts +12 -3
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +93 -38
- package/dist/browser.js.map +1 -1
- package/dist/daemon.d.ts +5 -0
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +62 -1
- package/dist/daemon.js.map +1 -1
- package/dist/inspect-server.d.ts +26 -0
- package/dist/inspect-server.d.ts.map +1 -0
- package/dist/inspect-server.js +218 -0
- package/dist/inspect-server.js.map +1 -0
- package/dist/protocol.d.ts +3 -1
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +5 -2
- package/dist/protocol.js.map +1 -1
- package/dist/types.d.ts +8 -1
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/skills/agent-browser/SKILL.md +88 -0
- package/skills/agent-browser/references/authentication.md +101 -0
- package/skills/agent-browser/references/commands.md +3 -0
- package/skills/electron/SKILL.md +25 -0
- package/skills/vercel-sandbox/SKILL.md +280 -0
package/README.md
CHANGED
|
@@ -131,6 +131,7 @@ agent-browser get value <sel> # Get input value
|
|
|
131
131
|
agent-browser get attr <sel> <attr> # Get attribute
|
|
132
132
|
agent-browser get title # Get page title
|
|
133
133
|
agent-browser get url # Get current URL
|
|
134
|
+
agent-browser get cdp-url # Get CDP WebSocket URL (for DevTools, debugging)
|
|
134
135
|
agent-browser get count <sel> # Count matching elements
|
|
135
136
|
agent-browser get box <sel> # Get bounding box
|
|
136
137
|
agent-browser get styles <sel> # Get computed styles
|
|
@@ -197,7 +198,7 @@ agent-browser mouse wheel <dy> [dx] # Scroll wheel
|
|
|
197
198
|
### Browser Settings
|
|
198
199
|
|
|
199
200
|
```bash
|
|
200
|
-
agent-browser set viewport <w> <h>
|
|
201
|
+
agent-browser set viewport <w> <h> [scale] # Set viewport size (scale for retina, e.g. 2)
|
|
201
202
|
agent-browser set device <name> # Emulate device ("iPhone 14")
|
|
202
203
|
agent-browser set geo <lat> <lng> # Set geolocation
|
|
203
204
|
agent-browser set offline [on|off] # Toggle offline mode
|
|
@@ -283,6 +284,7 @@ agent-browser console --clear # Clear console
|
|
|
283
284
|
agent-browser errors # View page errors (uncaught JavaScript exceptions)
|
|
284
285
|
agent-browser errors --clear # Clear errors
|
|
285
286
|
agent-browser highlight <sel> # Highlight element
|
|
287
|
+
agent-browser inspect # Open Chrome DevTools for the active page
|
|
286
288
|
agent-browser state save <path> # Save auth state
|
|
287
289
|
agent-browser state load <path> # Load auth state
|
|
288
290
|
agent-browser state list # List saved state files
|
|
@@ -308,6 +310,47 @@ agent-browser install # Download Chromium browser
|
|
|
308
310
|
agent-browser install --with-deps # Also install system deps (Linux)
|
|
309
311
|
```
|
|
310
312
|
|
|
313
|
+
## Authentication
|
|
314
|
+
|
|
315
|
+
agent-browser provides multiple ways to persist login sessions so you don't re-authenticate every run.
|
|
316
|
+
|
|
317
|
+
### Quick summary
|
|
318
|
+
|
|
319
|
+
| Approach | Best for | Flag / Env |
|
|
320
|
+
|----------|----------|------------|
|
|
321
|
+
| **Persistent profile** | Full browser state (cookies, IndexedDB, service workers, cache) across restarts | `--profile <path>` / `AGENT_BROWSER_PROFILE` |
|
|
322
|
+
| **Session persistence** | Auto-save/restore cookies + localStorage by name | `--session-name <name>` / `AGENT_BROWSER_SESSION_NAME` |
|
|
323
|
+
| **Import from your browser** | Grab auth from a Chrome session you already logged into | `--auto-connect` + `state save` |
|
|
324
|
+
| **State file** | Load a previously saved state JSON on launch | `--state <path>` / `AGENT_BROWSER_STATE` |
|
|
325
|
+
| **Auth vault** | Store credentials locally (encrypted), login by name | `auth save` / `auth login` |
|
|
326
|
+
|
|
327
|
+
### Import auth from your browser
|
|
328
|
+
|
|
329
|
+
If you are already logged in to a site in Chrome, you can grab that auth state and reuse it:
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
# 1. Launch Chrome with remote debugging enabled
|
|
333
|
+
# macOS:
|
|
334
|
+
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222
|
|
335
|
+
# Or use --auto-connect to discover an already-running Chrome
|
|
336
|
+
|
|
337
|
+
# 2. Connect and save the authenticated state
|
|
338
|
+
agent-browser --auto-connect state save ./my-auth.json
|
|
339
|
+
|
|
340
|
+
# 3. Use the saved auth in future sessions
|
|
341
|
+
agent-browser --state ./my-auth.json open https://app.example.com/dashboard
|
|
342
|
+
|
|
343
|
+
# 4. Or use --session-name for automatic persistence
|
|
344
|
+
agent-browser --session-name myapp state load ./my-auth.json
|
|
345
|
+
# From now on, --session-name myapp auto-saves/restores this state
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
> **Security notes:**
|
|
349
|
+
> - `--remote-debugging-port` exposes full browser control on localhost. Any local process can connect. Only use on trusted machines and close Chrome when done.
|
|
350
|
+
> - State files contain session tokens in plaintext. Add them to `.gitignore` and delete when no longer needed. For encryption at rest, set `AGENT_BROWSER_ENCRYPTION_KEY` (see [State Encryption](#state-encryption)).
|
|
351
|
+
|
|
352
|
+
For full details on login flows, OAuth, 2FA, cookie-based auth, and the auth vault, see the [Authentication](docs/src/app/sessions/page.mdx) docs.
|
|
353
|
+
|
|
311
354
|
## Sessions
|
|
312
355
|
|
|
313
356
|
Run multiple isolated browser instances:
|
|
@@ -415,7 +458,7 @@ agent-browser includes security features for safe AI agent deployments. All feat
|
|
|
415
458
|
| `AGENT_BROWSER_CONFIRM_ACTIONS` | Action categories requiring confirmation |
|
|
416
459
|
| `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
|
|
417
460
|
|
|
418
|
-
See [Security documentation](https://agent-browser.
|
|
461
|
+
See [Security documentation](https://agent-browser.dev/security) for details.
|
|
419
462
|
|
|
420
463
|
## Snapshot Options
|
|
421
464
|
|
|
@@ -445,6 +488,8 @@ The `-C` flag is useful for modern web apps that use custom clickable elements (
|
|
|
445
488
|
|
|
446
489
|
The `--annotate` flag overlays numbered labels on interactive elements in the screenshot. Each label `[N]` corresponds to ref `@eN`, so the same refs work for both visual and text-based workflows.
|
|
447
490
|
|
|
491
|
+
In native mode, annotated screenshots are supported on the CDP-backed browser path (`--native` with Chromium/Lightpanda). The Safari/WebDriver backend does not yet support `--annotate`.
|
|
492
|
+
|
|
448
493
|
```bash
|
|
449
494
|
agent-browser screenshot --annotate
|
|
450
495
|
# -> Screenshot saved to /tmp/screenshot-2026-02-17T12-00-00-abc123.png
|
|
@@ -713,7 +758,22 @@ agent-browser --executable-path /path/to/chromium open example.com
|
|
|
713
758
|
AGENT_BROWSER_EXECUTABLE_PATH=/path/to/chromium agent-browser open example.com
|
|
714
759
|
```
|
|
715
760
|
|
|
716
|
-
### Serverless
|
|
761
|
+
### Serverless (Vercel)
|
|
762
|
+
|
|
763
|
+
Run agent-browser + Chrome in an ephemeral Vercel Sandbox microVM. No external server needed:
|
|
764
|
+
|
|
765
|
+
```typescript
|
|
766
|
+
import { Sandbox } from "@vercel/sandbox";
|
|
767
|
+
|
|
768
|
+
const sandbox = await Sandbox.create({ runtime: "node24" });
|
|
769
|
+
await sandbox.runCommand("agent-browser", ["open", "https://example.com"]);
|
|
770
|
+
const result = await sandbox.runCommand("agent-browser", ["screenshot", "--json"]);
|
|
771
|
+
await sandbox.stop();
|
|
772
|
+
```
|
|
773
|
+
|
|
774
|
+
See the [environments example](examples/environments/) for a working demo with a UI and deploy-to-Vercel button.
|
|
775
|
+
|
|
776
|
+
### Serverless (AWS Lambda)
|
|
717
777
|
|
|
718
778
|
```typescript
|
|
719
779
|
import chromium from '@sparticuz/chromium';
|
|
@@ -1119,7 +1179,6 @@ To enable Browserbase, use the `-p` flag:
|
|
|
1119
1179
|
|
|
1120
1180
|
```bash
|
|
1121
1181
|
export BROWSERBASE_API_KEY="your-api-key"
|
|
1122
|
-
export BROWSERBASE_PROJECT_ID="your-project-id"
|
|
1123
1182
|
agent-browser -p browserbase open https://example.com
|
|
1124
1183
|
```
|
|
1125
1184
|
|
|
@@ -1128,13 +1187,12 @@ Or use environment variables for CI/scripts:
|
|
|
1128
1187
|
```bash
|
|
1129
1188
|
export AGENT_BROWSER_PROVIDER=browserbase
|
|
1130
1189
|
export BROWSERBASE_API_KEY="your-api-key"
|
|
1131
|
-
export BROWSERBASE_PROJECT_ID="your-project-id"
|
|
1132
1190
|
agent-browser open https://example.com
|
|
1133
1191
|
```
|
|
1134
1192
|
|
|
1135
1193
|
When enabled, agent-browser connects to a Browserbase session instead of launching a local browser. All commands work identically.
|
|
1136
1194
|
|
|
1137
|
-
Get your API key
|
|
1195
|
+
Get your API key from the [Browserbase Dashboard](https://browserbase.com/overview).
|
|
1138
1196
|
|
|
1139
1197
|
### Browser Use
|
|
1140
1198
|
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/dist/actions.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAqBpE,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAsIT,MAAM,YAAY,CAAC;AAQpB;;;GAGG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,CAAC,CAAC,KAAK,EAAE,eAAe,KAAK,IAAI,CAAC,GAAG,IAAI,GAClD,IAAI,CAEN;AAQD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,GAAG,KAAK,CAqDzE;AAKD,wBAAgB,gBAAgB,IAAI,IAAI,CAuBvC;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CA6CjG"}
|
package/dist/actions.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as fs from 'fs';
|
|
2
2
|
import * as path from 'path';
|
|
3
|
+
import { exec } from 'node:child_process';
|
|
3
4
|
import { mkdirSync } from 'node:fs';
|
|
4
5
|
import { getAppDir } from './daemon.js';
|
|
5
6
|
import { checkPolicy, describeAction, getActionCategory, loadPolicyFile, initPolicyReloader, reloadPolicyIfChanged, } from './action-policy.js';
|
|
@@ -229,6 +230,10 @@ async function dispatchAction(command, browser) {
|
|
|
229
230
|
return await handleReload(command, browser);
|
|
230
231
|
case 'url':
|
|
231
232
|
return await handleUrl(command, browser);
|
|
233
|
+
case 'cdp_url':
|
|
234
|
+
return handleCdpUrl(command, browser);
|
|
235
|
+
case 'inspect':
|
|
236
|
+
return await handleInspect(command, browser);
|
|
232
237
|
case 'title':
|
|
233
238
|
return await handleTitle(command, browser);
|
|
234
239
|
case 'getattribute':
|
|
@@ -1065,11 +1070,29 @@ async function handlePermissions(command, browser) {
|
|
|
1065
1070
|
});
|
|
1066
1071
|
}
|
|
1067
1072
|
/**
 * Resize the viewport and optionally apply a device scale factor.
 *
 * When `command.deviceScaleFactor` is truthy and not 1, the viewport is set
 * first and the scale factor is applied afterwards. Otherwise any existing
 * device-metrics override is cleared (best effort) before the viewport is set,
 * so a stale DPR from an earlier call does not persist.
 *
 * @param {object} command - Reads `id`, `width`, `height`, and the optional `deviceScaleFactor`.
 * @param {object} browser - Browser manager exposing `setViewport`,
 *   `setDeviceScaleFactor`, and `clearDeviceMetricsOverride`.
 * @returns {Promise<object>} Success response echoing width/height, plus
 *   `deviceScaleFactor` whenever the command supplied one.
 */
async function handleViewport(command, browser) {
    const { width, height, deviceScaleFactor } = command;
    const hasCustomScale = Boolean(deviceScaleFactor) && deviceScaleFactor !== 1;
    if (!hasCustomScale) {
        // deviceScaleFactor is 1 or undefined -- clear any previously-set CDP
        // Emulation.setDeviceMetricsOverride so stale DPR doesn't persist.
        try {
            await browser.clearDeviceMetricsOverride();
        }
        catch {
            // Ignore if override was never set
        }
        await browser.setViewport(width, height);
    }
    else {
        await browser.setViewport(width, height);
        await browser.setDeviceScaleFactor(deviceScaleFactor, width, height, false);
    }
    const payload = deviceScaleFactor === undefined
        ? { width, height }
        : { width, height, deviceScaleFactor };
    return successResponse(command.id, payload);
}
|
|
1074
1097
|
async function handleUserAgent(command, browser) {
|
|
1075
1098
|
const page = browser.getPage();
|
|
@@ -1127,6 +1150,62 @@ async function handleUrl(command, browser) {
|
|
|
1127
1150
|
const page = browser.getPage();
|
|
1128
1151
|
return successResponse(command.id, { url: page.url() });
|
|
1129
1152
|
}
|
|
1153
|
+
/**
 * Report the CDP URL of the current browser.
 *
 * @param {object} command - Reads `id` for the response envelope.
 * @param {object} browser - Browser manager exposing `getCdpUrl()`.
 * @returns {object} Success response carrying `{ cdpUrl }`, or an error
 *   response when `getCdpUrl()` yields a falsy value.
 */
function handleCdpUrl(command, browser) {
    const endpoint = browser.getCdpUrl();
    return endpoint
        ? successResponse(command.id, { cdpUrl: endpoint })
        : errorResponse(command.id, 'CDP URL not available (browser may not be launched)');
}
|
|
1160
|
+
/**
 * Start a local inspect server for the active page and open it in the
 * system browser.
 *
 * Steps: read the CDP URL, stop any inspect server already attached to the
 * browser manager, resolve the active page's target ID through a short-lived
 * CDP session, start a new `InspectServer`, register it on the browser
 * manager, and open `http://127.0.0.1:<port>`.
 *
 * @param {object} command - Reads `id` for the response envelope.
 * @param {object} browser - Browser manager exposing `getCdpUrl`,
 *   `stopInspectServer`, `getPage`, and `setInspectServer`.
 * @returns {Promise<object>} Success response `{ opened: true, url }`, or an
 *   error response when the CDP URL or the target ID is unavailable.
 */
async function handleInspect(command, browser) {
    const cdpUrl = browser.getCdpUrl();
    if (!cdpUrl) {
        return errorResponse(command.id, 'CDP URL not available (browser may not be launched)');
    }
    // Shut down any existing inspect server so we always target the current page
    browser.stopInspectServer();
    // Reduce e.g. "ws://host:port/devtools/..." to "host:port".
    const stripped = cdpUrl.replace(/^(wss?|https?):\/\//, '');
    const hostPort = stripped.split('/')[0];
    // Get the target ID so the inspect server can create its own dedicated CDP session
    const page = browser.getPage();
    const context = page.context();
    const tmpCdp = await context.newCDPSession(page);
    let targetId = '';
    try {
        const info = await tmpCdp.send('Target.getTargetInfo');
        targetId = info?.targetInfo?.targetId || '';
    }
    catch (err) {
        console.error('[inspect] getTargetInfo failed:', err);
    }
    finally {
        // The temporary session exists only to read the target ID. Releasing it
        // is best-effort: a failed detach must not fail the whole command or
        // hide the target-ID error returned below.
        try {
            await tmpCdp.detach();
        }
        catch (err) {
            console.error('[inspect] CDP session detach failed:', err);
        }
    }
    if (!targetId) {
        return errorResponse(command.id, 'Could not determine target ID for active page');
    }
    // Loaded lazily so the inspect server module is only pulled in on demand.
    const { InspectServer } = await import('./inspect-server.js');
    const server = new InspectServer({
        chromeHostPort: hostPort,
        targetId,
        chromeWsUrl: cdpUrl,
    });
    await server.start();
    browser.setInspectServer(server);
    const url = `http://127.0.0.1:${server.port}`;
    openUrlInBrowser(url);
    return successResponse(command.id, { opened: true, url });
}
|
|
1197
|
+
/**
 * Open a URL with the platform's default opener (`open` on macOS, `start` on
 * Windows, `xdg-open` elsewhere). Fire-and-forget: a launch failure is only
 * logged.
 *
 * NOTE(review): the URL is interpolated into a shell command string, so
 * callers must pass only trusted URLs.
 *
 * @param {string} url - URL to open.
 * @returns {void}
 */
function openUrlInBrowser(url) {
    let launchCommand;
    switch (process.platform) {
        case 'darwin':
            launchCommand = `open "${url}"`;
            break;
        case 'win32':
            launchCommand = `start "" "${url}"`;
            break;
        default:
            launchCommand = `xdg-open "${url}"`;
            break;
    }
    exec(launchCommand, (failure) => {
        if (failure) {
            console.error('[inspect] Failed to open browser:', failure.message);
        }
    });
}
|
|
1130
1209
|
async function handleTitle(command, browser) {
|
|
1131
1210
|
const page = browser.getPage();
|
|
1132
1211
|
const title = await page.title();
|
|
@@ -1141,7 +1220,8 @@ async function handleGetAttribute(command, browser) {
|
|
|
1141
1220
|
/**
 * Read the text of the element matched by `command.selector`.
 *
 * Uses the locator's `innerText()` first; when that is empty, falls back to
 * `textContent()`, and finally to an empty string.
 *
 * @param {object} command - Reads `id` and `selector`.
 * @param {object} browser - Browser manager exposing `getPage()` and `getLocator()`.
 * @returns {Promise<object>} Success response with `{ text, origin }`, where
 *   `origin` is the page's current URL.
 */
async function handleGetText(command, browser) {
    const activePage = browser.getPage();
    const target = browser.getLocator(command.selector);
    let text = await target.innerText();
    if (!text) {
        text = (await target.textContent()) || '';
    }
    return successResponse(command.id, { text, origin: activePage.url() });
}
|
|
1147
1227
|
async function handleIsVisible(command, browser) {
|