agent-browser 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -131,6 +131,7 @@ agent-browser get value <sel> # Get input value
131
131
  agent-browser get attr <sel> <attr> # Get attribute
132
132
  agent-browser get title # Get page title
133
133
  agent-browser get url # Get current URL
134
+ agent-browser get cdp-url # Get CDP WebSocket URL (for DevTools, debugging)
134
135
  agent-browser get count <sel> # Count matching elements
135
136
  agent-browser get box <sel> # Get bounding box
136
137
  agent-browser get styles <sel> # Get computed styles
@@ -197,7 +198,7 @@ agent-browser mouse wheel <dy> [dx] # Scroll wheel
197
198
  ### Browser Settings
198
199
 
199
200
  ```bash
200
- agent-browser set viewport <w> <h> # Set viewport size
201
+ agent-browser set viewport <w> <h> [scale] # Set viewport size (scale for retina, e.g. 2)
201
202
  agent-browser set device <name> # Emulate device ("iPhone 14")
202
203
  agent-browser set geo <lat> <lng> # Set geolocation
203
204
  agent-browser set offline [on|off] # Toggle offline mode
@@ -283,6 +284,7 @@ agent-browser console --clear # Clear console
283
284
  agent-browser errors # View page errors (uncaught JavaScript exceptions)
284
285
  agent-browser errors --clear # Clear errors
285
286
  agent-browser highlight <sel> # Highlight element
287
+ agent-browser inspect # Open Chrome DevTools for the active page
286
288
  agent-browser state save <path> # Save auth state
287
289
  agent-browser state load <path> # Load auth state
288
290
  agent-browser state list # List saved state files
@@ -308,6 +310,47 @@ agent-browser install # Download Chromium browser
308
310
  agent-browser install --with-deps # Also install system deps (Linux)
309
311
  ```
310
312
 
313
+ ## Authentication
314
+
315
+ agent-browser provides multiple ways to persist login sessions so you don't re-authenticate every run.
316
+
317
+ ### Quick summary
318
+
319
+ | Approach | Best for | Flag / Env |
320
+ |----------|----------|------------|
321
+ | **Persistent profile** | Full browser state (cookies, IndexedDB, service workers, cache) across restarts | `--profile <path>` / `AGENT_BROWSER_PROFILE` |
322
+ | **Session persistence** | Auto-save/restore cookies + localStorage by name | `--session-name <name>` / `AGENT_BROWSER_SESSION_NAME` |
323
+ | **Import from your browser** | Grab auth from a Chrome session you already logged into | `--auto-connect` + `state save` |
324
+ | **State file** | Load a previously saved state JSON on launch | `--state <path>` / `AGENT_BROWSER_STATE` |
325
+ | **Auth vault** | Store credentials locally (encrypted), login by name | `auth save` / `auth login` |
326
+
327
+ ### Import auth from your browser
328
+
329
+ If you are already logged in to a site in Chrome, you can grab that auth state and reuse it:
330
+
331
+ ```bash
332
+ # 1. Launch Chrome with remote debugging enabled
333
+ # macOS:
334
+ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" --remote-debugging-port=9222
335
+ # Or use --auto-connect to discover an already-running Chrome
336
+
337
+ # 2. Connect and save the authenticated state
338
+ agent-browser --auto-connect state save ./my-auth.json
339
+
340
+ # 3. Use the saved auth in future sessions
341
+ agent-browser --state ./my-auth.json open https://app.example.com/dashboard
342
+
343
+ # 4. Or use --session-name for automatic persistence
344
+ agent-browser --session-name myapp state load ./my-auth.json
345
+ # From now on, --session-name myapp auto-saves/restores this state
346
+ ```
347
+
348
+ > **Security notes:**
349
+ > - `--remote-debugging-port` exposes full browser control on localhost. Any local process can connect. Only use on trusted machines and close Chrome when done.
350
+ > - State files contain session tokens in plaintext. Add them to `.gitignore` and delete when no longer needed. For encryption at rest, set `AGENT_BROWSER_ENCRYPTION_KEY` (see [State Encryption](#state-encryption)).
351
+
352
+ For full details on login flows, OAuth, 2FA, cookie-based auth, and the auth vault, see the [Authentication](docs/src/app/sessions/page.mdx) docs.
353
+
311
354
  ## Sessions
312
355
 
313
356
  Run multiple isolated browser instances:
@@ -415,7 +458,7 @@ agent-browser includes security features for safe AI agent deployments. All feat
415
458
  | `AGENT_BROWSER_CONFIRM_ACTIONS` | Action categories requiring confirmation |
416
459
  | `AGENT_BROWSER_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
417
460
 
418
- See [Security documentation](https://agent-browser.vercel.app/security) for details.
461
+ See [Security documentation](https://agent-browser.dev/security) for details.
419
462
 
420
463
  ## Snapshot Options
421
464
 
@@ -445,6 +488,8 @@ The `-C` flag is useful for modern web apps that use custom clickable elements (
445
488
 
446
489
  The `--annotate` flag overlays numbered labels on interactive elements in the screenshot. Each label `[N]` corresponds to ref `@eN`, so the same refs work for both visual and text-based workflows.
447
490
 
491
+ In native mode, annotated screenshots are supported on the CDP-backed browser path (`--native` with Chromium/Lightpanda). The Safari/WebDriver backend does not yet support `--annotate`.
492
+
448
493
  ```bash
449
494
  agent-browser screenshot --annotate
450
495
  # -> Screenshot saved to /tmp/screenshot-2026-02-17T12-00-00-abc123.png
@@ -713,7 +758,22 @@ agent-browser --executable-path /path/to/chromium open example.com
713
758
  AGENT_BROWSER_EXECUTABLE_PATH=/path/to/chromium agent-browser open example.com
714
759
  ```
715
760
 
716
- ### Serverless Example (Vercel/AWS Lambda)
761
+ ### Serverless (Vercel)
762
+
763
+ Run agent-browser + Chrome in an ephemeral Vercel Sandbox microVM. No external server needed:
764
+
765
+ ```typescript
766
+ import { Sandbox } from "@vercel/sandbox";
767
+
768
+ const sandbox = await Sandbox.create({ runtime: "node24" });
769
+ await sandbox.runCommand("agent-browser", ["open", "https://example.com"]);
770
+ const result = await sandbox.runCommand("agent-browser", ["screenshot", "--json"]);
771
+ await sandbox.stop();
772
+ ```
773
+
774
+ See the [environments example](examples/environments/) for a working demo with a UI and deploy-to-Vercel button.
775
+
776
+ ### Serverless (AWS Lambda)
717
777
 
718
778
  ```typescript
719
779
  import chromium from '@sparticuz/chromium';
@@ -1119,7 +1179,6 @@ To enable Browserbase, use the `-p` flag:
1119
1179
 
1120
1180
  ```bash
1121
1181
  export BROWSERBASE_API_KEY="your-api-key"
1122
- export BROWSERBASE_PROJECT_ID="your-project-id"
1123
1182
  agent-browser -p browserbase open https://example.com
1124
1183
  ```
1125
1184
 
@@ -1128,13 +1187,12 @@ Or use environment variables for CI/scripts:
1128
1187
  ```bash
1129
1188
  export AGENT_BROWSER_PROVIDER=browserbase
1130
1189
  export BROWSERBASE_API_KEY="your-api-key"
1131
- export BROWSERBASE_PROJECT_ID="your-project-id"
1132
1190
  agent-browser open https://example.com
1133
1191
  ```
1134
1192
 
1135
1193
  When enabled, agent-browser connects to a Browserbase session instead of launching a local browser. All commands work identically.
1136
1194
 
1137
- Get your API key and project ID from the [Browserbase Dashboard](https://browserbase.com/overview).
1195
+ Get your API key from the [Browserbase Dashboard](https://browserbase.com/overview).
1138
1196
 
1139
1197
  ### Browser Use
1140
1198
 
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -1 +1 @@
1
- {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAqBpE,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAsIT,MAAM,YAAY,CAAC;AAQpB;;;GAGG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,CAAC,CAAC,KAAK,EAAE,eAAe,KAAK,IAAI,CAAC,GAAG,IAAI,GAClD,IAAI,CAEN;AAQD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,GAAG,KAAK,CAqDzE;AAKD,wBAAgB,gBAAgB,IAAI,IAAI,CAuBvC;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CA6CjG"}
1
+ {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAqBpE,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAsIT,MAAM,YAAY,CAAC;AAQpB;;;GAGG;AACH,wBAAgB,0BAA0B,CACxC,QAAQ,EAAE,CAAC,CAAC,KAAK,EAAE,eAAe,KAAK,IAAI,CAAC,GAAG,IAAI,GAClD,IAAI,CAEN;AAQD;;;GAGG;AACH,wBAAgB,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,GAAG,KAAK,CAqDzE;AAKD,wBAAgB,gBAAgB,IAAI,IAAI,CAuBvC;AAED;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CA6CjG"}
package/dist/actions.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import * as fs from 'fs';
2
2
  import * as path from 'path';
3
+ import { exec } from 'node:child_process';
3
4
  import { mkdirSync } from 'node:fs';
4
5
  import { getAppDir } from './daemon.js';
5
6
  import { checkPolicy, describeAction, getActionCategory, loadPolicyFile, initPolicyReloader, reloadPolicyIfChanged, } from './action-policy.js';
@@ -229,6 +230,10 @@ async function dispatchAction(command, browser) {
229
230
  return await handleReload(command, browser);
230
231
  case 'url':
231
232
  return await handleUrl(command, browser);
233
+ case 'cdp_url':
234
+ return handleCdpUrl(command, browser);
235
+ case 'inspect':
236
+ return await handleInspect(command, browser);
232
237
  case 'title':
233
238
  return await handleTitle(command, browser);
234
239
  case 'getattribute':
@@ -1065,11 +1070,29 @@ async function handlePermissions(command, browser) {
1065
1070
  });
1066
1071
  }
1067
1072
  async function handleViewport(command, browser) {
1068
- await browser.setViewport(command.width, command.height);
1069
- return successResponse(command.id, {
1073
+ if (command.deviceScaleFactor && command.deviceScaleFactor !== 1) {
1074
+ await browser.setViewport(command.width, command.height);
1075
+ await browser.setDeviceScaleFactor(command.deviceScaleFactor, command.width, command.height, false);
1076
+ }
1077
+ else {
1078
+ // deviceScaleFactor is 1 or undefined -- clear any previously-set CDP
1079
+ // Emulation.setDeviceMetricsOverride so stale DPR doesn't persist.
1080
+ try {
1081
+ await browser.clearDeviceMetricsOverride();
1082
+ }
1083
+ catch {
1084
+ // Ignore if override was never set
1085
+ }
1086
+ await browser.setViewport(command.width, command.height);
1087
+ }
1088
+ const result = {
1070
1089
  width: command.width,
1071
1090
  height: command.height,
1072
- });
1091
+ };
1092
+ if (command.deviceScaleFactor !== undefined) {
1093
+ result.deviceScaleFactor = command.deviceScaleFactor;
1094
+ }
1095
+ return successResponse(command.id, result);
1073
1096
  }
1074
1097
  async function handleUserAgent(command, browser) {
1075
1098
  const page = browser.getPage();
@@ -1127,6 +1150,62 @@ async function handleUrl(command, browser) {
1127
1150
  const page = browser.getPage();
1128
1151
  return successResponse(command.id, { url: page.url() });
1129
1152
  }
1153
+ function handleCdpUrl(command, browser) {
1154
+ const cdpUrl = browser.getCdpUrl();
1155
+ if (!cdpUrl) {
1156
+ return errorResponse(command.id, 'CDP URL not available (browser may not be launched)');
1157
+ }
1158
+ return successResponse(command.id, { cdpUrl });
1159
+ }
1160
+ async function handleInspect(command, browser) {
1161
+ const cdpUrl = browser.getCdpUrl();
1162
+ if (!cdpUrl) {
1163
+ return errorResponse(command.id, 'CDP URL not available (browser may not be launched)');
1164
+ }
1165
+ // Shut down any existing inspect server so we always target the current page
1166
+ browser.stopInspectServer();
1167
+ const stripped = cdpUrl.replace(/^(wss?|https?):\/\//, '');
1168
+ const hostPort = stripped.split('/')[0];
1169
+ // Get the target ID so the inspect server can create its own dedicated CDP session
1170
+ const page = browser.getPage();
1171
+ const context = page.context();
1172
+ const tmpCdp = await context.newCDPSession(page);
1173
+ let targetId = '';
1174
+ try {
1175
+ const info = await tmpCdp.send('Target.getTargetInfo');
1176
+ targetId = info?.targetInfo?.targetId || '';
1177
+ }
1178
+ catch (err) {
1179
+ console.error('[inspect] getTargetInfo failed:', err);
1180
+ }
1181
+ await tmpCdp.detach();
1182
+ if (!targetId) {
1183
+ return errorResponse(command.id, 'Could not determine target ID for active page');
1184
+ }
1185
+ const { InspectServer } = await import('./inspect-server.js');
1186
+ const server = new InspectServer({
1187
+ chromeHostPort: hostPort,
1188
+ targetId,
1189
+ chromeWsUrl: cdpUrl,
1190
+ });
1191
+ await server.start();
1192
+ browser.setInspectServer(server);
1193
+ const url = `http://127.0.0.1:${server.port}`;
1194
+ openUrlInBrowser(url);
1195
+ return successResponse(command.id, { opened: true, url });
1196
+ }
1197
+ function openUrlInBrowser(url) {
1198
+ const platform = process.platform;
1199
+ const cmd = platform === 'darwin'
1200
+ ? `open "${url}"`
1201
+ : platform === 'win32'
1202
+ ? `start "" "${url}"`
1203
+ : `xdg-open "${url}"`;
1204
+ exec(cmd, (err) => {
1205
+ if (err)
1206
+ console.error('[inspect] Failed to open browser:', err.message);
1207
+ });
1208
+ }
1130
1209
  async function handleTitle(command, browser) {
1131
1210
  const page = browser.getPage();
1132
1211
  const title = await page.title();
@@ -1141,7 +1220,8 @@ async function handleGetAttribute(command, browser) {
1141
1220
  async function handleGetText(command, browser) {
1142
1221
  const page = browser.getPage();
1143
1222
  const locator = browser.getLocator(command.selector);
1144
- const text = await locator.textContent();
1223
+ const inner = await locator.innerText();
1224
+ const text = inner || (await locator.textContent()) || '';
1145
1225
  return successResponse(command.id, { text, origin: page.url() });
1146
1226
  }
1147
1227
  async function handleIsVisible(command, browser) {