agent-browser-loop 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
1
1
  # Agent Browser Loop - CLI Reference
2
2
 
3
+ <!-- TIP: Check package.json for dev server scripts to find the port to test (e.g. dev:basic, dev:next) -->
4
+
3
5
  Complete CLI reference for `agent-browser`.
4
6
 
5
7
  ## Commands
@@ -18,12 +20,19 @@ agent-browser open <url> [options]
18
20
  | `--headed` | Show browser window (default: headless) |
19
21
  | `--new, -n` | Create new session with auto-generated ID |
20
22
  | `--session, -s <id>` | Target session (from `--new`) |
23
+ | `--profile, -p <name>` | Load the named profile and save changes back to it on close |
24
+ | `--no-save` | Don't save profile changes on close (read-only) |
25
+ | `--width, -W <pixels>` | Viewport width (default: 1280) |
26
+ | `--height, -H <pixels>` | Viewport height (default: 720) |
21
27
  | `--json` | Output as JSON |
22
28
 
23
29
  **Examples:**
24
30
  ```bash
25
31
  agent-browser open http://localhost:3000
26
32
  agent-browser open http://localhost:3000 --headed
33
+ agent-browser open http://localhost:3000 --width 1920 --height 1080
34
+ agent-browser open http://localhost:3000 --profile admin # Loads and auto-saves on close
35
+ agent-browser open http://localhost:3000 --profile admin --no-save # Read-only
27
36
  agent-browser open --new http://localhost:3000 # Output: Session: swift-fox
28
37
  ```
29
38
 
@@ -54,6 +63,7 @@ agent-browser act <actions...> [options]
54
63
  | Type | `type:<ref>:<text>` | `type:input_0:hello` |
55
64
  | Press key | `press:<key>` | `press:Enter` |
56
65
  | Scroll | `scroll:<direction>[:<amount>]` | `scroll:down:500` |
66
+ | Resize | `resize:<width>:<height>` | `resize:1920:1080` |
57
67
  | Select | `select:<ref>:<value>` | `select:select_0:option1` |
58
68
  | Check | `check:<ref>` | `check:checkbox_0` |
59
69
  | Uncheck | `uncheck:<ref>` | `uncheck:checkbox_0` |
@@ -213,6 +223,127 @@ agent-browser screenshot
213
223
 
214
224
  ---
215
225
 
226
+ ### `resize <width> <height>`
227
+
228
+ Resize the browser viewport mid-session.
229
+
230
+ ```bash
231
+ agent-browser resize <width> <height> [options]
232
+ ```
233
+
234
+ **Options:**
235
+ | Flag | Description |
236
+ |------|-------------|
237
+ | `--session, -s <id>` | Target session |
238
+ | `--json` | Output as JSON |
239
+
240
+ **Examples:**
241
+ ```bash
242
+ agent-browser resize 1920 1080
243
+ agent-browser resize 375 667 # Mobile viewport
244
+ agent-browser act "resize:1920:1080" # Via act command
245
+ ```
246
+
247
+ ---
248
+
249
+ ### `profile <subcommand>`
250
+
251
+ Manage session storage profiles (cookies + localStorage). The `<name>` in all commands is an arbitrary identifier you choose (e.g., `admin`, `testuser`, `staging`).
252
+
253
+ #### `profile list`
254
+
255
+ List all available profiles.
256
+
257
+ ```bash
258
+ agent-browser profile list [--json]
259
+ ```
260
+
261
+ #### `profile show <name>`
262
+
263
+ Show profile contents.
264
+
265
+ ```bash
266
+ agent-browser profile show <name> [--json]
267
+ ```
268
+
269
+ #### `profile save <name>`
270
+
271
+ Save current session storage to a profile.
272
+
273
+ ```bash
274
+ agent-browser profile save <name> [options]
275
+ ```
276
+
277
+ **Options:**
278
+ | Flag | Description |
279
+ |------|-------------|
280
+ | `--session, -s <id>` | Source session |
281
+ | `--global` | Save to global profiles (`~/.config/agent-browser/profiles/`) |
282
+ | `--private` | Save to private profiles (gitignored) |
283
+ | `--description, -d <text>` | Profile description |
284
+
285
+ #### `profile delete <name>`
286
+
287
+ Delete a profile.
288
+
289
+ ```bash
290
+ agent-browser profile delete <name>
291
+ ```
292
+
293
+ #### `profile import <name> <path>`
294
+
295
+ Import profile from a Playwright storage state JSON file.
296
+
297
+ ```bash
298
+ agent-browser profile import <name> <path> [--global] [--private]
299
+ ```
300
+
301
+ #### `profile capture <name>`
302
+
303
+ Opens a headed browser, lets you interact manually (log in, etc.), then saves the session when you press Enter in the terminal.
304
+
305
+ ```bash
306
+ agent-browser profile capture <name> --url <url> [options]
307
+ ```
308
+
309
+ **Options:**
310
+ | Flag | Description |
311
+ |------|-------------|
312
+ | `--url <url>` | URL to navigate to (required) |
313
+ | `--global` | Save to global profiles |
314
+ | `--private` | Save to private profiles |
315
+ | `--description, -d <text>` | Profile description |
316
+
317
+ **Examples:**
318
+ ```bash
319
+ # Capture a session (opens browser, you log in, press Enter to save)
320
+ agent-browser profile capture admin --url http://localhost:3000/login
321
+ agent-browser profile capture testuser --url http://localhost:3000/login
322
+
323
+ # Save from an already-open session instead
324
+ agent-browser open http://localhost:3000/login --headed
325
+ # ... log in manually ...
326
+ agent-browser profile save admin --description "Admin account"
327
+
328
+ # Use profile (loads saved cookies/localStorage)
329
+ agent-browser open http://localhost:3000/dashboard --profile admin
330
+
331
+ # List profiles
332
+ agent-browser profile list
333
+
334
+ # Import existing Playwright storage state file
335
+ agent-browser profile import staging ./storage-state.json --global
336
+ ```
337
+
338
+ **Profile Storage Locations:**
339
+ - Local: `.agent-browser/profiles/<name>.json` (project-scoped, shareable via git)
340
+ - Private: `.agent-browser/profiles/.private/<name>.json` (gitignored)
341
+ - Global: `~/.config/agent-browser/profiles/<name>.json` (user-level)
342
+
343
+ Resolution order: private -> local -> global
344
+
345
+ ---
346
+
216
347
  ### `close`
217
348
 
218
349
  Close browser session or stop daemon.
@@ -3,12 +3,15 @@ name: agent-browser-loop
3
3
  description: Use when an agent must drive a live browser session in a back-and-forth loop (state -> explicit actions -> state) for UI validation, reproducible QA, or debugging UI behavior. Prefer this over one-shot CLI usage when an agent needs inspectable, stepwise control.
4
4
  ---
5
5
 
6
+
6
7
  # Agent Browser Loop
7
8
 
8
9
  Control a browser via CLI. Execute actions, read state, and verify UI changes in a stepwise loop.
9
10
 
10
11
  ## Quick Start
11
12
 
13
+ > **TIP**: Check package.json for dev server scripts to find the port to test
14
+
12
15
  ```bash
13
16
  # Open a URL (starts browser daemon automatically)
14
17
  agent-browser open http://localhost:3000
@@ -165,13 +168,42 @@ agent-browser close
165
168
  # Headed mode (visible browser)
166
169
  agent-browser open http://localhost:3000 --headed
167
170
 
168
- # JSON output
169
- agent-browser state --json
171
+ # Custom viewport size
172
+ agent-browser open http://localhost:3000 --width 1920 --height 1080
173
+
174
+ # Resize mid-session
175
+ agent-browser resize 1920 1080
176
+ ```
177
+
178
+ ## Profiles (Session Storage)
179
+
180
+ Save and reuse cookies/localStorage across sessions. The profile name (e.g., `admin`, `testuser`) is an arbitrary identifier you choose.
170
181
 
171
- # Skip state in response
172
- agent-browser act click:button_0 --no-state
182
+ ```bash
183
+ # Capture: opens browser, you interact, press Enter in terminal to save
184
+ agent-browser profile capture admin --url http://localhost:3000/login
185
+
186
+ # Or save from an already-open session
187
+ agent-browser open http://localhost:3000/login --headed
188
+ # ... log in manually ...
189
+ agent-browser profile save admin
190
+
191
+ # Use saved profile - auto-saves updated tokens on close
192
+ agent-browser open http://localhost:3000/dashboard --profile admin
193
+ # ... use the app (tokens may refresh) ...
194
+ agent-browser close # Updated tokens saved back to profile
195
+
196
+ # Use --no-save for read-only (don't save changes back)
197
+ agent-browser open http://localhost:3000 --profile admin --no-save
198
+
199
+ # List/manage profiles
200
+ agent-browser profile list
201
+ agent-browser profile show admin
202
+ agent-browser profile delete admin
173
203
  ```
174
204
 
205
+ Profiles are stored locally (`.agent-browser/profiles/`), privately (`.agent-browser/profiles/.private/`, gitignored), or globally (`~/.config/agent-browser/profiles/`).
206
+
175
207
  ## Multi-Session
176
208
 
177
209
  Run multiple browsers in parallel with `--new`:
package/README.md CHANGED
@@ -57,6 +57,45 @@ agent-browser state
57
57
 
58
58
  Every command returns the current page state - interactive elements, form values, scroll position, console errors, network failures. The agent sees exactly what it needs to verify the code works or debug why it doesn't.
59
59
 
60
+ ## Profiles (Session Storage)
61
+
62
+ Save and reuse login sessions across runs:
63
+
64
+ ```bash
65
+ # Save current session to a profile
66
+ agent-browser profile save admin
67
+
68
+ # Use profile (auto-saves updated tokens on close)
69
+ agent-browser open http://localhost:3000 --profile admin
70
+
71
+ # Use --no-save for read-only access
72
+ agent-browser open http://localhost:3000 --profile admin --no-save
73
+
74
+ # Other commands
75
+ agent-browser profile list
76
+ agent-browser profile capture admin --url http://localhost:3000/login
77
+ agent-browser profile delete admin
78
+ ```
79
+
80
+ Profiles store cookies and localStorage. Use `--global` for user-level profiles, `--private` for gitignored project profiles.
81
+
82
+ ## Multi-Session
83
+
84
+ Run multiple browser sessions in parallel:
85
+
86
+ ```bash
87
+ # Create sessions with auto-generated IDs
88
+ agent-browser open --new http://localhost:3000 # Output: Session: swift-fox
89
+ agent-browser open --new http://localhost:3000 # Output: Session: calm-river
90
+
91
+ # Target specific sessions
92
+ agent-browser act -s swift-fox click:button_0
93
+ agent-browser state -s calm-river
94
+
95
+ # List all sessions
96
+ agent-browser sessions
97
+ ```
98
+
60
99
  ## CLI Reference
61
100
 
62
101
  | Command | Description |
@@ -102,23 +141,6 @@ agent-browser wait --timeout 60000 # Custom timeout
102
141
  --no-state # Skip state in response
103
142
  ```
104
143
 
105
- ## Multi-Session
106
-
107
- Run multiple browser sessions in parallel:
108
-
109
- ```bash
110
- # Create sessions with auto-generated IDs
111
- agent-browser open --new http://localhost:3000 # Output: Session: swift-fox
112
- agent-browser open --new http://localhost:3000 # Output: Session: calm-river
113
-
114
- # Target specific sessions
115
- agent-browser act -s swift-fox click:button_0
116
- agent-browser state -s calm-river
117
-
118
- # List all sessions
119
- agent-browser sessions
120
- ```
121
-
122
144
  ## State Output
123
145
 
124
146
  ```
@@ -172,6 +194,8 @@ export default defineBrowserConfig({
172
194
  });
173
195
  ```
174
196
 
197
+ On macOS, headless system Chrome can crash during AppKit startup. By default, when `headless: true` on macOS, the CLI logs a warning and falls back to bundled Playwright Chromium, even if `useSystemChrome` or `executablePath` is set. If you explicitly want system Chrome in headless mode, set `allowSystemChromeHeadless: true`.
198
+
175
199
  ## What This Is NOT For
176
200
 
177
201
  This tool is for agents to test their own code. It is **not** for:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-browser-loop",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "Let your AI coding agent drive a browser to verify its own work",
5
5
  "license": "MIT",
6
6
  "author": "Jason Silberman",
package/src/actions.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { Page, Request } from "playwright";
2
+ import type { ElementRefStore } from "./ref-store";
2
3
  import type {
3
4
  ClickOptions,
4
5
  NavigateOptions,
@@ -7,27 +8,26 @@ import type {
7
8
  } from "./types";
8
9
 
9
10
  /**
10
- * Get a locator for an element by ref or index
11
- * After calling getState(), elements have data-ref attributes injected
11
+ * Get a locator for an element by ref or index using the ref store
12
+ * The ref store contains selectors generated during getState()
12
13
  */
13
- function getLocator(page: Page, options: { ref?: string; index?: number }) {
14
- if (options.ref) {
15
- return page.locator(`[data-ref="${options.ref}"]`);
16
- }
17
- if (options.index !== undefined) {
18
- // Use data-index (injected by getState). Fallback to legacy e{index} refs.
19
- return page.locator(
20
- `[data-index="${options.index}"], [data-ref="e${options.index}"]`,
21
- );
22
- }
23
- throw new Error("Must provide either ref or index");
14
+ async function getLocator(
15
+ page: Page,
16
+ refStore: ElementRefStore,
17
+ options: { ref?: string; index?: number },
18
+ ) {
19
+ return await refStore.resolveLocator(page, options);
24
20
  }
25
21
 
26
22
  /**
27
23
  * Click an element
28
24
  */
29
- export async function click(page: Page, options: ClickOptions): Promise<void> {
30
- const locator = getLocator(page, options);
25
+ export async function click(
26
+ page: Page,
27
+ refStore: ElementRefStore,
28
+ options: ClickOptions,
29
+ ): Promise<void> {
30
+ const locator = await getLocator(page, refStore, options);
31
31
 
32
32
  const clickOptions: Parameters<typeof locator.click>[0] = {
33
33
  button: options.button,
@@ -44,8 +44,12 @@ export async function click(page: Page, options: ClickOptions): Promise<void> {
44
44
  /**
45
45
  * Type text into an element
46
46
  */
47
- export async function type(page: Page, options: TypeOptions): Promise<void> {
48
- const locator = getLocator(page, options);
47
+ export async function type(
48
+ page: Page,
49
+ refStore: ElementRefStore,
50
+ options: TypeOptions,
51
+ ): Promise<void> {
52
+ const locator = await getLocator(page, refStore, options);
49
53
 
50
54
  // Clear existing text if requested
51
55
  if (options.clear) {
@@ -129,9 +133,10 @@ export async function waitForElement(
129
133
  */
130
134
  export async function hover(
131
135
  page: Page,
136
+ refStore: ElementRefStore,
132
137
  options: { ref?: string; index?: number },
133
138
  ): Promise<void> {
134
- const locator = getLocator(page, options);
139
+ const locator = await getLocator(page, refStore, options);
135
140
  await locator.hover();
136
141
  }
137
142
 
@@ -140,9 +145,10 @@ export async function hover(
140
145
  */
141
146
  export async function select(
142
147
  page: Page,
148
+ refStore: ElementRefStore,
143
149
  options: { ref?: string; index?: number; value: string | string[] },
144
150
  ): Promise<void> {
145
- const locator = getLocator(page, options);
151
+ const locator = await getLocator(page, refStore, options);
146
152
  await locator.selectOption(options.value);
147
153
  }
148
154
 
package/src/browser.ts CHANGED
@@ -3,6 +3,7 @@ import { chromium } from "playwright";
3
3
  import * as actions from "./actions";
4
4
  import { findChromeExecutable } from "./chrome";
5
5
  import { log } from "./log";
6
+ import { ElementRefStore } from "./ref-store";
6
7
  import { formatStateText, getState } from "./state";
7
8
  import type {
8
9
  BrowserConfig,
@@ -33,12 +34,14 @@ export class AgentBrowser {
33
34
  private networkLogLimit: number;
34
35
  private usePersistentContext = false;
35
36
  private lastState: BrowserState | null = null;
37
+ private refStore: ElementRefStore = new ElementRefStore();
36
38
 
37
39
  constructor(options: AgentBrowserOptions = {}) {
38
40
  this.config = {
39
41
  headless: options.headless ?? true,
40
42
  executablePath: options.executablePath,
41
43
  useSystemChrome: options.useSystemChrome ?? true,
44
+ allowSystemChromeHeadless: options.allowSystemChromeHeadless,
42
45
  viewportWidth: options.viewportWidth ?? 1280,
43
46
  viewportHeight: options.viewportHeight ?? 720,
44
47
  userDataDir: options.userDataDir,
@@ -60,8 +63,27 @@ export class AgentBrowser {
60
63
  throw new Error("Browser already started");
61
64
  }
62
65
 
63
- const resolvedExecutablePath = this.config.useSystemChrome
64
- ? this.config.executablePath || findChromeExecutable()
66
+ const isDarwin = process.platform === "darwin";
67
+ let useSystemChrome = this.config.useSystemChrome ?? true;
68
+ let executablePath = this.config.executablePath;
69
+
70
+ if (
71
+ isDarwin &&
72
+ this.config.headless &&
73
+ (useSystemChrome || executablePath) &&
74
+ !this.config.allowSystemChromeHeadless
75
+ ) {
76
+ log
77
+ .withMetadata({ executablePath })
78
+ .warn(
79
+ "Headless system Chrome can crash on macOS. Falling back to bundled Chromium. Set allowSystemChromeHeadless to true to override.",
80
+ );
81
+ useSystemChrome = false;
82
+ executablePath = undefined;
83
+ }
84
+
85
+ const resolvedExecutablePath = useSystemChrome
86
+ ? executablePath || findChromeExecutable()
65
87
  : undefined;
66
88
 
67
89
  log
@@ -160,6 +182,7 @@ export class AgentBrowser {
160
182
  this.networkLogs = [];
161
183
  this.networkCaptureEnabled = false;
162
184
  this.usePersistentContext = false;
185
+ this.refStore.clear();
163
186
  }
164
187
 
165
188
  /**
@@ -190,15 +213,21 @@ export class AgentBrowser {
190
213
  options?: Omit<NavigateOptions, "url">,
191
214
  ): Promise<void> {
192
215
  await actions.navigate(this.getPage(), { url, ...options });
216
+ this.refStore.clear();
193
217
  }
194
218
 
195
219
  /**
196
220
  * Get rich state of the current page
197
- * Also injects data-ref attributes for element targeting
221
+ * Stores element refs server-side (no DOM modification)
198
222
  */
199
223
  async getState(options?: GetStateOptions): Promise<BrowserState> {
200
- // getState now handles ref injection internally
201
- const state = await getState(this.getPage(), this.getContext(), options);
224
+ // getState now stores refs in this.refStore instead of injecting into DOM
225
+ const state = await getState(
226
+ this.getPage(),
227
+ this.getContext(),
228
+ this.refStore,
229
+ options,
230
+ );
202
231
  const result = {
203
232
  ...state,
204
233
  errors: {
@@ -254,14 +283,14 @@ export class AgentBrowser {
254
283
  * Click an element
255
284
  */
256
285
  async click(options: ClickOptions): Promise<void> {
257
- await actions.click(this.getPage(), options);
286
+ await actions.click(this.getPage(), this.refStore, options);
258
287
  }
259
288
 
260
289
  /**
261
290
  * Type text into an element
262
291
  */
263
292
  async type(options: TypeOptions): Promise<void> {
264
- await actions.type(this.getPage(), options);
293
+ await actions.type(this.getPage(), this.refStore, options);
265
294
  }
266
295
 
267
296
  /**
@@ -429,7 +458,7 @@ export class AgentBrowser {
429
458
  * Hover over an element
430
459
  */
431
460
  async hover(options: { ref?: string; index?: number }): Promise<void> {
432
- await actions.hover(this.getPage(), options);
461
+ await actions.hover(this.getPage(), this.refStore, options);
433
462
  }
434
463
 
435
464
  /**
@@ -440,7 +469,7 @@ export class AgentBrowser {
440
469
  index?: number;
441
470
  value: string | string[];
442
471
  }): Promise<void> {
443
- await actions.select(this.getPage(), options);
472
+ await actions.select(this.getPage(), this.refStore, options);
444
473
  }
445
474
 
446
475
  /**
@@ -453,6 +482,28 @@ export class AgentBrowser {
453
482
  return actions.screenshot(this.getPage(), options);
454
483
  }
455
484
 
485
+ /**
486
+ * Resize the viewport and record the new size in the config
487
+ */
488
+ async resize(width: number, height: number): Promise<void> {
489
+ await this.getPage().setViewportSize({ width, height });
490
+ this.config.viewportWidth = width;
491
+ this.config.viewportHeight = height;
492
+ }
493
+
494
+ /**
495
+ * Get current viewport size (falls back to the configured size if the page reports none)
496
+ */
497
+ getViewportSize(): { width: number; height: number } {
498
+ const size = this.getPage().viewportSize();
499
+ return (
500
+ size ?? {
501
+ width: this.config.viewportWidth!,
502
+ height: this.config.viewportHeight!,
503
+ }
504
+ );
505
+ }
506
+
456
507
  /**
457
508
  * Get captured console logs
458
509
  */
@@ -554,6 +605,13 @@ export class AgentBrowser {
554
605
  }
555
606
  return state;
556
607
  }
608
+
609
+ /**
610
+ * Get the element ref store (for advanced usage/testing)
611
+ */
612
+ getRefStore(): ElementRefStore {
613
+ return this.refStore;
614
+ }
557
615
  }
558
616
 
559
617
  /**