@kritchoff/agent-browser 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +903 -0
  3. package/README.sdk.md +77 -0
  4. package/bin/agent-browser-linux-x64 +0 -0
  5. package/bin/agent-browser.js +109 -0
  6. package/dist/actions.d.ts +17 -0
  7. package/dist/actions.d.ts.map +1 -0
  8. package/dist/actions.js +1427 -0
  9. package/dist/actions.js.map +1 -0
  10. package/dist/browser.d.ts +474 -0
  11. package/dist/browser.d.ts.map +1 -0
  12. package/dist/browser.js +1566 -0
  13. package/dist/browser.js.map +1 -0
  14. package/dist/cdp-client.d.ts +103 -0
  15. package/dist/cdp-client.d.ts.map +1 -0
  16. package/dist/cdp-client.js +223 -0
  17. package/dist/cdp-client.js.map +1 -0
  18. package/dist/daemon.d.ts +60 -0
  19. package/dist/daemon.d.ts.map +1 -0
  20. package/dist/daemon.js +401 -0
  21. package/dist/daemon.js.map +1 -0
  22. package/dist/dualmode-config.d.ts +37 -0
  23. package/dist/dualmode-config.d.ts.map +1 -0
  24. package/dist/dualmode-config.js +44 -0
  25. package/dist/dualmode-config.js.map +1 -0
  26. package/dist/dualmode-fetcher.d.ts +60 -0
  27. package/dist/dualmode-fetcher.d.ts.map +1 -0
  28. package/dist/dualmode-fetcher.js +449 -0
  29. package/dist/dualmode-fetcher.js.map +1 -0
  30. package/dist/dualmode-types.d.ts +183 -0
  31. package/dist/dualmode-types.d.ts.map +1 -0
  32. package/dist/dualmode-types.js +8 -0
  33. package/dist/dualmode-types.js.map +1 -0
  34. package/dist/ios-actions.d.ts +11 -0
  35. package/dist/ios-actions.d.ts.map +1 -0
  36. package/dist/ios-actions.js +228 -0
  37. package/dist/ios-actions.js.map +1 -0
  38. package/dist/ios-manager.d.ts +266 -0
  39. package/dist/ios-manager.d.ts.map +1 -0
  40. package/dist/ios-manager.js +1073 -0
  41. package/dist/ios-manager.js.map +1 -0
  42. package/dist/protocol.d.ts +26 -0
  43. package/dist/protocol.d.ts.map +1 -0
  44. package/dist/protocol.js +832 -0
  45. package/dist/protocol.js.map +1 -0
  46. package/dist/snapshot.d.ts +83 -0
  47. package/dist/snapshot.d.ts.map +1 -0
  48. package/dist/snapshot.js +653 -0
  49. package/dist/snapshot.js.map +1 -0
  50. package/dist/stream-server.d.ts +117 -0
  51. package/dist/stream-server.d.ts.map +1 -0
  52. package/dist/stream-server.js +305 -0
  53. package/dist/stream-server.js.map +1 -0
  54. package/dist/types.d.ts +742 -0
  55. package/dist/types.d.ts.map +1 -0
  56. package/dist/types.js +2 -0
  57. package/dist/types.js.map +1 -0
  58. package/docker-compose.sdk.yml +45 -0
  59. package/package.json +85 -0
  60. package/scripts/benchmark.sh +80 -0
  61. package/scripts/build-all-platforms.sh +68 -0
  62. package/scripts/check-version-sync.js +39 -0
  63. package/scripts/copy-native.js +36 -0
  64. package/scripts/fast_reset.sh +108 -0
  65. package/scripts/postinstall.js +235 -0
  66. package/scripts/publish_images.sh +55 -0
  67. package/scripts/snapshot_manager.sh +293 -0
  68. package/scripts/start-android-agent.sh +49 -0
  69. package/scripts/sync-version.js +69 -0
  70. package/scripts/vaccine-run +26 -0
  71. package/sdk.sh +153 -0
  72. package/skills/agent-browser/SKILL.md +217 -0
  73. package/skills/agent-browser/references/authentication.md +202 -0
  74. package/skills/agent-browser/references/commands.md +259 -0
  75. package/skills/agent-browser/references/proxy-support.md +188 -0
  76. package/skills/agent-browser/references/session-management.md +193 -0
  77. package/skills/agent-browser/references/snapshot-refs.md +194 -0
  78. package/skills/agent-browser/references/video-recording.md +173 -0
  79. package/skills/agent-browser/templates/authenticated-session.sh +97 -0
  80. package/skills/agent-browser/templates/capture-workflow.sh +69 -0
  81. package/skills/agent-browser/templates/form-automation.sh +62 -0
  82. package/skills/skill-creator/LICENSE.txt +202 -0
  83. package/skills/skill-creator/SKILL.md +356 -0
  84. package/skills/skill-creator/references/output-patterns.md +82 -0
  85. package/skills/skill-creator/references/workflows.md +28 -0
  86. package/skills/skill-creator/scripts/init_skill.py +303 -0
  87. package/skills/skill-creator/scripts/package_skill.py +113 -0
  88. package/skills/skill-creator/scripts/quick_validate.py +95 -0
@@ -0,0 +1,26 @@
1
#!/bin/bash
# vaccine-run: waits for the Android emulator to finish booting, then
# pre-grants location/camera/microphone runtime permissions to the browser
# packages so no permission dialogs appear during automation.
#
# Every grant is best-effort: a missing package or an already-granted
# permission must not fail the script.

# Merge stderr into stdout so all output lands in one log stream.
exec 2>&1

# Runtime permissions granted to each browser package, in this order.
permissions=(
  android.permission.ACCESS_FINE_LOCATION
  android.permission.CAMERA
  android.permission.RECORD_AUDIO
)

# Grant every entry of $permissions to the package named in $1.
# Failures are ignored on purpose (package may be absent).
grant_permissions() {
  local package=$1
  local permission
  for permission in "${permissions[@]}"; do
    adb shell pm grant "$package" "$permission" || true
  done
}

echo "[vaccine] Waiting for emulator..."
adb wait-for-device

echo "[vaccine] Waiting for boot completion..."
# sys.boot_completed reads "1" once boot has finished. Strip carriage returns
# (some adb versions emit CRLF) and wait for the exact value, so a stray "\r"
# on an otherwise empty reply is not mistaken for "booted".
until [[ "$(adb shell getprop sys.boot_completed | tr -d '\r')" == "1" ]]; do
  sleep 1
done

echo "[vaccine] Waiting for Package Manager..."
# pm only answers once the package manager service is up.
until adb shell pm list packages > /dev/null 2>&1; do
  sleep 1
done

echo "[vaccine] Applying permissions to org.chromium.chrome (WootzApp)..."
grant_permissions org.chromium.chrome

# Fallback for standard Chrome if present
grant_permissions com.android.chrome

echo "[vaccine] Permissions granted."

# Prevent restart
# NOTE(review): presumably a service-supervisor "down" marker — confirm.
touch /etc/services.d/vaccine/down
package/sdk.sh ADDED
@@ -0,0 +1,153 @@
1
+ #!/bin/bash
2
+ # Agent Browser SDK Wrapper
3
+ #
4
+ # A user-friendly entry point for AI Agents to interact with the Android Browser Environment.
5
+ # Handles "Lazy Snapshot" logic for instant startups.
6
+ #
7
+ # Usage:
8
+ # ./sdk.sh start - Start the environment (Cold boot 1st time, Warm boot after)
9
+ # ./sdk.sh stop - Stop the environment
10
+ # ./sdk.sh reset - Fast Reset the browser (15s)
11
+ # ./sdk.sh agent ... - Run agent commands (e.g. ./sdk.sh agent open google.com)
12
+ #
13
+
14
set -e

# ANSI color escapes used by the log_* helpers (expanded via echo -e there).
NC='\033[0m' # No Color
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
GREEN='\033[0;32m'

# Absolute directory containing this script; all other paths hang off it.
SDK_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CACHE_DIR="${SDK_ROOT}/cache"
SNAPSHOT_FILE="${CACHE_DIR}/baseline.tar.gz"

# Default to production/local build unless --dist is used
COMPOSE_FILE="docker-compose.prod.yml"

# Consume leading global options. Only --dist is recognised; the first
# argument that is not --dist ends option parsing and is left in "$@"
# for the subcommand dispatch.
while [[ $# -gt 0 && "$1" == "--dist" ]]; do
  COMPOSE_FILE="docker-compose.sdk.yml"
  shift
done

# The snapshot cache directory must exist before any command runs.
mkdir -p "$CACHE_DIR"
45
+
46
# Print "<color>[SDK]<reset> <message>" to stdout.
#   $1 - color escape for the tag, $2 - message text.
# %b expands backslash escapes in the whole string, as echo -e does.
_sdk_log() { printf '%b\n' "${1}[SDK]${NC} ${2}"; }

# Level-specific wrappers; each takes the message as $1.
log_info() { _sdk_log "$BLUE" "$1"; }
log_success() { _sdk_log "$GREEN" "$1"; }
log_warn() { _sdk_log "$YELLOW" "$1"; }
log_error() { _sdk_log "$RED" "$1"; }
50
+
51
#######################################
# Start the environment.
#
# Warm start: if the cached baseline snapshot exists, run start.sh with
# --snapshot pointing at it.
# Cold start: run start.sh without a snapshot, then save a "quickboot"
# emulator snapshot and export it to the cache for future warm starts.
#
# Globals:   SDK_ROOT, SNAPSHOT_FILE (read); COMPOSE_FILE (read, exported)
# Arguments: none
# Outputs:   progress messages via the log_* helpers
# Returns:   exits 1 if startup fails, the Android container cannot be
#            found, or the in-emulator snapshot save fails. A failed export
#            to the cache is logged but is not fatal.
#######################################
cmd_start() {
  local start_pid container

  log_info "Initializing Agent Environment (using $COMPOSE_FILE)..."

  # Export COMPOSE_FILE so start.sh (which might call docker compose) uses it
  # Note: start.sh needs to be updated to respect COMPOSE_FILE if it doesn't already
  export COMPOSE_FILE

  if [ -f "$SNAPSHOT_FILE" ]; then
    # === WARM START ===
    log_success "Found cached baseline snapshot. Performing WARM BOOT..."
    "$SDK_ROOT/start.sh" --snapshot "$SNAPSHOT_FILE"
    return
  fi

  # === COLD START & FREEZE ===
  log_warn "No baseline found. Performing FIRST RUN SETUP (Cold Boot)..."
  log_warn "This will take ~60-90 seconds, but only once."

  # Start in background to wait for it
  "$SDK_ROOT/start.sh" &
  start_pid=$!

  # Test wait's status directly: under `set -e` a bare failing `wait` would
  # abort the script before the failure could be reported.
  if ! wait "$start_pid"; then
    log_error "Startup failed."
    exit 1
  fi

  log_info "Creating 'Golden' Baseline Snapshot for future runs..."

  # We need to find the container ID to send the adb command
  # We use docker compose to find it reliably
  cd "$SDK_ROOT"
  # A failing lookup is folded into the "not found" path below so the user
  # sees an error message instead of a silent `set -e` exit.
  container=$(docker compose -f "$COMPOSE_FILE" ps -q android-service) || container=""

  if [ -z "$container" ]; then
    log_error "Error: Android container not found."
    exit 1
  fi

  # 1. Save snapshot inside emulator
  log_info "Saving emulator state (quickboot)..."
  if docker exec "$container" adb emu avd snapshot save quickboot; then
    log_success "Snapshot saved inside emulator."
  else
    log_error "Failed to save snapshot inside emulator."
    exit 1
  fi

  # 2. Export to host
  log_info "Exporting to cache..."
  if "$SDK_ROOT/scripts/snapshot_manager.sh" export quickboot "$SNAPSHOT_FILE"; then
    log_success "Snapshot exported to $SNAPSHOT_FILE"
    log_success "Setup Complete! Future runs will launch in ~20 seconds."
  else
    log_error "Failed to export snapshot."
    # The snapshot file did not exist when this branch was entered, so
    # anything there now is a partial export. Remove it so the next run does
    # not mistake it for a valid baseline.
    rm -f -- "$SNAPSHOT_FILE"
    # Don't exit, environment is still running
  fi
}
110
+
111
# Stop the environment's containers with `docker compose stop`, using the
# compose file selected at startup. Changes the working directory to SDK_ROOT.
cmd_stop() {
  log_info "Stopping environment (using $COMPOSE_FILE)..."

  # Run from the SDK root so a relative compose file path resolves there.
  cd "$SDK_ROOT"
  docker compose \
    -f "$COMPOSE_FILE" \
    stop

  log_success "Stopped."
}
117
+
118
# Reset the browser by delegating to scripts/fast_reset.sh under SDK_ROOT.
# The function's status is that script's exit status.
cmd_reset() {
  local reset_script="$SDK_ROOT/scripts/fast_reset.sh"

  log_info "Performing Fast Browser Reset..."
  "$reset_script"
}
122
+
123
# Forward all arguments to the agent CLI.
# Uses the file $SDK_ROOT/agent when it exists; otherwise runs
# bin/agent-browser.js through node.
#   $@ - arguments passed through unchanged.
cmd_agent() {
  local native_agent="$SDK_ROOT/agent"

  # No native binary built/linked: fall back to the JS entry point.
  if [ ! -f "$native_agent" ]; then
    node "$SDK_ROOT/bin/agent-browser.js" "$@"
    return
  fi

  "$native_agent" "$@"
}
133
+
134
+ # Main Dispatch
135
# Route the first remaining argument (global options were consumed earlier)
# to its command handler. Anything unrecognised, including no argument at
# all, prints the usage line and exits 1.
if [[ "$1" == "start" ]]; then
  cmd_start
elif [[ "$1" == "stop" ]]; then
  cmd_stop
elif [[ "$1" == "reset" ]]; then
  cmd_reset
elif [[ "$1" == "agent" ]]; then
  # Drop the subcommand name; the rest is passed to the agent as-is.
  shift
  cmd_agent "$@"
else
  echo "Usage: $0 {start|stop|reset|agent}"
  exit 1
fi
@@ -0,0 +1,217 @@
1
+ ---
2
+ name: agent-browser
3
+ description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction.
4
+ allowed-tools: Bash(agent-browser:*)
5
+ ---
6
+
7
+ # Browser Automation with agent-browser
8
+
9
+ ## Core Workflow
10
+
11
+ Every browser automation follows this pattern:
12
+
13
+ 1. **Navigate**: `agent-browser open <url>`
14
+ 2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
15
+ 3. **Interact**: Use refs to click, fill, select
16
+ 4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
17
+
18
+ ```bash
19
+ agent-browser open https://example.com/form
20
+ agent-browser snapshot -i
21
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
22
+
23
+ agent-browser fill @e1 "user@example.com"
24
+ agent-browser fill @e2 "password123"
25
+ agent-browser click @e3
26
+ agent-browser wait --load networkidle
27
+ agent-browser snapshot -i # Check result
28
+ ```
29
+
30
+ ## Essential Commands
31
+
32
+ ```bash
33
+ # Navigation
34
+ agent-browser open <url> # Navigate (aliases: goto, navigate)
35
+ agent-browser close # Close browser
36
+
37
+ # Snapshot
38
+ agent-browser snapshot -i # Interactive elements with refs (recommended)
39
+ agent-browser snapshot -i -C # Include cursor-interactive elements (divs with onclick, cursor:pointer)
40
+ agent-browser snapshot -s "#selector" # Scope to CSS selector
41
+
42
+ # Interaction (use @refs from snapshot)
43
+ agent-browser click @e1 # Click element
44
+ agent-browser fill @e2 "text" # Clear and type text
45
+ agent-browser type @e2 "text" # Type without clearing
46
+ agent-browser select @e1 "option" # Select dropdown option
47
+ agent-browser check @e1 # Check checkbox
48
+ agent-browser press Enter # Press key
49
+ agent-browser scroll down 500 # Scroll page
50
+
51
+ # Get information
52
+ agent-browser get text @e1 # Get element text
53
+ agent-browser get url # Get current URL
54
+ agent-browser get title # Get page title
55
+
56
+ # Wait
57
+ agent-browser wait @e1 # Wait for element
58
+ agent-browser wait --load networkidle # Wait for network idle
59
+ agent-browser wait --url "**/page" # Wait for URL pattern
60
+ agent-browser wait 2000 # Wait milliseconds
61
+
62
+ # Capture
63
+ agent-browser screenshot # Screenshot to temp dir
64
+ agent-browser screenshot --full # Full page screenshot
65
+ agent-browser pdf output.pdf # Save as PDF
66
+ ```
67
+
68
+ ## Common Patterns
69
+
70
+ ### Form Submission
71
+
72
+ ```bash
73
+ agent-browser open https://example.com/signup
74
+ agent-browser snapshot -i
75
+ agent-browser fill @e1 "Jane Doe"
76
+ agent-browser fill @e2 "jane@example.com"
77
+ agent-browser select @e3 "California"
78
+ agent-browser check @e4
79
+ agent-browser click @e5
80
+ agent-browser wait --load networkidle
81
+ ```
82
+
83
+ ### Authentication with State Persistence
84
+
85
+ ```bash
86
+ # Login once and save state
87
+ agent-browser open https://app.example.com/login
88
+ agent-browser snapshot -i
89
+ agent-browser fill @e1 "$USERNAME"
90
+ agent-browser fill @e2 "$PASSWORD"
91
+ agent-browser click @e3
92
+ agent-browser wait --url "**/dashboard"
93
+ agent-browser state save auth.json
94
+
95
+ # Reuse in future sessions
96
+ agent-browser state load auth.json
97
+ agent-browser open https://app.example.com/dashboard
98
+ ```
99
+
100
+ ### Data Extraction
101
+
102
+ ```bash
103
+ agent-browser open https://example.com/products
104
+ agent-browser snapshot -i
105
+ agent-browser get text @e5 # Get specific element text
106
+ agent-browser get text body > page.txt # Get all page text
107
+
108
+ # JSON output for parsing
109
+ agent-browser snapshot -i --json
110
+ agent-browser get text @e1 --json
111
+ ```
112
+
113
+ ### Parallel Sessions
114
+
115
+ ```bash
116
+ agent-browser --session site1 open https://site-a.com
117
+ agent-browser --session site2 open https://site-b.com
118
+
119
+ agent-browser --session site1 snapshot -i
120
+ agent-browser --session site2 snapshot -i
121
+
122
+ agent-browser session list
123
+ ```
124
+
125
+ ### Visual Browser (Debugging)
126
+
127
+ ```bash
128
+ agent-browser --headed open https://example.com
129
+ agent-browser highlight @e1 # Highlight element
130
+ agent-browser record start demo.webm # Record session
131
+ ```
132
+
133
+ ### Local Files (PDFs, HTML)
134
+
135
+ ```bash
136
+ # Open local files with file:// URLs
137
+ agent-browser --allow-file-access open file:///path/to/document.pdf
138
+ agent-browser --allow-file-access open file:///path/to/page.html
139
+ agent-browser screenshot output.png
140
+ ```
141
+
142
+ ### iOS Simulator (Mobile Safari)
143
+
144
+ ```bash
145
+ # List available iOS simulators
146
+ agent-browser device list
147
+
148
+ # Launch Safari on a specific device
149
+ agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
150
+
151
+ # Same workflow as desktop - snapshot, interact, re-snapshot
152
+ agent-browser -p ios snapshot -i
153
+ agent-browser -p ios tap @e1 # Tap (alias for click)
154
+ agent-browser -p ios fill @e2 "text"
155
+ agent-browser -p ios swipe up # Mobile-specific gesture
156
+
157
+ # Take screenshot
158
+ agent-browser -p ios screenshot mobile.png
159
+
160
+ # Close session (shuts down simulator)
161
+ agent-browser -p ios close
162
+ ```
163
+
164
+ **Requirements:** macOS with Xcode, Appium (`npm install -g appium && appium driver install xcuitest`)
165
+
166
+ **Real devices:** Works with physical iOS devices if pre-configured. Use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`.
167
+
168
+ ## Ref Lifecycle (Important)
169
+
170
+ Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after:
171
+
172
+ - Clicking links or buttons that navigate
173
+ - Form submissions
174
+ - Dynamic content loading (dropdowns, modals)
175
+
176
+ ```bash
177
+ agent-browser click @e5 # Navigates to new page
178
+ agent-browser snapshot -i # MUST re-snapshot
179
+ agent-browser click @e1 # Use new refs
180
+ ```
181
+
182
+ ## Semantic Locators (Alternative to Refs)
183
+
184
+ When refs are unavailable or unreliable, use semantic locators:
185
+
186
+ ```bash
187
+ agent-browser find text "Sign In" click
188
+ agent-browser find label "Email" fill "user@test.com"
189
+ agent-browser find role button click --name "Submit"
190
+ agent-browser find placeholder "Search" type "query"
191
+ agent-browser find testid "submit-btn" click
192
+ ```
193
+
194
+ ## Deep-Dive Documentation
195
+
196
+ | Reference | When to Use |
197
+ |-----------|-------------|
198
+ | [references/commands.md](references/commands.md) | Full command reference with all options |
199
+ | [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
200
+ | [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
201
+ | [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
202
+ | [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
203
+ | [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
204
+
205
+ ## Ready-to-Use Templates
206
+
207
+ | Template | Description |
208
+ |----------|-------------|
209
+ | [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation |
210
+ | [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state |
211
+ | [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots |
212
+
213
+ ```bash
214
+ ./templates/form-automation.sh https://example.com/form
215
+ ./templates/authenticated-session.sh https://app.example.com/login
216
+ ./templates/capture-workflow.sh https://example.com ./output
217
+ ```
@@ -0,0 +1,202 @@
1
+ # Authentication Patterns
2
+
3
+ Login flows, session persistence, OAuth, 2FA, and authenticated browsing.
4
+
5
+ **Related**: [session-management.md](session-management.md) for state persistence details, [SKILL.md](../SKILL.md) for quick start.
6
+
7
+ ## Contents
8
+
9
+ - [Basic Login Flow](#basic-login-flow)
10
+ - [Saving Authentication State](#saving-authentication-state)
11
+ - [Restoring Authentication](#restoring-authentication)
12
+ - [OAuth / SSO Flows](#oauth--sso-flows)
13
+ - [Two-Factor Authentication](#two-factor-authentication)
14
+ - [HTTP Basic Auth](#http-basic-auth)
15
+ - [Cookie-Based Auth](#cookie-based-auth)
16
+ - [Token Refresh Handling](#token-refresh-handling)
17
+ - [Security Best Practices](#security-best-practices)
18
+
19
+ ## Basic Login Flow
20
+
21
+ ```bash
22
+ # Navigate to login page
23
+ agent-browser open https://app.example.com/login
24
+ agent-browser wait --load networkidle
25
+
26
+ # Get form elements
27
+ agent-browser snapshot -i
28
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
29
+
30
+ # Fill credentials
31
+ agent-browser fill @e1 "user@example.com"
32
+ agent-browser fill @e2 "password123"
33
+
34
+ # Submit
35
+ agent-browser click @e3
36
+ agent-browser wait --load networkidle
37
+
38
+ # Verify login succeeded
39
+ agent-browser get url # Should be dashboard, not login
40
+ ```
41
+
42
+ ## Saving Authentication State
43
+
44
+ After logging in, save state for reuse:
45
+
46
+ ```bash
47
+ # Login first (see above)
48
+ agent-browser open https://app.example.com/login
49
+ agent-browser snapshot -i
50
+ agent-browser fill @e1 "user@example.com"
51
+ agent-browser fill @e2 "password123"
52
+ agent-browser click @e3
53
+ agent-browser wait --url "**/dashboard"
54
+
55
+ # Save authenticated state
56
+ agent-browser state save ./auth-state.json
57
+ ```
58
+
59
+ ## Restoring Authentication
60
+
61
+ Skip login by loading saved state:
62
+
63
+ ```bash
64
+ # Load saved auth state
65
+ agent-browser state load ./auth-state.json
66
+
67
+ # Navigate directly to protected page
68
+ agent-browser open https://app.example.com/dashboard
69
+
70
+ # Verify authenticated
71
+ agent-browser snapshot -i
72
+ ```
73
+
74
+ ## OAuth / SSO Flows
75
+
76
+ For OAuth redirects:
77
+
78
+ ```bash
79
+ # Start OAuth flow
80
+ agent-browser open https://app.example.com/auth/google
81
+
82
+ # Handle redirects automatically
83
+ agent-browser wait --url "**/accounts.google.com**"
84
+ agent-browser snapshot -i
85
+
86
+ # Fill Google credentials
87
+ agent-browser fill @e1 "user@gmail.com"
88
+ agent-browser click @e2 # Next button
89
+ agent-browser wait 2000
90
+ agent-browser snapshot -i
91
+ agent-browser fill @e3 "password"
92
+ agent-browser click @e4 # Sign in
93
+
94
+ # Wait for redirect back
95
+ agent-browser wait --url "**/app.example.com**"
96
+ agent-browser state save ./oauth-state.json
97
+ ```
98
+
99
+ ## Two-Factor Authentication
100
+
101
+ Handle 2FA with manual intervention:
102
+
103
+ ```bash
104
+ # Login with credentials
105
+ agent-browser open https://app.example.com/login --headed # Show browser
106
+ agent-browser snapshot -i
107
+ agent-browser fill @e1 "user@example.com"
108
+ agent-browser fill @e2 "password123"
109
+ agent-browser click @e3
110
+
111
+ # Wait for user to complete 2FA manually
112
+ echo "Complete 2FA in the browser window..."
113
+ agent-browser wait --url "**/dashboard" --timeout 120000
114
+
115
+ # Save state after 2FA
116
+ agent-browser state save ./2fa-state.json
117
+ ```
118
+
119
+ ## HTTP Basic Auth
120
+
121
+ For sites using HTTP Basic Authentication:
122
+
123
+ ```bash
124
+ # Set credentials before navigation
125
+ agent-browser set credentials username password
126
+
127
+ # Navigate to protected resource
128
+ agent-browser open https://protected.example.com/api
129
+ ```
130
+
131
+ ## Cookie-Based Auth
132
+
133
+ Manually set authentication cookies:
134
+
135
+ ```bash
136
+ # Set auth cookie
137
+ agent-browser cookies set session_token "abc123xyz"
138
+
139
+ # Navigate to protected page
140
+ agent-browser open https://app.example.com/dashboard
141
+ ```
142
+
143
+ ## Token Refresh Handling
144
+
145
+ For sessions with expiring tokens:
146
+
147
+ ```bash
148
+ #!/bin/bash
149
+ # Wrapper that handles token refresh
150
+
151
+ STATE_FILE="./auth-state.json"
152
+
153
+ # Try loading existing state
154
+ if [[ -f "$STATE_FILE" ]]; then
155
+ agent-browser state load "$STATE_FILE"
156
+ agent-browser open https://app.example.com/dashboard
157
+
158
+ # Check if session is still valid
159
+ URL=$(agent-browser get url)
160
+ if [[ "$URL" == *"/login"* ]]; then
161
+ echo "Session expired, re-authenticating..."
162
+ # Perform fresh login
163
+ agent-browser snapshot -i
164
+ agent-browser fill @e1 "$USERNAME"
165
+ agent-browser fill @e2 "$PASSWORD"
166
+ agent-browser click @e3
167
+ agent-browser wait --url "**/dashboard"
168
+ agent-browser state save "$STATE_FILE"
169
+ fi
170
+ else
171
+ # First-time login
172
+ agent-browser open https://app.example.com/login
173
+ # ... login flow ...
174
+ fi
175
+ ```
176
+
177
+ ## Security Best Practices
178
+
179
+ 1. **Never commit state files** - They contain session tokens
180
+ ```bash
181
+ echo "*-state.json" >> .gitignore
182
+ ```
183
+
184
+ 2. **Use environment variables for credentials**
185
+ ```bash
186
+ agent-browser fill @e1 "$APP_USERNAME"
187
+ agent-browser fill @e2 "$APP_PASSWORD"
188
+ ```
189
+
190
+ 3. **Clean up after automation**
191
+ ```bash
192
+ agent-browser cookies clear
193
+ rm -f ./auth-state.json
194
+ ```
195
+
196
+ 4. **Use short-lived sessions for CI/CD**
197
+ ```bash
198
+ # Don't persist state in CI
199
+ agent-browser open https://app.example.com/login
200
+ # ... login and perform actions ...
201
+ agent-browser close # Session ends, nothing persisted
202
+ ```