@dyyz1993/agent-browser 0.9.2 → 0.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/dist/__tests__/utils/parseCli.d.ts +1 -0
  2. package/dist/__tests__/utils/parseCli.d.ts.map +1 -1
  3. package/dist/__tests__/utils/parseCli.js +18 -10
  4. package/dist/__tests__/utils/parseCli.js.map +1 -1
  5. package/dist/actions.d.ts.map +1 -1
  6. package/dist/actions.js +63 -3
  7. package/dist/actions.js.map +1 -1
  8. package/dist/browser.d.ts +46 -2
  9. package/dist/browser.d.ts.map +1 -1
  10. package/dist/browser.js +343 -13
  11. package/dist/browser.js.map +1 -1
  12. package/dist/cli/commands.d.ts.map +1 -1
  13. package/dist/cli/commands.js +8 -3
  14. package/dist/cli/commands.js.map +1 -1
  15. package/dist/cli/connection.d.ts.map +1 -1
  16. package/dist/cli/connection.js +39 -1
  17. package/dist/cli/connection.js.map +1 -1
  18. package/dist/cli/help.d.ts.map +1 -1
  19. package/dist/cli/help.js +27 -20
  20. package/dist/cli/help.js.map +1 -1
  21. package/dist/cli/output.d.ts.map +1 -1
  22. package/dist/cli/output.js +5 -0
  23. package/dist/cli/output.js.map +1 -1
  24. package/dist/cli.js +20 -0
  25. package/dist/cli.js.map +1 -1
  26. package/dist/daemon.d.ts.map +1 -1
  27. package/dist/daemon.js +147 -1
  28. package/dist/daemon.js.map +1 -1
  29. package/dist/message-bridge.d.ts.map +1 -1
  30. package/dist/message-bridge.js +22 -4
  31. package/dist/message-bridge.js.map +1 -1
  32. package/dist/openapi.d.ts +22 -0
  33. package/dist/openapi.d.ts.map +1 -0
  34. package/dist/openapi.js +382 -0
  35. package/dist/openapi.js.map +1 -0
  36. package/dist/protocol.d.ts.map +1 -1
  37. package/dist/protocol.js +18 -0
  38. package/dist/protocol.js.map +1 -1
  39. package/dist/recorder/inject.js +61 -134
  40. package/dist/stream-server-standalone.d.ts +10 -0
  41. package/dist/stream-server-standalone.d.ts.map +1 -1
  42. package/dist/stream-server-standalone.js +594 -74
  43. package/dist/stream-server-standalone.js.map +1 -1
  44. package/dist/stream-server.d.ts +67 -2
  45. package/dist/stream-server.d.ts.map +1 -1
  46. package/dist/stream-server.js +371 -51
  47. package/dist/stream-server.js.map +1 -1
  48. package/dist/swagger-ui.d.ts +6 -0
  49. package/dist/swagger-ui.d.ts.map +1 -0
  50. package/dist/swagger-ui.js +51 -0
  51. package/dist/swagger-ui.js.map +1 -0
  52. package/dist/test-live.d.ts +2 -0
  53. package/dist/test-live.d.ts.map +1 -0
  54. package/dist/test-live.js +333 -0
  55. package/dist/test-live.js.map +1 -0
  56. package/dist/types.d.ts +7 -1
  57. package/dist/types.d.ts.map +1 -1
  58. package/dist/types.js.map +1 -1
  59. package/dist/viewer-html.d.ts.map +1 -1
  60. package/dist/viewer-html.js +270 -58
  61. package/dist/viewer-html.js.map +1 -1
  62. package/dist/viewer-script.d.ts +20 -2
  63. package/dist/viewer-script.d.ts.map +1 -1
  64. package/dist/viewer-script.js +911 -154
  65. package/dist/viewer-script.js.map +1 -1
  66. package/package.json +1 -1
  67. package/scripts/postinstall.js +6 -32
  68. package/scripts/test-cli-help.sh +51 -0
  69. package/scripts/verify-form.sh +67 -0
  70. package/scripts/verify-login.sh +65 -0
  71. package/scripts/verify-recording.sh +80 -0
  72. package/scripts/verify-upload.sh +41 -0
  73. package/skills/agent-browser/SKILL.md +297 -160
  74. package/skills/agent-browser/references/commands.md +3 -0
  75. package/skills/agent-browser/references/mobile-viewer.md +188 -0
  76. package/skills/agent-browser/references/network-monitoring.md +232 -0
  77. package/skills/agent-browser/references/recorder.md +319 -0
  78. package/skills/agent-browser/references/viewer-mode.md +148 -0
  79. package/skills/agent-browser/templates/api-interception.sh +3 -1
  80. package/skills/agent-browser/templates/data-extraction.sh +8 -4
  81. package/skills/agent-browser/templates/form-automation.sh +18 -23
  82. package/skills/agent-browser/templates/network-intercept-crawl.sh +256 -0
  83. package/skills/agent-browser/templates/recorder-workflow.sh +51 -0
  84. package/skills/agent-browser/templates/viewer-remote.sh +41 -0
  85. package/dist/__tests__/test-iframe.d.ts +0 -2
  86. package/dist/__tests__/test-iframe.d.ts.map +0 -1
  87. package/dist/__tests__/test-iframe.js +0 -52
  88. package/dist/__tests__/test-iframe.js.map +0 -1
  89. package/dist/cli-new.d.ts +0 -3
  90. package/dist/cli-new.d.ts.map +0 -1
  91. package/dist/cli-new.js +0 -308
  92. package/dist/cli-new.js.map +0 -1
  93. package/dist/cli-old.d.ts +0 -3
  94. package/dist/cli-old.d.ts.map +0 -1
  95. package/dist/cli-old.js +0 -1101
  96. package/dist/cli-old.js.map +0 -1
  97. package/dist/recorder/binding.d.ts +0 -24
  98. package/dist/recorder/binding.d.ts.map +0 -1
  99. package/dist/recorder/binding.js +0 -215
  100. package/dist/recorder/binding.js.map +0 -1
  101. package/dist/recorder/index.d.ts +0 -4
  102. package/dist/recorder/index.d.ts.map +0 -1
  103. package/dist/recorder/index.js +0 -4
  104. package/dist/recorder/index.js.map +0 -1
  105. package/dist/recorder/recorder.d.ts +0 -19
  106. package/dist/recorder/recorder.d.ts.map +0 -1
  107. package/dist/recorder/recorder.js +0 -101
  108. package/dist/recorder/recorder.js.map +0 -1
  109. package/dist/recorder/store.d.ts +0 -22
  110. package/dist/recorder/store.d.ts.map +0 -1
  111. package/dist/recorder/store.js +0 -150
  112. package/dist/recorder/store.js.map +0 -1
  113. package/dist/recorder/types.d.ts +0 -73
  114. package/dist/recorder/types.d.ts.map +0 -1
  115. package/dist/recorder/types.js +0 -5
  116. package/dist/recorder/types.js.map +0 -1
@@ -0,0 +1,319 @@
1
+ # Recorder (Action Recording & Replay)
2
+
3
+ Record user interactions as structured steps that can be replayed or exported for LLM processing.
4
+
5
+ **Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start.
6
+
7
+ ## Contents
8
+
9
+ - [Basic Recording](#basic-recording)
10
+ - [Recording Workflow](#recording-workflow)
11
+ - [Supported Actions](#supported-actions)
12
+ - [YAML Output Format](#yaml-output-format)
13
+ - [Replay Feature](#replay-feature)
14
+ - [Use Cases](#use-cases)
15
+ - [Best Practices](#best-practices)
16
+
17
+ ## Basic Recording
18
+
19
+ ```bash
20
+ # Start recording session
21
+ agent-browser recorder start
22
+
23
+ # Perform actions
24
+ agent-browser open https://example.com
25
+ agent-browser snapshot -i
26
+ agent-browser click @e1
27
+ agent-browser fill @e2 "test input"
28
+ agent-browser select @e3 "option"
29
+
30
+ # Stop recording and save to file
31
+ agent-browser recorder stop --output session.yaml
32
+ ```
33
+
34
+ ## Recording Workflow
35
+
36
+ The recorder captures all browser interactions including:
37
+
38
+ 1. **Navigation**: Page loads and URL changes
39
+ 2. **Input**: Text entry in form fields
40
+ 3. **Selection**: Dropdown choices
41
+ 4. **Clicks**: Button and link clicks
42
+ 5. **Scrolling**: Page scroll events
43
+ 6. **Mouse Movement**: Trajectory data for human-like behavior
44
+
45
+ ```bash
46
+ # Example: Complete form submission workflow
47
+ agent-browser recorder start --session form-test
48
+
49
+ # Navigate to form
50
+ agent-browser open https://example.com/form
51
+ agent-browser snapshot -i
52
+
53
+ # Fill form fields
54
+ agent-browser fill @e1 "John Doe"
55
+ agent-browser fill @e2 "john@example.com"
56
+ agent-browser select @e3 "United States"
57
+ agent-browser check @e4
58
+
59
+ # Submit form
60
+ agent-browser click @e5
61
+ agent-browser wait --load networkidle
62
+
63
+ # Save recording
64
+ agent-browser recorder stop --output form-submission.yaml
65
+ ```
66
+
67
+ ## Supported Actions
68
+
69
+ | Action | Description | Example |
70
+ |--------|-------------|---------|
71
+ | `navigate` | Page navigation | `agent-browser open https://example.com` |
72
+ | `fill` | Text input | `agent-browser fill @e1 "text"` |
73
+ | `select` | Dropdown selection | `agent-browser select @e2 "option"` |
74
+ | `click` | Element click | `agent-browser click @e3` |
75
+ | `check` | Checkbox check | `agent-browser check @e4` |
76
+ | `uncheck` | Checkbox uncheck | `agent-browser uncheck @e5` |
77
+ | `scroll` | Page scroll | `agent-browser scroll down 500` |
78
+ | `trajectory` | Mouse movement | Captured automatically |
79
+
80
+ ## YAML Output Format
81
+
82
+ The recorder generates a structured YAML file with the following layout:
83
+
84
+ ```yaml
85
+ session:
86
+ id: recorder-1234567890
87
+ startTime: 19:46:49
88
+ endTime: 19:48:28
89
+ steps: 83
90
+
91
+ pages:
92
+ - url: https://example.com
93
+ title: Example Domain
94
+ firstVisitTime: 19:46:53
95
+
96
+ steps:
97
+ - id: step-1234567890
98
+ time: 19:47:00
99
+ action: fill
100
+ selector: "#username"
101
+ xpath: "//*[@id='username']"
102
+ value: "testuser"
103
+
104
+ - id: step-1234567891
105
+ time: 19:47:05
106
+ action: click
107
+ selector: "#submit-btn"
108
+ xpath: "//*[@id='submit-btn']"
109
+
110
+ - id: step-1234567892
111
+ time: 19:47:10
112
+ action: scroll
113
+ x: 0
114
+ y: 500
115
+
116
+ # CLI Commands section contains executable commands
117
+ # for direct replay in terminal
118
+ ```
119
+
120
+ ## Replay Feature
121
+
122
+ Replay recorded interactions from a YAML file:
123
+
124
+ ```bash
125
+ # Replay most recent recording
126
+ agent-browser recorder replay
127
+
128
+ # Replay specific file
129
+ agent-browser recorder replay form-submission.yaml
130
+
131
+ # Replay with verbose output
132
+ agent-browser recorder replay session.yaml --verbose
133
+ ```
134
+
135
+ The replay feature:
136
+ 1. Parses the YAML file
137
+ 2. Executes each step in sequence
138
+ 3. Handles both navigation and interactions
139
+ 4. Supports all recorded action types
140
+
141
+ ## Use Cases
142
+
143
+ ### 1. Test Automation Documentation
144
+
145
+ Record manual test sessions for documentation:
146
+
147
+ ```bash
148
+ agent-browser recorder start --session checkout-flow
149
+
150
+ # Perform checkout process
151
+ agent-browser open https://shop.example.com/cart
152
+ agent-browser snapshot -i
153
+ agent-browser fill @e1 "123 Main St"
154
+ agent-browser fill @e2 "New York"
155
+ agent-browser fill @e3 "10001"
156
+ agent-browser click @e4
157
+ agent-browser wait --load networkidle
158
+
159
+ # Save for documentation
160
+ agent-browser recorder stop --output docs/checkout-flow.yaml
161
+ ```
162
+
163
+ ### 2. Regression Testing
164
+
165
+ Create reusable test scenarios:
166
+
167
+ ```bash
168
+ # Record once
169
+ agent-browser recorder start
170
+ agent-browser open https://app.example.com/login
171
+ agent-browser snapshot -i
172
+ agent-browser fill @e1 "$USERNAME"
173
+ agent-browser fill @e2 "$PASSWORD"
174
+ agent-browser click @e3
175
+ agent-browser recorder stop --output tests/login.yaml
176
+
177
+ # Replay in CI/CD
178
+ agent-browser recorder replay tests/login.yaml
179
+ ```
180
+
181
+ ### 3. Workflow Automation
182
+
183
+ Capture complex workflows for automation:
184
+
185
+ ```bash
186
+ # Record multi-step workflow
187
+ agent-browser recorder start
188
+ agent-browser open https://dashboard.example.com
189
+ agent-browser snapshot -i
190
+ agent-browser click @e1 # Navigate to reports
191
+ agent-browser click @e2 # Select date range
192
+ agent-browser click @e3 # Export CSV
193
+ agent-browser recorder stop --output workflows/export-data.yaml
194
+ ```
195
+
196
+ ### 4. Debugging & Analysis
197
+
198
+ Record sessions for debugging:
199
+
200
+ ```bash
201
+ agent-browser recorder start --session debug-$(date +%s)
202
+
203
+ # Run problematic workflow
204
+ agent-browser open https://example.com
205
+ # ... interactions ...
206
+
207
+ # Save for analysis
208
+ agent-browser recorder stop --output debug/session.yaml
209
+ ```
210
+
211
+ ## Best Practices
212
+
213
+ ### 1. Use Session Names
214
+
215
+ ```bash
216
+ # Good: Descriptive session names
217
+ agent-browser recorder start --session user-registration
218
+ agent-browser recorder start --session checkout-payment
219
+ agent-browser recorder start --session search-functionality
220
+
221
+ # Avoid: Generic names
222
+ agent-browser recorder start --session test1
223
+ agent-browser recorder start --session recording
224
+ ```
225
+
226
+ ### 2. Add Wait Times for Stability
227
+
228
+ ```bash
229
+ agent-browser recorder start
230
+
231
+ # Add waits after critical actions
232
+ agent-browser click @e1
233
+ agent-browser wait --load networkidle # Wait for page load
234
+
235
+ agent-browser fill @e2 "text"
236
+ agent-browser wait 1000 # Wait for dynamic content
237
+
238
+ agent-browser click @e3
239
+ ```
240
+
241
+ ### 3. Use Snapshots for Ref Stability
242
+
243
+ ```bash
244
+ agent-browser recorder start
245
+
246
+ # Always snapshot before interactions
247
+ agent-browser snapshot -i
248
+ agent-browser click @e1
249
+
250
+ # Re-snapshot after navigation
251
+ agent-browser wait --load networkidle
252
+ agent-browser snapshot -i
253
+ agent-browser click @e2
254
+ ```
255
+
256
+ ### 4. Organize Recordings
257
+
258
+ ```text
259
+ # Create organized directory structure
260
+ recordings/
261
+ ├── tests/
262
+ │ ├── login.yaml
263
+ │ ├── registration.yaml
264
+ │ └── checkout.yaml
265
+ ├── workflows/
266
+ │ ├── data-export.yaml
267
+ │ └── report-generation.yaml
268
+ └── docs/
269
+ ├── user-guide.yaml
270
+ └── api-demo.yaml
271
+ ```
272
+
273
+ ### 5. Review Generated Commands
274
+
275
+ The YAML file includes a CLI Commands section at the end with executable commands. Review these commands to:
276
+
277
+ - Verify the captured selectors
278
+ - Check for redundant steps
279
+ - Identify opportunities for optimization
280
+ - Ensure actions are in the correct order
281
+
282
+ ## Advanced Features
283
+
284
+ ### Session-Based Recording
285
+
286
+ ```bash
287
+ # Record with specific session
288
+ agent-browser recorder start --session my-test --timeout 60000
289
+
290
+ # Use session for all commands
291
+ agent-browser open https://example.com --session my-test
292
+ agent-browser snapshot -i --session my-test
293
+ agent-browser click @e1 --session my-test
294
+
295
+ # Stop recording
296
+ agent-browser recorder stop --output my-test.yaml --session my-test
297
+ ```
298
+
299
+ ### Timeout Configuration
300
+
301
+ ```bash
302
+ # Set recording timeout in milliseconds (default: 60000, i.e. 60 seconds)
303
+ agent-browser recorder start --timeout 120000 # 2 minutes
304
+
305
+ # Useful for long-running workflows
306
+ ```
307
+
308
+ ## Limitations
309
+
310
+ - Refs (`@e1`, `@e2`) are session-specific and not portable
311
+ - Convert to CSS selectors for cross-session reuse
312
+ - Some dynamic content may require additional wait handling
313
+ - Replay requires the same page structure as the recording
314
+
315
+ ## See Also
316
+
317
+ - [snapshot-refs.md](snapshot-refs.md) - Understanding refs and their lifecycle
318
+ - [authentication.md](authentication.md) - Recording login flows
319
+ - [video-recording.md](video-recording.md) - Video recording for debugging
@@ -0,0 +1,148 @@
1
+ # Viewer / Streaming Mode
2
+
3
+ ## Overview
4
+
5
+ The viewer mode provides a **real-time visual remote browser interface**. It streams browser frames (JPEG/WebP) over WebSocket and forwards user input (mouse, keyboard, touch) back to the daemon. This enables:
6
+
7
+ - **Remote debugging** — see what the browser sees in real time
8
+ - **Mobile device control** — operate a desktop browser from your phone
9
+ - **Presentation/demo** — show browser activity to an audience
10
+ - **Collaboration** — share a browser session with others
11
+
12
+ ## Starting the Viewer
13
+
14
+ ```bash
15
+ # Prerequisite: have a browser session running
16
+ agent-browser open https://example.com
17
+
18
+ # Start viewer (opens URL in default browser)
19
+ agent-browser viewer
20
+
21
+ # Get connection details as JSON (for scripting/embedding)
22
+ agent-browser viewer --json
23
+ # Output: {"url":"http://localhost:5005/view?session=default","ws":"ws://...","port":5005}
24
+ ```
25
+
26
+ ## Viewer URL Parameters
27
+
28
+ | Parameter | Description |
29
+ | ------------------ | -------------------------------------- |
30
+ | `?session=<id>` | Connect to a specific named session |
31
+ | `?instanceId=<id>` | Connect to a specific browser instance |
32
+
33
+ ## Architecture
34
+
35
+ ```
36
+ ┌─────────────┐ IPC ┌───────────────────┐ WebSocket ┌──────────┐
37
+ │ Browser │ ───────→ │ Daemon Process │ ←────────────→ │ Viewer │
38
+ │ (Playwright) │ │ (:5000 socket) │ │ (Browser) │
39
+ └─────────────┘ └────────┬─────────┘ └──────────┘
40
+
41
+ standalone HTTP+WS server (:5005)
42
+ serves viewer.html + proxies messages
43
+ ```
44
+
45
+ **Data flow:**
46
+
47
+ 1. **Frames**: Browser -> Daemon -> Standalone Server -> Viewer (binary JPEG/WebP via WS)
48
+ 2. **Input**: Viewer -> Standalone Server -> Daemon -> Browser (JSON messages)
49
+
50
+ ## Viewer Page Features
51
+
52
+ ### Desktop Mode (PC/Mac)
53
+
54
+ | Feature | Description |
55
+ | ------------ | ------------------------------------------------------------------- |
56
+ | Screen area | Shows streamed frame, click/drag/scroll sends input to remote |
57
+ | Toolbar | URL bar, connection status, quality badge, record button |
58
+ | Hidden input | Invisible capture field for keyboard events (auto-focused on click) |
59
+ | Cursor | Red dot showing remote mouse position |
60
+
61
+ ### Mobile Mode (Touch Device)
62
+
63
+ Automatically activates on touch devices. See [mobile-viewer.md](mobile-viewer.md) for full details.
64
+
65
+ | Feature | Description |
66
+ | ---------------- | --------------------------------------------------- |
67
+ | Touchpad | Bottom gesture area for cursor simulation |
68
+ | Input Panel | Text input popup when tapping remote input fields |
69
+ | Keyboard toolbar | Virtual keys: Tab, Arrows, Enter, Backspace, Escape |
70
+ | IME support | Chinese/Japanese composition (pinyin, kana, etc.) |
71
+
72
+ ## Element Selector / Crop Mode
73
+
74
+ Crop the video stream to a specific DOM element's bounds:
75
+
76
+ ```bash
77
+ # Via viewer UI: click element selector button, then click target element
78
+ # The stream is cropped to that element's rectangle
79
+ ```
80
+
81
+ When element mode is active:
82
+
83
+ - Server crops frames to element bounds using Sharp
84
+ - Mouse coordinates auto-map to element-local space
85
+ - Falls back to "degraded mode" (full page) if element not found or disappears
86
+ - `deviceWidth`/`deviceHeight` in metadata reflect element dimensions
87
+
88
+ Use cases:
89
+
90
+ - Focus testing on a specific component
91
+ - Recording interactions within a widget
92
+ - Bandwidth savings (only stream the element, not full page)
93
+
94
+ ## Message Types (Viewer <-> Server)
95
+
96
+ ### Server → Viewer (over WebSocket)
97
+
98
+ | Type | Purpose |
99
+ | --------------- | ------------------------------------------------------------------ |
100
+ | `frame` | Binary frame data with metadata (dimensions, format, element info) |
101
+ | `status` | Connection status, viewport changes |
102
+ | `navigation` | URL/title changes |
103
+ | `input_focused` | Remote element received focus → triggers input panel (mobile) |
104
+ | `input_value` | Remote input value changed |
105
+ | `input_blur` | Remote element lost focus |
106
+
107
+ ### Viewer → Server (over WebSocket)
108
+
109
+ | Type | Purpose |
110
+ | ---------------------- | --------------------------------------------------- |
111
+ | `input_mouse` | Mouse move/press/release/wheel |
112
+ | `input_keyboard` | Key down/up with modifiers |
113
+ | `input_fill` | Full text value sync (mobile input panel) |
114
+ | `input_blur_element` | Blur remote element (mobile input commit) |
115
+ | `keyboard_insert_text` | Character-by-character insert (desktop hiddenInput) |
116
+ | `user_activity` | Keep-alive signal (resumes streaming if paused) |
117
+ | `selector_element` | Request crop to specific element |
118
+
119
+ ## Troubleshooting
120
+
121
+ ### Black screen
122
+
123
+ - Check that the daemon is running: `agent-browser status`
124
+ - Verify browser launched: `agent-browser open https://example.com` should work first
125
+
126
+ ### Connection refused
127
+
128
+ - The viewer command auto-starts the standalone server on port 5005
129
+ - If the port is already in use, find the process holding it: `lsof -i :5005`
130
+ - Kill stale process: `kill $(lsof -t -i :5005)`
131
+
132
+ ### Laggy updates
133
+
134
+ - Frame compression is JPEG by default (adjustable)
135
+ - Quality badge shows current state: "interacting" / "static" / "compressed"
136
+ - Network latency between viewer and server affects frame rate
137
+
138
+ ### Element not found (degraded mode)
139
+
140
+ - Yellow toast appears: "Element not found, showing full page"
141
+ - Element may have been removed by SPA navigation or animation
142
+ - Re-select the element or exit selector mode
143
+
144
+ ### Viewer loads but shows no frames
145
+
146
+ - Check daemon log: `~/.agent-browser/default.log`
147
+ - Look for "Browser not launched" errors
148
+ - Ensure `agent-browser open` was called before `agent-browser viewer`
@@ -1,5 +1,6 @@
1
1
  #!/bin/bash
2
2
  # API Interception Template - Passively capture API responses
3
+ set -euo pipefail
3
4
  # Usage: ./api-interception.sh [target_url] [output_file]
4
5
 
5
6
  TARGET_URL="${1:-https://example.com/user/profile}"
@@ -12,7 +13,8 @@ sleep 1
12
13
 
13
14
  echo ""
14
15
  echo "=== 2. Open blank page ==="
15
- export https_proxy=http://127.0.0.1:7890
16
+ # Optional: set PROXY_URL if using a proxy
17
+ export https_proxy=${PROXY_URL:-}
16
18
  agent-browser open "about:blank"
17
19
  sleep 1
18
20
 
@@ -34,7 +34,8 @@ case "$MODE" in
34
34
  api)
35
35
  echo ""
36
36
  echo "=== 2. API Interception Mode ==="
37
- export https_proxy=http://127.0.0.1:7890
37
+ # Optional: set PROXY_URL if using a proxy
38
+ export https_proxy=${PROXY_URL:-}
38
39
  agent-browser open "about:blank"
39
40
  sleep 1
40
41
 
@@ -64,7 +65,8 @@ case "$MODE" in
64
65
  scroll)
65
66
  echo ""
66
67
  echo "=== 2. Infinite Scroll Mode ==="
67
- export https_proxy=http://127.0.0.1:7890
68
+ # Optional: set PROXY_URL if using a proxy
69
+ export https_proxy=${PROXY_URL:-}
68
70
  agent-browser open "$TARGET_URL"
69
71
  sleep 2
70
72
 
@@ -125,7 +127,8 @@ print(json.dumps(unique, ensure_ascii=False))
125
127
  js)
126
128
  echo ""
127
129
  echo "=== 2. JS Variable Extraction Mode ==="
128
- export https_proxy=http://127.0.0.1:7890
130
+ # Optional: set PROXY_URL if using a proxy
131
+ export https_proxy=${PROXY_URL:-}
129
132
  agent-browser open "$TARGET_URL"
130
133
  sleep 3
131
134
 
@@ -153,7 +156,8 @@ print(json.dumps(unique, ensure_ascii=False))
153
156
  dom|*)
154
157
  echo ""
155
158
  echo "=== 2. DOM Extraction Mode ==="
156
- export https_proxy=http://127.0.0.1:7890
159
+ # Optional: set PROXY_URL if using a proxy
160
+ export https_proxy=${PROXY_URL:-}
157
161
  agent-browser open "$TARGET_URL"
158
162
  sleep 2
159
163
 
@@ -1,32 +1,27 @@
1
- #!/bin/bash
1
+ #!/usr/bin/env bash
2
2
  # Template: Form Automation Workflow
3
3
  # Purpose: Fill and submit web forms with validation
4
4
  # Usage: ./form-automation.sh <form-url>
5
5
  #
6
- # This template demonstrates the snapshot-interact-verify pattern:
7
- # 1. Navigate to form
8
- # 2. Snapshot to get element refs
9
- # 3. Fill fields using refs
10
- # 4. Submit and verify result
11
- #
12
- # Customize: Update the refs (@e1, @e2, etc.) based on your form's snapshot output
6
+ # Demonstrates: snapshot -> interact -> verify pattern
13
7
 
14
8
  set -euo pipefail
15
9
 
16
10
  FORM_URL="${1:?Usage: $0 <form-url>}"
11
+ SESSION="form-$(date +%s)"
17
12
 
18
- echo "Form automation: $FORM_URL"
13
+ echo "=== Form Automation: $FORM_URL ==="
19
14
 
20
15
  # Step 1: Navigate to form
21
- agent-browser open "$FORM_URL"
22
- agent-browser wait --load networkidle
16
+ agent-browser --session "$SESSION" open "$FORM_URL"
17
+ agent-browser --session "$SESSION" wait --load networkidle
23
18
 
24
19
  # Step 2: Snapshot to discover form elements
25
20
  echo ""
26
21
  echo "Form structure:"
27
- agent-browser snapshot -i
22
+ agent-browser --session "$SESSION" snapshot -i
28
23
 
29
- # Step 3: Fill form fields (customize these refs based on snapshot output)
24
+ # Step 3: Fill form fields (customize refs based on snapshot output above)
30
25
  #
31
26
  # Common field types:
32
27
  # agent-browser fill @e1 "John Doe" # Text input
@@ -34,27 +29,27 @@ agent-browser snapshot -i
34
29
  # agent-browser fill @e3 "SecureP@ss123" # Password input
35
30
  # agent-browser select @e4 "Option Value" # Dropdown
36
31
  # agent-browser check @e5 # Checkbox
37
- # agent-browser click @e6 # Radio button
32
+ # agent-browser click @e6 # Radio button / Submit button
38
33
  # agent-browser fill @e7 "Multi-line text" # Textarea
39
34
  # agent-browser upload @e8 /path/to/file.pdf # File upload
40
35
  #
41
36
  # Uncomment and modify:
42
- # agent-browser fill @e1 "Test User"
43
- # agent-browser fill @e2 "test@example.com"
44
- # agent-browser click @e3 # Submit button
37
+ # agent-browser --session "$SESSION" fill @e1 "Test User"
38
+ # agent-browser --session "$SESSION" fill @e2 "test@example.com"
39
+ # agent-browser --session "$SESSION" click @e3 # Submit button
45
40
 
46
- # Step 4: Wait for submission
47
- # agent-browser wait --load networkidle
48
- # agent-browser wait --url "**/success" # Or wait for redirect
41
+ # Step 4: Wait for submission to complete
42
+ agent-browser --session "$SESSION" wait --load networkidle
43
+ # agent-browser --session "$SESSION" wait --url "**/success" # Or wait for redirect
49
44
 
50
45
  # Step 5: Verify result
51
46
  echo ""
52
47
  echo "Result:"
53
- agent-browser get url
54
- agent-browser snapshot -i
48
+ agent-browser --session "$SESSION" get url
49
+ agent-browser --session "$SESSION" snapshot -i
55
50
 
56
51
  # Optional: Capture evidence
57
- agent-browser screenshot /tmp/form-result.png
52
+ agent-browser --session "$SESSION" screenshot /tmp/form-result.png
58
53
  echo "Screenshot saved: /tmp/form-result.png"
59
54
 
60
55
  # Cleanup