@dyyz1993/agent-browser 0.9.2 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/utils/parseCli.d.ts +1 -0
- package/dist/__tests__/utils/parseCli.d.ts.map +1 -1
- package/dist/__tests__/utils/parseCli.js +18 -10
- package/dist/__tests__/utils/parseCli.js.map +1 -1
- package/dist/actions.d.ts.map +1 -1
- package/dist/actions.js +63 -3
- package/dist/actions.js.map +1 -1
- package/dist/browser.d.ts +46 -2
- package/dist/browser.d.ts.map +1 -1
- package/dist/browser.js +343 -13
- package/dist/browser.js.map +1 -1
- package/dist/cli/commands.d.ts.map +1 -1
- package/dist/cli/commands.js +8 -3
- package/dist/cli/commands.js.map +1 -1
- package/dist/cli/connection.d.ts.map +1 -1
- package/dist/cli/connection.js +39 -1
- package/dist/cli/connection.js.map +1 -1
- package/dist/cli/help.d.ts.map +1 -1
- package/dist/cli/help.js +27 -20
- package/dist/cli/help.js.map +1 -1
- package/dist/cli/output.d.ts.map +1 -1
- package/dist/cli/output.js +5 -0
- package/dist/cli/output.js.map +1 -1
- package/dist/cli.js +20 -0
- package/dist/cli.js.map +1 -1
- package/dist/daemon.d.ts.map +1 -1
- package/dist/daemon.js +147 -1
- package/dist/daemon.js.map +1 -1
- package/dist/message-bridge.d.ts.map +1 -1
- package/dist/message-bridge.js +22 -4
- package/dist/message-bridge.js.map +1 -1
- package/dist/openapi.d.ts +22 -0
- package/dist/openapi.d.ts.map +1 -0
- package/dist/openapi.js +382 -0
- package/dist/openapi.js.map +1 -0
- package/dist/protocol.d.ts.map +1 -1
- package/dist/protocol.js +18 -0
- package/dist/protocol.js.map +1 -1
- package/dist/recorder/inject.js +61 -134
- package/dist/stream-server-standalone.d.ts +10 -0
- package/dist/stream-server-standalone.d.ts.map +1 -1
- package/dist/stream-server-standalone.js +594 -74
- package/dist/stream-server-standalone.js.map +1 -1
- package/dist/stream-server.d.ts +67 -2
- package/dist/stream-server.d.ts.map +1 -1
- package/dist/stream-server.js +371 -51
- package/dist/stream-server.js.map +1 -1
- package/dist/swagger-ui.d.ts +6 -0
- package/dist/swagger-ui.d.ts.map +1 -0
- package/dist/swagger-ui.js +51 -0
- package/dist/swagger-ui.js.map +1 -0
- package/dist/test-live.d.ts +2 -0
- package/dist/test-live.d.ts.map +1 -0
- package/dist/test-live.js +333 -0
- package/dist/test-live.js.map +1 -0
- package/dist/types.d.ts +7 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/viewer-html.d.ts.map +1 -1
- package/dist/viewer-html.js +270 -58
- package/dist/viewer-html.js.map +1 -1
- package/dist/viewer-script.d.ts +20 -2
- package/dist/viewer-script.d.ts.map +1 -1
- package/dist/viewer-script.js +911 -154
- package/dist/viewer-script.js.map +1 -1
- package/package.json +1 -1
- package/scripts/postinstall.js +6 -32
- package/scripts/test-cli-help.sh +51 -0
- package/scripts/verify-form.sh +67 -0
- package/scripts/verify-login.sh +65 -0
- package/scripts/verify-recording.sh +80 -0
- package/scripts/verify-upload.sh +41 -0
- package/skills/agent-browser/SKILL.md +297 -160
- package/skills/agent-browser/references/commands.md +3 -0
- package/skills/agent-browser/references/mobile-viewer.md +188 -0
- package/skills/agent-browser/references/network-monitoring.md +232 -0
- package/skills/agent-browser/references/recorder.md +319 -0
- package/skills/agent-browser/references/viewer-mode.md +148 -0
- package/skills/agent-browser/templates/api-interception.sh +3 -1
- package/skills/agent-browser/templates/data-extraction.sh +8 -4
- package/skills/agent-browser/templates/form-automation.sh +18 -23
- package/skills/agent-browser/templates/network-intercept-crawl.sh +256 -0
- package/skills/agent-browser/templates/recorder-workflow.sh +51 -0
- package/skills/agent-browser/templates/viewer-remote.sh +41 -0
- package/dist/__tests__/test-iframe.d.ts +0 -2
- package/dist/__tests__/test-iframe.d.ts.map +0 -1
- package/dist/__tests__/test-iframe.js +0 -52
- package/dist/__tests__/test-iframe.js.map +0 -1
- package/dist/cli-new.d.ts +0 -3
- package/dist/cli-new.d.ts.map +0 -1
- package/dist/cli-new.js +0 -308
- package/dist/cli-new.js.map +0 -1
- package/dist/cli-old.d.ts +0 -3
- package/dist/cli-old.d.ts.map +0 -1
- package/dist/cli-old.js +0 -1101
- package/dist/cli-old.js.map +0 -1
- package/dist/recorder/binding.d.ts +0 -24
- package/dist/recorder/binding.d.ts.map +0 -1
- package/dist/recorder/binding.js +0 -215
- package/dist/recorder/binding.js.map +0 -1
- package/dist/recorder/index.d.ts +0 -4
- package/dist/recorder/index.d.ts.map +0 -1
- package/dist/recorder/index.js +0 -4
- package/dist/recorder/index.js.map +0 -1
- package/dist/recorder/recorder.d.ts +0 -19
- package/dist/recorder/recorder.d.ts.map +0 -1
- package/dist/recorder/recorder.js +0 -101
- package/dist/recorder/recorder.js.map +0 -1
- package/dist/recorder/store.d.ts +0 -22
- package/dist/recorder/store.d.ts.map +0 -1
- package/dist/recorder/store.js +0 -150
- package/dist/recorder/store.js.map +0 -1
- package/dist/recorder/types.d.ts +0 -73
- package/dist/recorder/types.d.ts.map +0 -1
- package/dist/recorder/types.js +0 -5
- package/dist/recorder/types.js.map +0 -1
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
# Recorder (Action Recording & Replay)
|
|
2
|
+
|
|
3
|
+
Record user interactions as structured steps that can be replayed or exported for LLM processing.
|
|
4
|
+
|
|
5
|
+
**Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start.
|
|
6
|
+
|
|
7
|
+
## Contents
|
|
8
|
+
|
|
9
|
+
- [Basic Recording](#basic-recording)
|
|
10
|
+
- [Recording Workflow](#recording-workflow)
|
|
11
|
+
- [Supported Actions](#supported-actions)
|
|
12
|
+
- [YAML Output Format](#yaml-output-format)
|
|
13
|
+
- [Replay Feature](#replay-feature)
|
|
14
|
+
- [Use Cases](#use-cases)
|
|
15
|
+
- [Best Practices](#best-practices)
|
|
16
|
+
|
|
17
|
+
## Basic Recording
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Start recording session
|
|
21
|
+
agent-browser recorder start
|
|
22
|
+
|
|
23
|
+
# Perform actions
|
|
24
|
+
agent-browser open https://example.com
|
|
25
|
+
agent-browser snapshot -i
|
|
26
|
+
agent-browser click @e1
|
|
27
|
+
agent-browser fill @e2 "test input"
|
|
28
|
+
agent-browser select @e3 "option"
|
|
29
|
+
|
|
30
|
+
# Stop recording and save to file
|
|
31
|
+
agent-browser recorder stop --output session.yaml
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Recording Workflow
|
|
35
|
+
|
|
36
|
+
The recorder captures all browser interactions including:
|
|
37
|
+
|
|
38
|
+
1. **Navigation**: Page loads and URL changes
|
|
39
|
+
2. **Input**: Text entry in form fields
|
|
40
|
+
3. **Selection**: Dropdown choices
|
|
41
|
+
4. **Clicks**: Button and link clicks
|
|
42
|
+
5. **Scrolling**: Page scroll events
|
|
43
|
+
6. **Mouse Movement**: Trajectory data for human-like behavior
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
# Example: Complete form submission workflow
|
|
47
|
+
agent-browser recorder start --session form-test
|
|
48
|
+
|
|
49
|
+
# Navigate to form
|
|
50
|
+
agent-browser open https://example.com/form
|
|
51
|
+
agent-browser snapshot -i
|
|
52
|
+
|
|
53
|
+
# Fill form fields
|
|
54
|
+
agent-browser fill @e1 "John Doe"
|
|
55
|
+
agent-browser fill @e2 "john@example.com"
|
|
56
|
+
agent-browser select @e3 "United States"
|
|
57
|
+
agent-browser check @e4
|
|
58
|
+
|
|
59
|
+
# Submit form
|
|
60
|
+
agent-browser click @e5
|
|
61
|
+
agent-browser wait --load networkidle
|
|
62
|
+
|
|
63
|
+
# Save recording
|
|
64
|
+
agent-browser recorder stop --output form-submission.yaml
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Supported Actions
|
|
68
|
+
|
|
69
|
+
| Action | Description | Example |
|
|
70
|
+
|--------|-------------|---------|
|
|
71
|
+
| `navigate` | Page navigation | `agent-browser open https://example.com` |
|
|
72
|
+
| `fill` | Text input | `agent-browser fill @e1 "text"` |
|
|
73
|
+
| `select` | Dropdown selection | `agent-browser select @e2 "option"` |
|
|
74
|
+
| `click` | Element click | `agent-browser click @e3` |
|
|
75
|
+
| `check` | Checkbox check | `agent-browser check @e4` |
|
|
76
|
+
| `uncheck` | Checkbox uncheck | `agent-browser uncheck @e5` |
|
|
77
|
+
| `scroll` | Page scroll | `agent-browser scroll down 500` |
|
|
78
|
+
| `trajectory` | Mouse movement | Captured automatically |
|
|
79
|
+
|
|
80
|
+
## YAML Output Format
|
|
81
|
+
|
|
82
|
+
The recorder generates a structured YAML file with:
|
|
83
|
+
|
|
84
|
+
```yaml
|
|
85
|
+
session:
|
|
86
|
+
id: recorder-1234567890
|
|
87
|
+
startTime: 19:46:49
|
|
88
|
+
endTime: 19:48:28
|
|
89
|
+
steps: 83
|
|
90
|
+
|
|
91
|
+
pages:
|
|
92
|
+
- url: https://example.com
|
|
93
|
+
title: Example Domain
|
|
94
|
+
firstVisitTime: 19:46:53
|
|
95
|
+
|
|
96
|
+
steps:
|
|
97
|
+
- id: step-1234567890
|
|
98
|
+
time: 19:47:00
|
|
99
|
+
action: fill
|
|
100
|
+
selector: "#username"
|
|
101
|
+
xpath: "//*[@id='username']"
|
|
102
|
+
value: "testuser"
|
|
103
|
+
|
|
104
|
+
- id: step-1234567891
|
|
105
|
+
time: 19:47:05
|
|
106
|
+
action: click
|
|
107
|
+
selector: "#submit-btn"
|
|
108
|
+
xpath: "//*[@id='submit-btn']"
|
|
109
|
+
|
|
110
|
+
- id: step-1234567892
|
|
111
|
+
time: 19:47:10
|
|
112
|
+
action: scroll
|
|
113
|
+
x: 0
|
|
114
|
+
y: 500
|
|
115
|
+
|
|
116
|
+
# CLI Commands section contains executable commands
|
|
117
|
+
# for direct replay in terminal
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Replay Feature
|
|
121
|
+
|
|
122
|
+
Replay recorded interactions from YAML file:
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
# Replay most recent recording
|
|
126
|
+
agent-browser recorder replay
|
|
127
|
+
|
|
128
|
+
# Replay specific file
|
|
129
|
+
agent-browser recorder replay form-submission.yaml
|
|
130
|
+
|
|
131
|
+
# Replay with verbose output
|
|
132
|
+
agent-browser recorder replay session.yaml --verbose
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
The replay feature:
|
|
136
|
+
1. Parses the YAML file
|
|
137
|
+
2. Executes each step in sequence
|
|
138
|
+
3. Handles both navigation and interactions
|
|
139
|
+
4. Supports all recorded action types
|
|
140
|
+
|
|
141
|
+
## Use Cases
|
|
142
|
+
|
|
143
|
+
### 1. Test Automation Documentation
|
|
144
|
+
|
|
145
|
+
Record manual test sessions for documentation:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
agent-browser recorder start --session checkout-flow
|
|
149
|
+
|
|
150
|
+
# Perform checkout process
|
|
151
|
+
agent-browser open https://shop.example.com/cart
|
|
152
|
+
agent-browser snapshot -i
|
|
153
|
+
agent-browser fill @e1 "123 Main St"
|
|
154
|
+
agent-browser fill @e2 "New York"
|
|
155
|
+
agent-browser fill @e3 "10001"
|
|
156
|
+
agent-browser click @e4
|
|
157
|
+
agent-browser wait --load networkidle
|
|
158
|
+
|
|
159
|
+
# Save for documentation
|
|
160
|
+
agent-browser recorder stop --output docs/checkout-flow.yaml
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### 2. Regression Testing
|
|
164
|
+
|
|
165
|
+
Create reusable test scenarios:
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# Record once
|
|
169
|
+
agent-browser recorder start
|
|
170
|
+
agent-browser open https://app.example.com/login
|
|
171
|
+
agent-browser snapshot -i
|
|
172
|
+
agent-browser fill @e1 "$USERNAME"
|
|
173
|
+
agent-browser fill @e2 "$PASSWORD"
|
|
174
|
+
agent-browser click @e3
|
|
175
|
+
agent-browser recorder stop --output tests/login.yaml
|
|
176
|
+
|
|
177
|
+
# Replay in CI/CD
|
|
178
|
+
agent-browser recorder replay tests/login.yaml
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### 3. Workflow Automation
|
|
182
|
+
|
|
183
|
+
Capture complex workflows for automation:
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
# Record multi-step workflow
|
|
187
|
+
agent-browser recorder start
|
|
188
|
+
agent-browser open https://dashboard.example.com
|
|
189
|
+
agent-browser snapshot -i
|
|
190
|
+
agent-browser click @e1 # Navigate to reports
|
|
191
|
+
agent-browser click @e2 # Select date range
|
|
192
|
+
agent-browser click @e3 # Export CSV
|
|
193
|
+
agent-browser recorder stop --output workflows/export-data.yaml
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### 4. Debugging & Analysis
|
|
197
|
+
|
|
198
|
+
Record sessions for debugging:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
agent-browser recorder start --session debug-$(date +%s)
|
|
202
|
+
|
|
203
|
+
# Run problematic workflow
|
|
204
|
+
agent-browser open https://example.com
|
|
205
|
+
# ... interactions ...
|
|
206
|
+
|
|
207
|
+
# Save for analysis
|
|
208
|
+
agent-browser recorder stop --output debug/session.yaml
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Best Practices
|
|
212
|
+
|
|
213
|
+
### 1. Use Session Names
|
|
214
|
+
|
|
215
|
+
```bash
|
|
216
|
+
# Good: Descriptive session names
|
|
217
|
+
agent-browser recorder start --session user-registration
|
|
218
|
+
agent-browser recorder start --session checkout-payment
|
|
219
|
+
agent-browser recorder start --session search-functionality
|
|
220
|
+
|
|
221
|
+
# Avoid: Generic names
|
|
222
|
+
agent-browser recorder start --session test1
|
|
223
|
+
agent-browser recorder start --session recording
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### 2. Add Wait Times for Stability
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
agent-browser recorder start
|
|
230
|
+
|
|
231
|
+
# Add waits after critical actions
|
|
232
|
+
agent-browser click @e1
|
|
233
|
+
agent-browser wait --load networkidle # Wait for page load
|
|
234
|
+
|
|
235
|
+
agent-browser fill @e2 "text"
|
|
236
|
+
agent-browser wait 1000 # Wait for dynamic content
|
|
237
|
+
|
|
238
|
+
agent-browser click @e3
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### 3. Use Snapshots for Ref Stability
|
|
242
|
+
|
|
243
|
+
```bash
|
|
244
|
+
agent-browser recorder start
|
|
245
|
+
|
|
246
|
+
# Always snapshot before interactions
|
|
247
|
+
agent-browser snapshot -i
|
|
248
|
+
agent-browser click @e1
|
|
249
|
+
|
|
250
|
+
# Re-snapshot after navigation
|
|
251
|
+
agent-browser wait --load networkidle
|
|
252
|
+
agent-browser snapshot -i
|
|
253
|
+
agent-browser click @e2
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### 4. Organize Recordings
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
# Create organized directory structure
|
|
260
|
+
recordings/
|
|
261
|
+
├── tests/
|
|
262
|
+
│ ├── login.yaml
|
|
263
|
+
│ ├── registration.yaml
|
|
264
|
+
│ └── checkout.yaml
|
|
265
|
+
├── workflows/
|
|
266
|
+
│ ├── data-export.yaml
|
|
267
|
+
│ └── report-generation.yaml
|
|
268
|
+
└── docs/
|
|
269
|
+
├── user-guide.yaml
|
|
270
|
+
└── api-demo.yaml
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### 5. Review Generated Commands
|
|
274
|
+
|
|
275
|
+
The YAML file includes a CLI Commands section at the end with executable commands. Review these commands to:
|
|
276
|
+
|
|
277
|
+
- Verify the captured selectors
|
|
278
|
+
- Check for redundant steps
|
|
279
|
+
- Identify opportunities for optimization
|
|
280
|
+
- Ensure actions are in correct order
|
|
281
|
+
|
|
282
|
+
## Advanced Features
|
|
283
|
+
|
|
284
|
+
### Session-Based Recording
|
|
285
|
+
|
|
286
|
+
```bash
|
|
287
|
+
# Record with specific session
|
|
288
|
+
agent-browser recorder start --session my-test --timeout 60000
|
|
289
|
+
|
|
290
|
+
# Use session for all commands
|
|
291
|
+
agent-browser open https://example.com --session my-test
|
|
292
|
+
agent-browser snapshot -i --session my-test
|
|
293
|
+
agent-browser click @e1 --session my-test
|
|
294
|
+
|
|
295
|
+
# Stop recording
|
|
296
|
+
agent-browser recorder stop --output my-test.yaml --session my-test
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
### Timeout Configuration
|
|
300
|
+
|
|
301
|
+
```bash
|
|
302
|
+
# Set recording timeout in milliseconds (default: 60000, i.e. 60 seconds)
|
|
303
|
+
agent-browser recorder start --timeout 120000 # 2 minutes
|
|
304
|
+
|
|
305
|
+
# Useful for long-running workflows
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
## Limitations
|
|
309
|
+
|
|
310
|
+
- Refs (`@e1`, `@e2`) are session-specific and not portable
|
|
311
|
+
- Convert to CSS selectors for cross-session reuse
|
|
312
|
+
- Some dynamic content may require additional wait handling
|
|
313
|
+
- Replay requires same page structure as recording
|
|
314
|
+
|
|
315
|
+
## See Also
|
|
316
|
+
|
|
317
|
+
- [snapshot-refs.md](snapshot-refs.md) - Understanding refs and their lifecycle
|
|
318
|
+
- [authentication.md](authentication.md) - Recording login flows
|
|
319
|
+
- [video-recording.md](video-recording.md) - Video recording for debugging
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Viewer / Streaming Mode
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The viewer mode provides a **real-time visual remote browser interface**. It streams browser frames (JPEG/WebP) over WebSocket and forwards user input (mouse, keyboard, touch) back to the daemon. This enables:
|
|
6
|
+
|
|
7
|
+
- **Remote debugging** — see what the browser sees in real time
|
|
8
|
+
- **Mobile device control** — operate a desktop browser from your phone
|
|
9
|
+
- **Presentation/demo** — show browser activity to an audience
|
|
10
|
+
- **Collaboration** — share a browser session with others
|
|
11
|
+
|
|
12
|
+
## Starting the Viewer
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
# Prerequisite: have a browser session running
|
|
16
|
+
agent-browser open https://example.com
|
|
17
|
+
|
|
18
|
+
# Start viewer (opens URL in default browser)
|
|
19
|
+
agent-browser viewer
|
|
20
|
+
|
|
21
|
+
# Get connection details as JSON (for scripting/embedding)
|
|
22
|
+
agent-browser viewer --json
|
|
23
|
+
# Output: {"url":"http://localhost:5005/view?session=default","ws":"ws://...","port":5005}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Viewer URL Parameters
|
|
27
|
+
|
|
28
|
+
| Parameter | Description |
|
|
29
|
+
| ------------------ | -------------------------------------- |
|
|
30
|
+
| `?session=<id>` | Connect to a specific named session |
|
|
31
|
+
| `?instanceId=<id>` | Connect to a specific browser instance |
|
|
32
|
+
|
|
33
|
+
## Architecture
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
┌──────────────┐   IPC    ┌───────────────────┐   WebSocket    ┌───────────┐
|
|
37
|
+
│   Browser    │ ───────→ │  Daemon Process   │ ←────────────→ │  Viewer   │
|
|
38
|
+
│ (Playwright) │          │  (:5000 socket)   │                │ (Browser) │
|
|
39
|
+
└──────────────┘          └─────────┬─────────┘                └───────────┘
|
|
40
|
+
                                    │
|
|
41
|
+
                    standalone HTTP+WS server (:5005)
|
|
42
|
+
                  serves viewer.html + proxies messages
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
**Data flow:**
|
|
46
|
+
|
|
47
|
+
1. **Frames**: Browser -> Daemon -> Standalone Server -> Viewer (binary JPEG/WebP via WS)
|
|
48
|
+
2. **Input**: Viewer -> Standalone Server -> Daemon -> Browser (JSON messages)
|
|
49
|
+
|
|
50
|
+
## Viewer Page Features
|
|
51
|
+
|
|
52
|
+
### Desktop Mode (PC/Mac)
|
|
53
|
+
|
|
54
|
+
| Feature | Description |
|
|
55
|
+
| ------------ | ------------------------------------------------------------------- |
|
|
56
|
+
| Screen area | Shows streamed frame, click/drag/scroll sends input to remote |
|
|
57
|
+
| Toolbar | URL bar, connection status, quality badge, record button |
|
|
58
|
+
| Hidden input | Invisible capture field for keyboard events (auto-focused on click) |
|
|
59
|
+
| Cursor | Red dot showing remote mouse position |
|
|
60
|
+
|
|
61
|
+
### Mobile Mode (Touch Device)
|
|
62
|
+
|
|
63
|
+
Automatically activates on touch devices. See [mobile-viewer.md](mobile-viewer.md) for full details.
|
|
64
|
+
|
|
65
|
+
| Feature | Description |
|
|
66
|
+
| ---------------- | --------------------------------------------------- |
|
|
67
|
+
| Touchpad | Bottom gesture area for cursor simulation |
|
|
68
|
+
| Input Panel | Text input popup when tapping remote input fields |
|
|
69
|
+
| Keyboard toolbar | Virtual keys: Tab, Arrows, Enter, Backspace, Escape |
|
|
70
|
+
| IME support | Chinese/Japanese composition (pinyin, kana, etc.) |
|
|
71
|
+
|
|
72
|
+
## Element Selector / Crop Mode
|
|
73
|
+
|
|
74
|
+
Crop the video stream to a specific DOM element's bounds:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# Via viewer UI: click element selector button, then click target element
|
|
78
|
+
# The stream is cropped to that element's rectangle
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
When element mode is active:
|
|
82
|
+
|
|
83
|
+
- Server crops frames to element bounds using Sharp
|
|
84
|
+
- Mouse coordinates auto-map to element-local space
|
|
85
|
+
- Falls back to "degraded mode" (full page) if the element is not found or disappears
|
|
86
|
+
- `deviceWidth`/`deviceHeight` in metadata reflect element dimensions
|
|
87
|
+
|
|
88
|
+
Use cases:
|
|
89
|
+
|
|
90
|
+
- Focus testing on a specific component
|
|
91
|
+
- Recording interactions within a widget
|
|
92
|
+
- Bandwidth savings (only stream the element, not full page)
|
|
93
|
+
|
|
94
|
+
## Message Types (Viewer <-> Server)
|
|
95
|
+
|
|
96
|
+
### Server → Viewer (over WebSocket)
|
|
97
|
+
|
|
98
|
+
| Type | Purpose |
|
|
99
|
+
| --------------- | ------------------------------------------------------------------ |
|
|
100
|
+
| `frame` | Binary frame data with metadata (dimensions, format, element info) |
|
|
101
|
+
| `status` | Connection status, viewport changes |
|
|
102
|
+
| `navigation` | URL/title changes |
|
|
103
|
+
| `input_focused` | Remote element received focus → triggers input panel (mobile) |
|
|
104
|
+
| `input_value` | Remote input value changed |
|
|
105
|
+
| `input_blur` | Remote element lost focus |
|
|
106
|
+
|
|
107
|
+
### Viewer → Server (over WebSocket)
|
|
108
|
+
|
|
109
|
+
| Type | Purpose |
|
|
110
|
+
| ---------------------- | --------------------------------------------------- |
|
|
111
|
+
| `input_mouse` | Mouse move/press/release/wheel |
|
|
112
|
+
| `input_keyboard` | Key down/up with modifiers |
|
|
113
|
+
| `input_fill` | Full text value sync (mobile input panel) |
|
|
114
|
+
| `input_blur_element` | Blur remote element (mobile input commit) |
|
|
115
|
+
| `keyboard_insert_text` | Character-by-character insert (desktop hiddenInput) |
|
|
116
|
+
| `user_activity` | Keep-alive signal (resumes streaming if paused) |
|
|
117
|
+
| `selector_element` | Request crop to specific element |
|
|
118
|
+
|
|
119
|
+
## Troubleshooting
|
|
120
|
+
|
|
121
|
+
### Black screen
|
|
122
|
+
|
|
123
|
+
- Check daemon is running: `agent-browser status`
|
|
124
|
+
- Verify browser launched: `agent-browser open https://example.com` should work first
|
|
125
|
+
|
|
126
|
+
### Connection refused
|
|
127
|
+
|
|
128
|
+
- The viewer command auto-starts the standalone server on port 5005
|
|
129
|
+
- If the port is already in use, find the process holding it: `lsof -i :5005`
|
|
130
|
+
- Kill stale process: `kill $(lsof -t -i :5005)`
|
|
131
|
+
|
|
132
|
+
### Laggy updates
|
|
133
|
+
|
|
134
|
+
- Frame compression is JPEG by default (adjustable)
|
|
135
|
+
- Quality badge shows current state: "interacting" / "static" / "compressed"
|
|
136
|
+
- Network latency between viewer and server affects frame rate
|
|
137
|
+
|
|
138
|
+
### Element not found (degraded mode)
|
|
139
|
+
|
|
140
|
+
- Yellow toast appears: "Element not found, showing full page"
|
|
141
|
+
- Element may have been removed by SPA navigation or animation
|
|
142
|
+
- Re-select the element or exit selector mode
|
|
143
|
+
|
|
144
|
+
### Viewer shows but no frame
|
|
145
|
+
|
|
146
|
+
- Check daemon log: `~/.agent-browser/default.log`
|
|
147
|
+
- Look for "Browser not launched" errors
|
|
148
|
+
- Ensure `agent-browser open` was called before `agent-browser viewer`
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# API Interception Template - Passively capture API responses
|
|
3
|
+
set -euo pipefail
|
|
3
4
|
# Usage: ./api-interception.sh [target_url] [output_file]
|
|
4
5
|
|
|
5
6
|
TARGET_URL="${1:-https://example.com/user/profile}"
|
|
@@ -12,7 +13,8 @@ sleep 1
|
|
|
12
13
|
|
|
13
14
|
echo ""
|
|
14
15
|
echo "=== 2. Open blank page ==="
|
|
15
|
-
|
|
16
|
+
# Optional: set PROXY_URL if using a proxy
|
|
17
|
+
export https_proxy=${PROXY_URL:-}
|
|
16
18
|
agent-browser open "about:blank"
|
|
17
19
|
sleep 1
|
|
18
20
|
|
|
@@ -34,7 +34,8 @@ case "$MODE" in
|
|
|
34
34
|
api)
|
|
35
35
|
echo ""
|
|
36
36
|
echo "=== 2. API Interception Mode ==="
|
|
37
|
-
|
|
37
|
+
# Optional: set PROXY_URL if using a proxy
|
|
38
|
+
export https_proxy=${PROXY_URL:-}
|
|
38
39
|
agent-browser open "about:blank"
|
|
39
40
|
sleep 1
|
|
40
41
|
|
|
@@ -64,7 +65,8 @@ case "$MODE" in
|
|
|
64
65
|
scroll)
|
|
65
66
|
echo ""
|
|
66
67
|
echo "=== 2. Infinite Scroll Mode ==="
|
|
67
|
-
|
|
68
|
+
# Optional: set PROXY_URL if using a proxy
|
|
69
|
+
export https_proxy=${PROXY_URL:-}
|
|
68
70
|
agent-browser open "$TARGET_URL"
|
|
69
71
|
sleep 2
|
|
70
72
|
|
|
@@ -125,7 +127,8 @@ print(json.dumps(unique, ensure_ascii=False))
|
|
|
125
127
|
js)
|
|
126
128
|
echo ""
|
|
127
129
|
echo "=== 2. JS Variable Extraction Mode ==="
|
|
128
|
-
|
|
130
|
+
# Optional: set PROXY_URL if using a proxy
|
|
131
|
+
export https_proxy=${PROXY_URL:-}
|
|
129
132
|
agent-browser open "$TARGET_URL"
|
|
130
133
|
sleep 3
|
|
131
134
|
|
|
@@ -153,7 +156,8 @@ print(json.dumps(unique, ensure_ascii=False))
|
|
|
153
156
|
dom|*)
|
|
154
157
|
echo ""
|
|
155
158
|
echo "=== 2. DOM Extraction Mode ==="
|
|
156
|
-
|
|
159
|
+
# Optional: set PROXY_URL if using a proxy
|
|
160
|
+
export https_proxy=${PROXY_URL:-}
|
|
157
161
|
agent-browser open "$TARGET_URL"
|
|
158
162
|
sleep 2
|
|
159
163
|
|
|
@@ -1,32 +1,27 @@
|
|
|
1
|
-
#!/bin/bash
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
2
|
# Template: Form Automation Workflow
|
|
3
3
|
# Purpose: Fill and submit web forms with validation
|
|
4
4
|
# Usage: ./form-automation.sh <form-url>
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
# 1. Navigate to form
|
|
8
|
-
# 2. Snapshot to get element refs
|
|
9
|
-
# 3. Fill fields using refs
|
|
10
|
-
# 4. Submit and verify result
|
|
11
|
-
#
|
|
12
|
-
# Customize: Update the refs (@e1, @e2, etc.) based on your form's snapshot output
|
|
6
|
+
# Demonstrates: snapshot -> interact -> verify pattern
|
|
13
7
|
|
|
14
8
|
set -euo pipefail
|
|
15
9
|
|
|
16
10
|
FORM_URL="${1:?Usage: $0 <form-url>}"
|
|
11
|
+
SESSION="form-$(date +%s)"
|
|
17
12
|
|
|
18
|
-
echo "Form
|
|
13
|
+
echo "=== Form Automation: $FORM_URL ==="
|
|
19
14
|
|
|
20
15
|
# Step 1: Navigate to form
|
|
21
|
-
agent-browser open "$FORM_URL"
|
|
22
|
-
agent-browser wait --load networkidle
|
|
16
|
+
agent-browser --session "$SESSION" open "$FORM_URL"
|
|
17
|
+
agent-browser --session "$SESSION" wait --load networkidle
|
|
23
18
|
|
|
24
19
|
# Step 2: Snapshot to discover form elements
|
|
25
20
|
echo ""
|
|
26
21
|
echo "Form structure:"
|
|
27
|
-
agent-browser snapshot -i
|
|
22
|
+
agent-browser --session "$SESSION" snapshot -i
|
|
28
23
|
|
|
29
|
-
# Step 3: Fill form fields (customize
|
|
24
|
+
# Step 3: Fill form fields (customize refs based on snapshot output above)
|
|
30
25
|
#
|
|
31
26
|
# Common field types:
|
|
32
27
|
# agent-browser fill @e1 "John Doe" # Text input
|
|
@@ -34,27 +29,27 @@ agent-browser snapshot -i
|
|
|
34
29
|
# agent-browser fill @e3 "SecureP@ss123" # Password input
|
|
35
30
|
# agent-browser select @e4 "Option Value" # Dropdown
|
|
36
31
|
# agent-browser check @e5 # Checkbox
|
|
37
|
-
# agent-browser click @e6 # Radio button
|
|
32
|
+
# agent-browser click @e6 # Radio button / Submit button
|
|
38
33
|
# agent-browser fill @e7 "Multi-line text" # Textarea
|
|
39
34
|
# agent-browser upload @e8 /path/to/file.pdf # File upload
|
|
40
35
|
#
|
|
41
36
|
# Uncomment and modify:
|
|
42
|
-
# agent-browser fill @e1 "Test User"
|
|
43
|
-
# agent-browser fill @e2 "test@example.com"
|
|
44
|
-
# agent-browser click @e3 # Submit button
|
|
37
|
+
# agent-browser --session "$SESSION" fill @e1 "Test User"
|
|
38
|
+
# agent-browser --session "$SESSION" fill @e2 "test@example.com"
|
|
39
|
+
# agent-browser --session "$SESSION" click @e3 # Submit button
|
|
45
40
|
|
|
46
|
-
# Step 4: Wait for submission
|
|
47
|
-
|
|
48
|
-
# agent-browser wait --url "**/success" # Or wait for redirect
|
|
41
|
+
# Step 4: Wait for submission to complete
|
|
42
|
+
agent-browser --session "$SESSION" wait --load networkidle
|
|
43
|
+
# agent-browser --session "$SESSION" wait --url "**/success" # Or wait for redirect
|
|
49
44
|
|
|
50
45
|
# Step 5: Verify result
|
|
51
46
|
echo ""
|
|
52
47
|
echo "Result:"
|
|
53
|
-
agent-browser get url
|
|
54
|
-
agent-browser snapshot -i
|
|
48
|
+
agent-browser --session "$SESSION" get url
|
|
49
|
+
agent-browser --session "$SESSION" snapshot -i
|
|
55
50
|
|
|
56
51
|
# Optional: Capture evidence
|
|
57
|
-
agent-browser screenshot /tmp/form-result.png
|
|
52
|
+
agent-browser --session "$SESSION" screenshot /tmp/form-result.png
|
|
58
53
|
echo "Screenshot saved: /tmp/form-result.png"
|
|
59
54
|
|
|
60
55
|
# Cleanup
|