agent-browser-priv 0.27.3-priv.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +1564 -0
- package/bin/agent-browser.js +125 -0
- package/package.json +52 -0
- package/scripts/build-all-platforms.sh +76 -0
- package/scripts/check-version-sync.js +51 -0
- package/scripts/copy-native.js +36 -0
- package/scripts/postinstall.js +327 -0
- package/scripts/sync-version.js +81 -0
- package/scripts/windows-debug/provision.sh +220 -0
- package/scripts/windows-debug/run.sh +92 -0
- package/scripts/windows-debug/start.sh +43 -0
- package/scripts/windows-debug/stop.sh +28 -0
- package/scripts/windows-debug/sync.sh +27 -0
- package/skill-data/agentcore/SKILL.md +115 -0
- package/skill-data/core/SKILL.md +488 -0
- package/skill-data/core/references/authentication.md +303 -0
- package/skill-data/core/references/commands.md +403 -0
- package/skill-data/core/references/profiling.md +120 -0
- package/skill-data/core/references/proxy-support.md +194 -0
- package/skill-data/core/references/session-management.md +193 -0
- package/skill-data/core/references/snapshot-refs.md +219 -0
- package/skill-data/core/references/trust-boundaries.md +89 -0
- package/skill-data/core/references/video-recording.md +175 -0
- package/skill-data/core/templates/authenticated-session.sh +105 -0
- package/skill-data/core/templates/capture-workflow.sh +69 -0
- package/skill-data/core/templates/form-automation.sh +62 -0
- package/skill-data/dogfood/SKILL.md +220 -0
- package/skill-data/dogfood/references/issue-taxonomy.md +109 -0
- package/skill-data/dogfood/templates/dogfood-report-template.md +53 -0
- package/skill-data/electron/SKILL.md +236 -0
- package/skill-data/slack/SKILL.md +285 -0
- package/skill-data/slack/references/slack-tasks.md +348 -0
- package/skill-data/slack/templates/slack-report-template.md +163 -0
- package/skill-data/vercel-sandbox/SKILL.md +280 -0
- package/skills/agent-browser/SKILL.md +55 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
#!/bin/bash
# Template: Form Automation Workflow
# Purpose: Fill and submit web forms with validation
# Usage: ./form-automation.sh <form-url> [screenshot-path]
#
# This template demonstrates the snapshot-interact-verify pattern:
# 1. Navigate to form
# 2. Snapshot to get element refs
# 3. Fill fields using refs
# 4. Submit and verify result
#
# Arguments:
#   $1  URL of the form to automate (required)
#   $2  Where to write the result screenshot
#       (optional, default: /tmp/form-result.png)
#
# Customize: Update the refs (@e1, @e2, etc.) based on your form's snapshot output

set -euo pipefail

FORM_URL="${1:?Usage: $0 <form-url> [screenshot-path]}"
SCREENSHOT_PATH="${2:-/tmp/form-result.png}"

# With `set -e`, any failing agent-browser step aborts the script immediately.
# Close the browser from an EXIT trap so an aborted run does not leave the
# session open. The close is best-effort here: the script is already exiting
# with the original failure status, so a second error must not mask it.
cleanup() {
  agent-browser close || true
}
trap cleanup EXIT

echo "Form automation: $FORM_URL"

# Step 1: Navigate to form
agent-browser open "$FORM_URL"
agent-browser wait --load networkidle

# Step 2: Snapshot to discover form elements
echo ""
echo "Form structure:"
agent-browser snapshot -i

# Step 3: Fill form fields (customize these refs based on snapshot output)
#
# Common field types:
#   agent-browser fill @e1 "John Doe"            # Text input
#   agent-browser fill @e2 "user@example.com"    # Email input
#   agent-browser fill @e3 "SecureP@ss123"       # Password input
#   agent-browser select @e4 "Option Value"      # Dropdown
#   agent-browser check @e5                      # Checkbox
#   agent-browser click @e6                      # Radio button
#   agent-browser fill @e7 "Multi-line text"     # Textarea
#   agent-browser upload @e8 /path/to/file.pdf   # File upload
#
# Uncomment and modify:
# agent-browser fill @e1 "Test User"
# agent-browser fill @e2 "test@example.com"
# agent-browser click @e3  # Submit button

# Step 4: Wait for submission
# agent-browser wait --load networkidle
# agent-browser wait --url "**/success"  # Or wait for redirect

# Step 5: Verify result
echo ""
echo "Result:"
agent-browser get url
agent-browser snapshot -i

# Optional: Capture evidence
agent-browser screenshot "$SCREENSHOT_PATH"
echo "Screenshot saved: $SCREENSHOT_PATH"

# Cleanup
# Normal path: disarm the trap first so the browser is closed exactly once and
# a failure of the close itself is still reported through the exit status.
trap - EXIT
agent-browser close
echo "Done"
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: dogfood
|
|
3
|
+
description: Systematically explore and test a web application to find bugs, UX issues, and other problems. Use when asked to "dogfood", "QA", "exploratory test", "find issues", "bug hunt", "test this app/site/platform", or review the quality of a web application. Produces a structured report with full reproduction evidence -- step-by-step screenshots, repro videos, and detailed repro steps for every issue -- so findings can be handed directly to the responsible teams.
|
|
4
|
+
allowed-tools: Bash(agent-browser:*), Bash(npx agent-browser:*)
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Dogfood
|
|
8
|
+
|
|
9
|
+
Systematically explore a web application, find issues, and produce a report with full reproduction evidence for every finding.
|
|
10
|
+
|
|
11
|
+
## Setup
|
|
12
|
+
|
|
13
|
+
Only the **Target URL** is required. Everything else has sensible defaults -- use them unless the user explicitly provides an override.
|
|
14
|
+
|
|
15
|
+
| Parameter | Default | Example override |
|
|
16
|
+
|-----------|---------|-----------------|
|
|
17
|
+
| **Target URL** | _(required)_ | `vercel.com`, `http://localhost:3000` |
|
|
18
|
+
| **Session name** | Slugified domain (e.g., `vercel.com` -> `vercel-com`) | `--session my-session` |
|
|
19
|
+
| **Output directory** | `./dogfood-output/` | `Output directory: /tmp/qa` |
|
|
20
|
+
| **Scope** | Full app | `Focus on the billing page` |
|
|
21
|
+
| **Authentication** | None | `Sign in to user@example.com` |
|
|
22
|
+
|
|
23
|
+
If the user says something like "dogfood vercel.com", start immediately with defaults. Do not ask clarifying questions unless authentication is mentioned but credentials are missing.
|
|
24
|
+
|
|
25
|
+
Always use `agent-browser` directly -- never `npx agent-browser`. The direct binary uses the fast Rust client. `npx` routes through Node.js and is significantly slower.
|
|
26
|
+
|
|
27
|
+
## Workflow
|
|
28
|
+
|
|
29
|
+
```
|
|
30
|
+
1. Initialize    Set up session, output dirs, report file
|
|
31
|
+
2. Authenticate  Sign in if needed, save state
|
|
32
|
+
3. Orient        Navigate to starting point, take initial snapshot
|
|
33
|
+
4. Explore       Systematically visit pages and test features
|
|
34
|
+
5. Document      Screenshot + record each issue as found
|
|
35
|
+
6. Wrap up       Update summary counts, close session
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### 1. Initialize
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
mkdir -p {OUTPUT_DIR}/screenshots {OUTPUT_DIR}/videos
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Copy the report template into the output directory and fill in the header fields:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
cp {SKILL_DIR}/templates/dogfood-report-template.md {OUTPUT_DIR}/report.md
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Start a named session:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
agent-browser --session {SESSION} open {TARGET_URL}
|
|
54
|
+
agent-browser --session {SESSION} wait --load networkidle
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### 2. Authenticate
|
|
58
|
+
|
|
59
|
+
If the app requires login:
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
agent-browser --session {SESSION} snapshot -i
|
|
63
|
+
# Identify login form refs, fill credentials
|
|
64
|
+
agent-browser --session {SESSION} fill @e1 "{EMAIL}"
|
|
65
|
+
agent-browser --session {SESSION} fill @e2 "{PASSWORD}"
|
|
66
|
+
agent-browser --session {SESSION} click @e3
|
|
67
|
+
agent-browser --session {SESSION} wait --load networkidle
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
For OTP/email codes: ask the user, wait for their response, then enter the code.
|
|
71
|
+
|
|
72
|
+
After successful login, save state for potential reuse:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
agent-browser --session {SESSION} state save {OUTPUT_DIR}/auth-state.json
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 3. Orient
|
|
79
|
+
|
|
80
|
+
Take an initial annotated screenshot and snapshot to understand the app structure:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/initial.png
|
|
84
|
+
agent-browser --session {SESSION} snapshot -i
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Identify the main navigation elements and map out the sections to visit.
|
|
88
|
+
|
|
89
|
+
### 4. Explore
|
|
90
|
+
|
|
91
|
+
Read [references/issue-taxonomy.md](references/issue-taxonomy.md) for the full list of what to look for and the exploration checklist.
|
|
92
|
+
|
|
93
|
+
**Strategy -- work through the app systematically:**
|
|
94
|
+
|
|
95
|
+
- Start from the main navigation. Visit each top-level section.
|
|
96
|
+
- Within each section, test interactive elements: click buttons, fill forms, open dropdowns/modals.
|
|
97
|
+
- Check edge cases: empty states, error handling, boundary inputs.
|
|
98
|
+
- Try realistic end-to-end workflows (create, edit, delete flows).
|
|
99
|
+
- Check the browser console for errors periodically.
|
|
100
|
+
|
|
101
|
+
**At each page:**
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
agent-browser --session {SESSION} snapshot -i
|
|
105
|
+
agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/{page-name}.png
|
|
106
|
+
agent-browser --session {SESSION} errors
|
|
107
|
+
agent-browser --session {SESSION} console
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Use your judgment on how deep to go. Spend more time on core features and less on peripheral pages. If you find a cluster of issues in one area, investigate deeper.
|
|
111
|
+
|
|
112
|
+
### 5. Document Issues (Repro-First)
|
|
113
|
+
|
|
114
|
+
Steps 4 and 5 happen together -- explore and document in a single pass. When you find an issue, stop exploring and document it immediately before moving on. Do not explore the whole app first and document later.
|
|
115
|
+
|
|
116
|
+
Every issue must be reproducible. When you find something wrong, do not just note it -- prove it with evidence. The goal is that someone reading the report can see exactly what happened and replay it.
|
|
117
|
+
|
|
118
|
+
**Choose the right level of evidence for the issue:**
|
|
119
|
+
|
|
120
|
+
#### Interactive / behavioral issues (functional, ux, console errors on action)
|
|
121
|
+
|
|
122
|
+
These require user interaction to reproduce -- use full repro with video and step-by-step screenshots:
|
|
123
|
+
|
|
124
|
+
1. **Start a repro video** _before_ reproducing:
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
agent-browser --session {SESSION} record start {OUTPUT_DIR}/videos/issue-{NNN}-repro.webm
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
2. **Walk through the steps at human pace.** Pause 1-2 seconds between actions so the video is watchable. Take a screenshot at each step:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
agent-browser --session {SESSION} screenshot {OUTPUT_DIR}/screenshots/issue-{NNN}-step-1.png
|
|
134
|
+
sleep 1
|
|
135
|
+
# Perform action (click, fill, etc.)
|
|
136
|
+
sleep 1
|
|
137
|
+
agent-browser --session {SESSION} screenshot {OUTPUT_DIR}/screenshots/issue-{NNN}-step-2.png
|
|
138
|
+
sleep 1
|
|
139
|
+
# ...continue until the issue manifests
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
3. **Capture the broken state.** Pause so the viewer can see it, then take an annotated screenshot:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
sleep 2
|
|
146
|
+
agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/issue-{NNN}-result.png
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
4. **Stop the video:**
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
agent-browser --session {SESSION} record stop
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
5. Write numbered repro steps in the report, each referencing its screenshot.
|
|
156
|
+
|
|
157
|
+
#### Static / visible-on-load issues (typos, placeholder text, clipped text, misalignment, console errors on load)
|
|
158
|
+
|
|
159
|
+
These are visible without interaction -- a single annotated screenshot is sufficient. No video, no multi-step repro:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/issue-{NNN}.png
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Write a brief description and reference the screenshot in the report. Set **Repro Video** to `N/A`.
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
**For all issues:**
|
|
170
|
+
|
|
171
|
+
1. **Append to the report immediately.** Do not batch issues for later. Write each one as you find it so nothing is lost if the session is interrupted.
|
|
172
|
+
|
|
173
|
+
2. **Increment the issue counter** (ISSUE-001, ISSUE-002, ...).
|
|
174
|
+
|
|
175
|
+
### 6. Wrap Up
|
|
176
|
+
|
|
177
|
+
Aim to find **5-10 well-documented issues**, then wrap up. Depth of evidence matters more than total count -- 5 issues with full repro beats 20 with vague descriptions.
|
|
178
|
+
|
|
179
|
+
After exploring:
|
|
180
|
+
|
|
181
|
+
1. Re-read the report and update the summary severity counts so they match the actual issues. Every `### ISSUE-` block must be reflected in the totals.
|
|
182
|
+
2. Close the session:
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
agent-browser --session {SESSION} close
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
3. Tell the user the report is ready and summarize findings: total issues, breakdown by severity, and the most critical items.
|
|
189
|
+
|
|
190
|
+
## Guidance
|
|
191
|
+
|
|
192
|
+
- **Repro is everything.** Every issue needs proof -- but match the evidence to the issue. Interactive bugs need video and step-by-step screenshots. Static bugs (typos, placeholder text, visual glitches visible on load) only need a single annotated screenshot.
|
|
193
|
+
- **Verify reproducibility before collecting evidence.** Before recording video or taking screenshots, verify the issue is reproducible with at least one retry. If it can't be reproduced consistently, it's not a valid issue.
|
|
194
|
+
- **Don't record video for static issues.** A typo or clipped text doesn't benefit from a video. Save video for issues that involve user interaction, timing, or state changes.
|
|
195
|
+
- **For interactive issues, screenshot each step.** Capture the before, the action, and the after -- so someone can see the full sequence.
|
|
196
|
+
- **Write repro steps that map to screenshots.** Each numbered step in the report should reference its corresponding screenshot. A reader should be able to follow the steps visually without touching a browser.
|
|
197
|
+
- **Use the right snapshot command.**
|
|
198
|
+
  - `snapshot -i` — for finding clickable/fillable elements (buttons, inputs, links)
|
|
199
|
+
  - `snapshot` (no flag) — for reading page content (text, headings, data lists)
|
|
200
|
+
- **Be thorough but use judgment.** You are not following a test script -- you are exploring like a real user would. If something feels off, investigate.
|
|
201
|
+
- **Write findings incrementally.** Append each issue to the report as you discover it. If the session is interrupted, findings are preserved. Never batch all issues for the end.
|
|
202
|
+
- **Never delete output files.** Do not `rm` screenshots, videos, or the report mid-session. Do not close the session and restart. Work forward, not backward.
|
|
203
|
+
- **Never read the target app's source code.** You are testing as a user, not auditing code. Do not read HTML, JS, or config files of the app under test. All findings must come from what you observe in the browser.
|
|
204
|
+
- **Check the console.** Many issues are invisible in the UI but show up as JS errors or failed requests.
|
|
205
|
+
- **Test like a user, not a robot.** Try common workflows end-to-end. Click things a real user would click. Enter realistic data.
|
|
206
|
+
- **Type like a human.** When filling form fields during video recording, use `type` instead of `fill` -- it types character-by-character. Use `fill` only outside of video recording when speed matters.
|
|
207
|
+
- **Pace repro videos for humans.** Add `sleep 1` between actions and `sleep 2` before the final result screenshot. Videos should be watchable at 1x speed -- a human reviewing the report needs to see what happened, not a blur of instant state changes.
|
|
208
|
+
- **Be efficient with commands.** Batch multiple `agent-browser` commands in a single shell call when they are independent (e.g., `agent-browser ... screenshot ... && agent-browser ... console`). Use `agent-browser --session {SESSION} scroll down 300` for scrolling -- do not use `key` or `evaluate` to scroll.
|
|
209
|
+
|
|
210
|
+
## References
|
|
211
|
+
|
|
212
|
+
| Reference | When to Read |
|
|
213
|
+
|-----------|--------------|
|
|
214
|
+
| [references/issue-taxonomy.md](references/issue-taxonomy.md) | Start of session -- calibrate what to look for, severity levels, exploration checklist |
|
|
215
|
+
|
|
216
|
+
## Templates
|
|
217
|
+
|
|
218
|
+
| Template | Purpose |
|
|
219
|
+
|----------|---------|
|
|
220
|
+
| [templates/dogfood-report-template.md](templates/dogfood-report-template.md) | Copy into output directory as the report file |
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Issue Taxonomy
|
|
2
|
+
|
|
3
|
+
Reference for categorizing issues found during dogfooding. Read this at the start of a dogfood session to calibrate what to look for.
|
|
4
|
+
|
|
5
|
+
## Contents
|
|
6
|
+
|
|
7
|
+
- [Severity Levels](#severity-levels)
|
|
8
|
+
- [Categories](#categories)
|
|
9
|
+
- [Exploration Checklist](#exploration-checklist)
|
|
10
|
+
|
|
11
|
+
## Severity Levels
|
|
12
|
+
|
|
13
|
+
| Severity | Definition |
|
|
14
|
+
|----------|------------|
|
|
15
|
+
| **critical** | Blocks a core workflow, causes data loss, or crashes the app |
|
|
16
|
+
| **high** | Major feature broken or unusable, no workaround |
|
|
17
|
+
| **medium** | Feature works but with noticeable problems, workaround exists |
|
|
18
|
+
| **low** | Minor cosmetic or polish issue |
|
|
19
|
+
|
|
20
|
+
## Categories
|
|
21
|
+
|
|
22
|
+
### Visual / UI
|
|
23
|
+
|
|
24
|
+
- Layout broken or misaligned elements
|
|
25
|
+
- Overlapping or clipped text
|
|
26
|
+
- Inconsistent spacing, padding, or margins
|
|
27
|
+
- Missing or broken icons/images
|
|
28
|
+
- Dark mode / light mode rendering issues
|
|
29
|
+
- Responsive layout problems (viewport sizes)
|
|
30
|
+
- Z-index stacking issues (elements hidden behind others)
|
|
31
|
+
- Font rendering issues (wrong font, size, weight)
|
|
32
|
+
- Color contrast problems
|
|
33
|
+
- Animation glitches or jank
|
|
34
|
+
|
|
35
|
+
### Functional
|
|
36
|
+
|
|
37
|
+
- Broken links (404, wrong destination)
|
|
38
|
+
- Buttons or controls that do nothing on click
|
|
39
|
+
- Form validation that rejects valid input or accepts invalid input
|
|
40
|
+
- Incorrect redirects
|
|
41
|
+
- Features that fail silently
|
|
42
|
+
- State not persisted when expected (lost on refresh, navigation)
|
|
43
|
+
- Race conditions (double-submit, stale data)
|
|
44
|
+
- Broken search or filtering
|
|
45
|
+
- Pagination issues
|
|
46
|
+
- File upload/download failures
|
|
47
|
+
|
|
48
|
+
### UX
|
|
49
|
+
|
|
50
|
+
- Confusing or unclear navigation
|
|
51
|
+
- Missing loading indicators or feedback after actions
|
|
52
|
+
- Slow or unresponsive interactions (>300ms perceived delay)
|
|
53
|
+
- Unclear error messages
|
|
54
|
+
- Missing confirmation for destructive actions
|
|
55
|
+
- Dead ends (no way to go back or proceed)
|
|
56
|
+
- Inconsistent patterns across similar features
|
|
57
|
+
- Missing keyboard shortcuts or focus management
|
|
58
|
+
- Unintuitive defaults
|
|
59
|
+
- Missing empty states or unhelpful empty states
|
|
60
|
+
|
|
61
|
+
### Content
|
|
62
|
+
|
|
63
|
+
- Typos or grammatical errors
|
|
64
|
+
- Outdated or incorrect text
|
|
65
|
+
- Placeholder or lorem ipsum content left in
|
|
66
|
+
- Truncated text without tooltip or expansion
|
|
67
|
+
- Missing or wrong labels
|
|
68
|
+
- Inconsistent terminology
|
|
69
|
+
|
|
70
|
+
### Performance
|
|
71
|
+
|
|
72
|
+
- Slow page loads (>3s)
|
|
73
|
+
- Janky scrolling or animations
|
|
74
|
+
- Large layout shifts (content jumping)
|
|
75
|
+
- Excessive network requests (check via console/network)
|
|
76
|
+
- Memory leaks (page slows over time)
|
|
77
|
+
- Unoptimized images (large file sizes)
|
|
78
|
+
|
|
79
|
+
### Console / Errors
|
|
80
|
+
|
|
81
|
+
- JavaScript exceptions in console
|
|
82
|
+
- Failed network requests (4xx, 5xx)
|
|
83
|
+
- Deprecation warnings
|
|
84
|
+
- CORS errors
|
|
85
|
+
- Mixed content warnings
|
|
86
|
+
- Unhandled promise rejections
|
|
87
|
+
|
|
88
|
+
### Accessibility
|
|
89
|
+
|
|
90
|
+
- Missing alt text on images
|
|
91
|
+
- Unlabeled form inputs
|
|
92
|
+
- Poor keyboard navigation (can't tab to elements)
|
|
93
|
+
- Focus traps
|
|
94
|
+
- Insufficient color contrast
|
|
95
|
+
- Missing ARIA attributes on dynamic content
|
|
96
|
+
- Screen reader incompatible patterns
|
|
97
|
+
|
|
98
|
+
## Exploration Checklist
|
|
99
|
+
|
|
100
|
+
Use this as a guide for what to test on each page/feature:
|
|
101
|
+
|
|
102
|
+
1. **Visual scan** -- Take an annotated screenshot. Look for layout, alignment, and rendering issues.
|
|
103
|
+
2. **Interactive elements** -- Click every button, link, and control. Do they work? Is there feedback?
|
|
104
|
+
3. **Forms** -- Fill and submit. Test empty submission, invalid input, and edge cases.
|
|
105
|
+
4. **Navigation** -- Follow all navigation paths. Check breadcrumbs, back button, deep links.
|
|
106
|
+
5. **States** -- Check empty states, loading states, error states, and full/overflow states.
|
|
107
|
+
6. **Console** -- Check for JS errors, failed requests, and warnings.
|
|
108
|
+
7. **Responsiveness** -- If relevant, test at different viewport sizes.
|
|
109
|
+
8. **Auth boundaries** -- Test what happens when not logged in, with different roles if applicable.
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Dogfood Report: {APP_NAME}
|
|
2
|
+
|
|
3
|
+
| Field | Value |
|
|
4
|
+
|-------|-------|
|
|
5
|
+
| **Date** | {DATE} |
|
|
6
|
+
| **App URL** | {URL} |
|
|
7
|
+
| **Session** | {SESSION_NAME} |
|
|
8
|
+
| **Scope** | {SCOPE} |
|
|
9
|
+
|
|
10
|
+
## Summary
|
|
11
|
+
|
|
12
|
+
| Severity | Count |
|
|
13
|
+
|----------|-------|
|
|
14
|
+
| Critical | 0 |
|
|
15
|
+
| High | 0 |
|
|
16
|
+
| Medium | 0 |
|
|
17
|
+
| Low | 0 |
|
|
18
|
+
| **Total** | **0** |
|
|
19
|
+
|
|
20
|
+
## Issues
|
|
21
|
+
|
|
22
|
+
<!-- Copy this block for each issue found. Interactive issues need video + step-by-step screenshots. Static issues (typos, visual glitches) only need a single screenshot -- set Repro Video to N/A. -->
|
|
23
|
+
|
|
24
|
+
### ISSUE-001: {Short title}
|
|
25
|
+
|
|
26
|
+
| Field | Value |
|
|
27
|
+
|-------|-------|
|
|
28
|
+
| **Severity** | critical / high / medium / low |
|
|
29
|
+
| **Category** | visual / functional / ux / content / performance / console / accessibility |
|
|
30
|
+
| **URL** | {page URL where issue was found} |
|
|
31
|
+
| **Repro Video** | {path to video, or N/A for static issues} |
|
|
32
|
+
|
|
33
|
+
**Description**
|
|
34
|
+
|
|
35
|
+
{What is wrong, what was expected, and what actually happened.}
|
|
36
|
+
|
|
37
|
+
**Repro Steps**
|
|
38
|
+
|
|
39
|
+
<!-- Each step has a screenshot. A reader should be able to follow along visually. -->
|
|
40
|
+
|
|
41
|
+
1. Navigate to {URL}
|
|
42
|
+

|
|
43
|
+
|
|
44
|
+
2. {Action -- e.g., click "Settings" in the sidebar}
|
|
45
|
+

|
|
46
|
+
|
|
47
|
+
3. {Action -- e.g., type "test" in the search field and press Enter}
|
|
48
|
+

|
|
49
|
+
|
|
50
|
+
4. **Observe:** {what goes wrong -- e.g., the page shows a blank white screen instead of search results}
|
|
51
|
+

|
|
52
|
+
|
|
53
|
+
---
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: electron
|
|
3
|
+
description: Automate Electron desktop apps (VS Code, Slack, Discord, Figma, Notion, Spotify, etc.) using agent-browser via Chrome DevTools Protocol. Use when the user needs to interact with an Electron app, automate a desktop app, connect to a running app, control a native app, or test an Electron application. Triggers include "automate Slack app", "control VS Code", "interact with Discord app", "test this Electron app", "connect to desktop app", or any task requiring automation of a native Electron application.
|
|
4
|
+
allowed-tools: Bash(agent-browser:*), Bash(npx agent-browser:*)
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Electron App Automation
|
|
8
|
+
|
|
9
|
+
Automate any Electron desktop app using agent-browser. Electron apps are built on Chromium and expose a Chrome DevTools Protocol (CDP) port that agent-browser can connect to, enabling the same snapshot-interact workflow used for web pages.
|
|
10
|
+
|
|
11
|
+
## Core Workflow
|
|
12
|
+
|
|
13
|
+
1. **Launch** the Electron app with remote debugging enabled
|
|
14
|
+
2. **Connect** agent-browser to the CDP port
|
|
15
|
+
3. **Snapshot** to discover interactive elements
|
|
16
|
+
4. **Interact** using element refs
|
|
17
|
+
5. **Re-snapshot** after navigation or state changes
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
# Launch an Electron app with remote debugging
|
|
21
|
+
open -a "Slack" --args --remote-debugging-port=9222
|
|
22
|
+
|
|
23
|
+
# Connect agent-browser to the app
|
|
24
|
+
agent-browser connect 9222
|
|
25
|
+
|
|
26
|
+
# Standard workflow from here
|
|
27
|
+
agent-browser snapshot -i
|
|
28
|
+
agent-browser click @e5
|
|
29
|
+
agent-browser screenshot slack-desktop.png
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Launching Electron Apps with CDP
|
|
33
|
+
|
|
34
|
+
Every Electron app supports the `--remote-debugging-port` flag since it's built into Chromium.
|
|
35
|
+
|
|
36
|
+
### macOS
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Slack
|
|
40
|
+
open -a "Slack" --args --remote-debugging-port=9222
|
|
41
|
+
|
|
42
|
+
# VS Code
|
|
43
|
+
open -a "Visual Studio Code" --args --remote-debugging-port=9223
|
|
44
|
+
|
|
45
|
+
# Discord
|
|
46
|
+
open -a "Discord" --args --remote-debugging-port=9224
|
|
47
|
+
|
|
48
|
+
# Figma
|
|
49
|
+
open -a "Figma" --args --remote-debugging-port=9225
|
|
50
|
+
|
|
51
|
+
# Notion
|
|
52
|
+
open -a "Notion" --args --remote-debugging-port=9226
|
|
53
|
+
|
|
54
|
+
# Spotify
|
|
55
|
+
open -a "Spotify" --args --remote-debugging-port=9227
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Linux
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
slack --remote-debugging-port=9222
|
|
62
|
+
code --remote-debugging-port=9223
|
|
63
|
+
discord --remote-debugging-port=9224
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Windows
|
|
67
|
+
|
|
68
|
+
```bat
|
|
69
|
+
"C:\Users\%USERNAME%\AppData\Local\slack\slack.exe" --remote-debugging-port=9222
|
|
70
|
+
"C:\Users\%USERNAME%\AppData\Local\Programs\Microsoft VS Code\Code.exe" --remote-debugging-port=9223
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
**Important:** If the app is already running, quit it first, then relaunch with the flag. The `--remote-debugging-port` flag must be present at launch time.
|
|
74
|
+
|
|
75
|
+
## Connecting
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
# Connect to a specific port
|
|
79
|
+
agent-browser connect 9222
|
|
80
|
+
|
|
81
|
+
# Or use --cdp on each command
|
|
82
|
+
agent-browser --cdp 9222 snapshot -i
|
|
83
|
+
|
|
84
|
+
# Auto-discover a running Chromium-based app
|
|
85
|
+
agent-browser --auto-connect snapshot -i
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
After `connect`, all subsequent commands target the connected app without needing `--cdp`.
|
|
89
|
+
|
|
90
|
+
## Tab Management
|
|
91
|
+
|
|
92
|
+
Electron apps often have multiple windows or webviews. Use tab commands to list and switch between them:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# List all available targets (windows, webviews, etc.)
|
|
96
|
+
agent-browser tab
|
|
97
|
+
|
|
98
|
+
# Switch to a specific tab by index
|
|
99
|
+
agent-browser tab 2
|
|
100
|
+
|
|
101
|
+
# Switch by URL pattern
|
|
102
|
+
agent-browser tab --url "*settings*"
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Webview Support
|
|
106
|
+
|
|
107
|
+
Electron `<webview>` elements are automatically discovered and can be controlled like regular pages. Webviews appear as separate targets in the tab list with `type: "webview"`:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Connect to running Electron app
|
|
111
|
+
agent-browser connect 9222
|
|
112
|
+
|
|
113
|
+
# List targets -- webviews appear alongside pages
|
|
114
|
+
agent-browser tab
|
|
115
|
+
# Example output:
|
|
116
|
+
#   0: [page]     Slack - Main Window    https://app.slack.com/
|
|
117
|
+
#   1: [webview]  Embedded Content       https://example.com/widget
|
|
118
|
+
|
|
119
|
+
# Switch to a webview
|
|
120
|
+
agent-browser tab 1
|
|
121
|
+
|
|
122
|
+
# Interact with the webview normally
|
|
123
|
+
agent-browser snapshot -i
|
|
124
|
+
agent-browser click @e3
|
|
125
|
+
agent-browser screenshot webview.png
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
**Note:** Webview support works via raw CDP connection.
|
|
129
|
+
|
|
130
|
+
## Common Patterns
|
|
131
|
+
|
|
132
|
+
### Inspect and Navigate an App
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
open -a "Slack" --args --remote-debugging-port=9222
|
|
136
|
+
sleep 3 # Wait for app to start
|
|
137
|
+
agent-browser connect 9222
|
|
138
|
+
agent-browser snapshot -i
|
|
139
|
+
# Read the snapshot output to identify UI elements
|
|
140
|
+
agent-browser click @e10 # Navigate to a section
|
|
141
|
+
agent-browser snapshot -i # Re-snapshot after navigation
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Take Screenshots of Desktop Apps
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
agent-browser connect 9222
|
|
148
|
+
agent-browser screenshot app-state.png
|
|
149
|
+
agent-browser screenshot --full full-app.png
|
|
150
|
+
agent-browser screenshot --annotate annotated-app.png
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Extract Data from a Desktop App
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
agent-browser connect 9222
|
|
157
|
+
agent-browser snapshot -i
|
|
158
|
+
agent-browser get text @e5
|
|
159
|
+
agent-browser snapshot --json > app-state.json
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
### Fill Forms in Desktop Apps
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
agent-browser connect 9222
|
|
166
|
+
agent-browser snapshot -i
|
|
167
|
+
agent-browser fill @e3 "search query"
|
|
168
|
+
agent-browser press Enter
|
|
169
|
+
agent-browser wait 1000
|
|
170
|
+
agent-browser snapshot -i
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### Run Multiple Apps Simultaneously
|
|
174
|
+
|
|
175
|
+
Use named sessions to control multiple Electron apps at the same time:
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# Connect to Slack
|
|
179
|
+
agent-browser --session slack connect 9222
|
|
180
|
+
|
|
181
|
+
# Connect to VS Code
|
|
182
|
+
agent-browser --session vscode connect 9223
|
|
183
|
+
|
|
184
|
+
# Interact with each independently
|
|
185
|
+
agent-browser --session slack snapshot -i
|
|
186
|
+
agent-browser --session vscode snapshot -i
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Color Scheme
|
|
190
|
+
|
|
191
|
+
The default color scheme when connecting via CDP may be `light`. To preserve dark mode:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
agent-browser connect 9222
|
|
195
|
+
agent-browser --color-scheme dark snapshot -i
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Or set it globally:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
AGENT_BROWSER_COLOR_SCHEME=dark agent-browser connect 9222
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
## Troubleshooting
|
|
205
|
+
|
|
206
|
+
### "Connection refused" or "Cannot connect"
|
|
207
|
+
|
|
208
|
+
- Make sure the app was launched with `--remote-debugging-port=NNNN`
|
|
209
|
+
- If the app was already running, quit and relaunch with the flag
|
|
210
|
+
- Check that the port isn't in use by another process: `lsof -i :9222`
|
|
211
|
+
|
|
212
|
+
### App launches but connect fails
|
|
213
|
+
|
|
214
|
+
- Wait a few seconds after launch before connecting (`sleep 3`)
|
|
215
|
+
- Some apps take time to initialize their webview
|
|
216
|
+
|
|
217
|
+
### Elements not appearing in snapshot
|
|
218
|
+
|
|
219
|
+
- The app may use multiple webviews. Use `agent-browser tab` to list targets and switch to the right one
|
|
220
|
+
|
|
221
|
+
### Cannot type in input fields
|
|
222
|
+
|
|
223
|
+
- Try `agent-browser keyboard type "text"` to type at the current focus without a selector
|
|
224
|
+
- Some Electron apps use custom input components; use `agent-browser keyboard inserttext "text"` to bypass key events
|
|
225
|
+
|
|
226
|
+
## Supported Apps
|
|
227
|
+
|
|
228
|
+
Any app built on Electron works, including:
|
|
229
|
+
|
|
230
|
+
- **Communication:** Slack, Discord, Microsoft Teams, Signal, Telegram Desktop
|
|
231
|
+
- **Development:** VS Code, GitHub Desktop, Postman, Insomnia
|
|
232
|
+
- **Design & Notes:** Figma, Notion, Obsidian
|
|
233
|
+
- **Media:** Spotify, Tidal
|
|
234
|
+
- **Productivity:** Todoist, Linear, 1Password
|
|
235
|
+
|
|
236
|
+
If an app is built with Electron, it supports `--remote-debugging-port` and can be automated with agent-browser.
|