@openrig/cli 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/daemon/assets/guidance/openrig-start.md +16 -1
  2. package/daemon/dist/adapters/claude-code-adapter.d.ts +12 -0
  3. package/daemon/dist/adapters/claude-code-adapter.d.ts.map +1 -1
  4. package/daemon/dist/adapters/claude-code-adapter.js +92 -3
  5. package/daemon/dist/adapters/claude-code-adapter.js.map +1 -1
  6. package/daemon/dist/adapters/codex-runtime-adapter.d.ts +5 -0
  7. package/daemon/dist/adapters/codex-runtime-adapter.d.ts.map +1 -1
  8. package/daemon/dist/adapters/codex-runtime-adapter.js +82 -2
  9. package/daemon/dist/adapters/codex-runtime-adapter.js.map +1 -1
  10. package/daemon/dist/domain/agent-manifest.d.ts.map +1 -1
  11. package/daemon/dist/domain/agent-manifest.js +2 -1
  12. package/daemon/dist/domain/agent-manifest.js.map +1 -1
  13. package/daemon/dist/domain/native-resume-probe.d.ts.map +1 -1
  14. package/daemon/dist/domain/native-resume-probe.js +24 -1
  15. package/daemon/dist/domain/native-resume-probe.js.map +1 -1
  16. package/daemon/dist/domain/profile-resolver.js +1 -1
  17. package/daemon/dist/domain/profile-resolver.js.map +1 -1
  18. package/daemon/dist/domain/runtime-adapter.d.ts +1 -0
  19. package/daemon/dist/domain/runtime-adapter.d.ts.map +1 -1
  20. package/daemon/dist/domain/runtime-adapter.js.map +1 -1
  21. package/daemon/dist/domain/startup-orchestrator.d.ts.map +1 -1
  22. package/daemon/dist/domain/startup-orchestrator.js +10 -1
  23. package/daemon/dist/domain/startup-orchestrator.js.map +1 -1
  24. package/daemon/specs/agents/analyst/agent.yaml +10 -1
  25. package/daemon/specs/agents/design/agent.yaml +10 -1
  26. package/daemon/specs/agents/design/guidance/role.md +13 -0
  27. package/daemon/specs/agents/impl/agent.yaml +10 -1
  28. package/daemon/specs/agents/impl/guidance/role.md +20 -0
  29. package/daemon/specs/agents/lead/agent.yaml +10 -1
  30. package/daemon/specs/agents/lead/guidance/role.md +18 -0
  31. package/daemon/specs/agents/qa/agent.yaml +10 -1
  32. package/daemon/specs/agents/qa/guidance/role.md +52 -0
  33. package/daemon/specs/agents/reviewer/agent.yaml +10 -1
  34. package/daemon/specs/agents/reviewer/guidance/role.md +13 -0
  35. package/daemon/specs/agents/shared/agent.yaml +38 -0
  36. package/daemon/specs/agents/shared/skills/agent-browser/LOCAL-INSIGHTS.md +189 -0
  37. package/daemon/specs/agents/shared/skills/agent-browser/SKILL.md +417 -0
  38. package/daemon/specs/agents/shared/skills/brainstorming/SKILL.md +96 -0
  39. package/daemon/specs/agents/shared/skills/containerized-e2e/SKILL.md +256 -0
  40. package/daemon/specs/agents/shared/skills/containerized-e2e/scripts/Dockerfile +39 -0
  41. package/daemon/specs/agents/shared/skills/containerized-e2e/scripts/build-e2e-image.sh +37 -0
  42. package/daemon/specs/agents/shared/skills/containerized-e2e/templates/control-plane-test.yaml +40 -0
  43. package/daemon/specs/agents/shared/skills/containerized-e2e/templates/e2e-report-template.md +94 -0
  44. package/daemon/specs/agents/shared/skills/containerized-e2e/templates/expansion-collision-fragment.yaml +13 -0
  45. package/daemon/specs/agents/shared/skills/containerized-e2e/templates/expansion-pod-fragment.yaml +14 -0
  46. package/daemon/specs/agents/shared/skills/development-team/SKILL.md +149 -0
  47. package/daemon/specs/agents/shared/skills/dogfood/SKILL.md +220 -0
  48. package/daemon/specs/agents/shared/skills/dogfood/references/issue-taxonomy.md +109 -0
  49. package/daemon/specs/agents/shared/skills/dogfood/templates/dogfood-report-template.md +53 -0
  50. package/daemon/specs/agents/shared/skills/executing-plans/SKILL.md +84 -0
  51. package/daemon/specs/agents/shared/skills/frontend-design/LICENSE.txt +177 -0
  52. package/daemon/specs/agents/shared/skills/frontend-design/SKILL.md +42 -0
  53. package/daemon/specs/agents/shared/skills/openrig-user/SKILL.md +468 -0
  54. package/daemon/specs/agents/shared/skills/orchestration-team/SKILL.md +234 -0
  55. package/daemon/specs/agents/shared/skills/review-team/SKILL.md +210 -0
  56. package/daemon/specs/agents/shared/skills/systematic-debugging/CREATION-LOG.md +119 -0
  57. package/daemon/specs/agents/shared/skills/systematic-debugging/SKILL.md +296 -0
  58. package/daemon/specs/agents/shared/skills/systematic-debugging/condition-based-waiting-example.ts +158 -0
  59. package/daemon/specs/agents/shared/skills/systematic-debugging/condition-based-waiting.md +115 -0
  60. package/daemon/specs/agents/shared/skills/systematic-debugging/defense-in-depth.md +122 -0
  61. package/daemon/specs/agents/shared/skills/systematic-debugging/find-polluter.sh +63 -0
  62. package/daemon/specs/agents/shared/skills/systematic-debugging/root-cause-tracing.md +169 -0
  63. package/daemon/specs/agents/shared/skills/systematic-debugging/test-academic.md +14 -0
  64. package/daemon/specs/agents/shared/skills/systematic-debugging/test-pressure-1.md +58 -0
  65. package/daemon/specs/agents/shared/skills/systematic-debugging/test-pressure-2.md +68 -0
  66. package/daemon/specs/agents/shared/skills/systematic-debugging/test-pressure-3.md +69 -0
  67. package/daemon/specs/agents/shared/skills/test-driven-development/SKILL.md +371 -0
  68. package/daemon/specs/agents/shared/skills/test-driven-development/testing-anti-patterns.md +299 -0
  69. package/daemon/specs/agents/shared/skills/using-superpowers/SKILL.md +95 -0
  70. package/daemon/specs/agents/shared/skills/verification-before-completion/SKILL.md +139 -0
  71. package/daemon/specs/agents/shared/skills/writing-plans/SKILL.md +116 -0
  72. package/daemon/specs/agents/synthesizer/agent.yaml +10 -1
  73. package/daemon/specs/demo.CULTURE.md +92 -0
  74. package/daemon/specs/demo.yaml +91 -0
  75. package/daemon/specs/implementation-pair.yaml +3 -3
  76. package/daemon/specs/product-team.CULTURE.md +137 -0
  77. package/daemon/specs/product-team.yaml +5 -4
  78. package/dist/client.d.ts +8 -1
  79. package/dist/client.d.ts.map +1 -1
  80. package/dist/client.js +15 -6
  81. package/dist/client.js.map +1 -1
  82. package/dist/commands/daemon.d.ts.map +1 -1
  83. package/dist/commands/daemon.js +5 -1
  84. package/dist/commands/daemon.js.map +1 -1
  85. package/dist/commands/up.js +2 -2
  86. package/dist/commands/up.js.map +1 -1
  87. package/dist/daemon-lifecycle.d.ts.map +1 -1
  88. package/dist/daemon-lifecycle.js +54 -7
  89. package/dist/daemon-lifecycle.js.map +1 -1
  90. package/dist/fetch-with-timeout.d.ts +9 -0
  91. package/dist/fetch-with-timeout.d.ts.map +1 -0
  92. package/dist/fetch-with-timeout.js +41 -0
  93. package/dist/fetch-with-timeout.js.map +1 -0
  94. package/dist/index.d.ts.map +1 -1
  95. package/dist/index.js +2 -1
  96. package/dist/index.js.map +1 -1
  97. package/dist/mcp-server.d.ts.map +1 -1
  98. package/dist/mcp-server.js +2 -1
  99. package/dist/mcp-server.js.map +1 -1
  100. package/dist/version.d.ts +2 -0
  101. package/dist/version.d.ts.map +1 -0
  102. package/dist/version.js +8 -0
  103. package/dist/version.js.map +1 -0
  104. package/package.json +1 -1
@@ -0,0 +1,149 @@
1
+ ---
2
+ name: development-team
3
+ description: How the development pod coordinates implementation, QA, and design without skipping gates.
4
+ ---
5
+
6
+ # Development Team
7
+
8
+ You are part of the development pod. Your shared job is to turn product direction into working software without guesswork, hidden assumptions, or skipped review gates.
9
+
10
+ ## Startup sequence
11
+
12
+ Before the pod starts real implementation:
13
+ - load the packaged skills named in your role startup checklist
14
+ - run `rig whoami --json`
15
+ - confirm who is playing implementer, QA, and design in this run
16
+ - wait for the orchestrator's real assignment instead of freelancing off a partial guess
17
+
18
+ The development pod should feel like a real working pod, not three isolated agents improvising alone.
19
+
20
+ ## Pod shape
21
+
22
+ The development pod may include:
23
+ - an implementer who writes the change
24
+ - a QA partner who gates every edit
25
+ - a designer who clarifies product behavior and UX before implementation fills in the blanks
26
+
27
+ Some starters only launch the implementer and QA. Others also launch a designer. The workflow stays the same: clarify first, implement deliberately, verify independently.
28
+
29
+ ## Shared loop
30
+
31
+ This is the default loop for product work:
32
+
33
+ ```
34
+ 1. Clarify the work and the acceptance criteria
35
+ 2. Implementer sends a pre-edit proposal to QA
36
+ 3. QA approves or rejects with specifics
37
+ 4. Implementer changes code with TDD
38
+ 5. Implementer sends the diff and verification output back to QA
39
+ 6. QA approves or rejects with specifics
40
+ 7. If commit authority is enabled, the implementer may commit
41
+ 8. If commit authority is not enabled, stop at a QA-approved working tree and report that state clearly
42
+ ```
43
+
44
+ Skip no gates. If the task is ambiguous, resolve the ambiguity before editing.
45
+
46
+ ## What the implementer must hand QA
47
+
48
+ Pre-edit proposal should include:
49
+ - the files expected to change
50
+ - the behavior or acceptance criteria being targeted
51
+ - the first failing test or verification step
52
+ - any likely edge cases or invariants
53
+
54
+ Post-edit review bundle should include:
55
+ - what changed
56
+ - the actual verification commands run
57
+ - the result of those commands
58
+ - any remaining uncertainty or follow-up risk
59
+
60
+ QA should not have to reverse-engineer what the implementer thought they were doing.
61
+
62
+ ## Implementer
63
+
64
+ Before proposing:
65
+ - read the task fully
66
+ - inspect the relevant code before promising a solution
67
+ - name the files, tests, and acceptance criteria in the proposal
68
+
69
+ After QA rejection:
70
+ - read the exact feedback
71
+ - fix the issue instead of arguing around it
72
+ - resubmit with the changes called out explicitly
73
+
74
+ ## QA
75
+
76
+ QA is not a rubber stamp. QA is a product voice — not just a test gate.
77
+
78
+ When reviewing a proposal:
79
+ - reject if the scope is wrong
80
+ - check whether the planned tests actually prove the contract
81
+ - flag hidden risks and missing failure cases
82
+
83
+ When reviewing a diff:
84
+ - read the actual code, not just the summary
85
+ - verify independently when possible
86
+ - if you cannot verify independently, require real output in the review bundle and inspect it critically
87
+
88
+ If the implementer stalls on a permission or approval prompt, call that out immediately. Do not treat a blocked pane as finished implementation.
89
+
90
+ ### QA dogfood mode
91
+
92
+ When QA is dogfooding (testing existing features rather than gating new code), QA works solo with full autonomy:
93
+ - find issues AND fix them in a loop
94
+ - test the fix, then move to the next issue
95
+ - only escalate architecture-level concerns to the orchestrator
96
+ - do not wait for approval to fix obvious bugs during dogfood
97
+ - report findings to the chatroom so the rig has visibility
98
+
99
+ ### QA as a product voice
100
+
101
+ QA sees the product from the user's perspective. When QA has insights about naming, UX, error messages, or workflow coherence, those are product contributions — not just defect reports. The orchestrator should invite QA's input on architecture, not limit QA to test gating.
102
+
103
+ ## Designer
104
+
105
+ When present, the designer should work ahead of implementation:
106
+ - turn vague goals into concrete flows, states, copy, and interaction choices
107
+ - surface edge cases before engineering has to guess
108
+ - review built results for coherence, not just visual polish
109
+
110
+ The designer is part of the development pod, not a decorative sidecar.
111
+
112
+ ## Browser testing and dogfood tools
113
+
114
+ The development pod has access to browser automation and structured dogfood testing tools:
115
+
116
+ - **`agent-browser`** — browser automation CLI. Navigate to the daemon UI, snapshot interactive elements, take annotated screenshots, record repro videos. Use `agent-browser open <url>`, `agent-browser snapshot -i`, `agent-browser screenshot --annotate`.
117
+ - **`dogfood`** — structured exploratory testing workflow. Produces a report with screenshots, repro videos, and step-by-step evidence for every finding.
118
+ - **`containerized-e2e`** — Docker-based clean-install testing. Simulates a fresh user environment.
119
+
120
+ QA typically drives browser and dogfood testing, but both impl and QA should know these tools exist and be able to use them. When dogfooding UI:
121
+ 1. Load `/agent-browser` and `/dogfood`
122
+ 2. Open the daemon UI: `agent-browser open http://127.0.0.1:7433`
123
+ 3. Systematically explore surfaces, take screenshots as proof
124
+ 4. Report findings using the PASS/FAIL/GAP format to the chatroom
125
+
126
+ ## When the pod is blocked
127
+
128
+ If the blocker is:
129
+ - ambiguity: pull in design or ask the orchestrator for clarification
130
+ - failing tests / unexpected behavior: use `systematic-debugging`
131
+ - code changes: use `test-driven-development`
132
+ - completion claims: use `verification-before-completion`
133
+
134
+ Do not hand-wave around blockers. Name them and route them.
135
+
136
+ ## Communication
137
+
138
+ - Pre-edit proposal: `rig send <qa-session> "PRE-EDIT: ..." --verify`
139
+ - Review bundle: `rig send <qa-session> "REVIEW BUNDLE: ..." --verify`
140
+ - Design clarification: `rig send <design-session> "Need product/design input on ..." --verify`
141
+
142
+ ## When blocked by permissions
143
+
144
+ If permissions block tests, file access, or commits:
145
+ 1. identify the exact blocked command
146
+ 2. tell the human what that prevents
147
+ 3. continue with the work you can still do
148
+
149
+ Do not silently stall. Do not pretend blocked verification is complete.
@@ -0,0 +1,220 @@
1
+ ---
2
+ name: dogfood
3
+ description: Systematically explore and test a web application to find bugs, UX issues, and other problems. Use when asked to "dogfood", "QA", "exploratory test", "find issues", "bug hunt", "test this app/site/platform", or review the quality of a web application. Produces a structured report with full reproduction evidence -- step-by-step screenshots, repro videos, and detailed repro steps for every issue -- so findings can be handed directly to the responsible teams.
4
+ allowed-tools: Bash(agent-browser:*), Bash(npx agent-browser:*)
5
+ ---
6
+
7
+ # Dogfood
8
+
9
+ Systematically explore a web application, find issues, and produce a report with full reproduction evidence for every finding.
10
+
11
+ ## Setup
12
+
13
+ Only the **Target URL** is required. Everything else has sensible defaults -- use them unless the user explicitly provides an override.
14
+
15
+ | Parameter | Default | Example override |
16
+ |-----------|---------|-----------------|
17
+ | **Target URL** | _(required)_ | `vercel.com`, `http://localhost:3000` |
18
+ | **Session name** | Slugified domain (e.g., `vercel.com` -> `vercel-com`) | `--session my-session` |
19
+ | **Output directory** | `./dogfood-output/` | `Output directory: /tmp/qa` |
20
+ | **Scope** | Full app | `Focus on the billing page` |
21
+ | **Authentication** | None | `Sign in as user@example.com` |
22
+
23
+ If the user says something like "dogfood vercel.com", start immediately with defaults. Do not ask clarifying questions unless authentication is mentioned but credentials are missing.
24
+
25
+ Always use `agent-browser` directly -- never `npx agent-browser`. The direct binary uses the fast Rust client. `npx` routes through Node.js and is significantly slower.
26
+
27
+ ## Workflow
28
+
29
+ ```
30
+ 1. Initialize    Set up session, output dirs, report file
31
+ 2. Authenticate  Sign in if needed, save state
32
+ 3. Orient        Navigate to starting point, take initial snapshot
33
+ 4. Explore       Systematically visit pages and test features
34
+ 5. Document      Screenshot + record each issue as found
35
+ 6. Wrap up       Update summary counts, close session
36
+ ```
37
+
38
+ ### 1. Initialize
39
+
40
+ ```bash
41
+ mkdir -p {OUTPUT_DIR}/screenshots {OUTPUT_DIR}/videos
42
+ ```
43
+
44
+ Copy the report template into the output directory and fill in the header fields:
45
+
46
+ ```bash
47
+ cp {SKILL_DIR}/templates/dogfood-report-template.md {OUTPUT_DIR}/report.md
48
+ ```
49
+
50
+ Start a named session:
51
+
52
+ ```bash
53
+ agent-browser --session {SESSION} open {TARGET_URL}
54
+ agent-browser --session {SESSION} wait --load networkidle
55
+ ```
56
+
57
+ ### 2. Authenticate
58
+
59
+ If the app requires login:
60
+
61
+ ```bash
62
+ agent-browser --session {SESSION} snapshot -i
63
+ # Identify login form refs, fill credentials
64
+ agent-browser --session {SESSION} fill @e1 "{EMAIL}"
65
+ agent-browser --session {SESSION} fill @e2 "{PASSWORD}"
66
+ agent-browser --session {SESSION} click @e3
67
+ agent-browser --session {SESSION} wait --load networkidle
68
+ ```
69
+
70
+ For OTP/email codes: ask the user, wait for their response, then enter the code.
71
+
72
+ After successful login, save state for potential reuse:
73
+
74
+ ```bash
75
+ agent-browser --session {SESSION} state save {OUTPUT_DIR}/auth-state.json
76
+ ```
77
+
78
+ ### 3. Orient
79
+
80
+ Take an initial annotated screenshot and snapshot to understand the app structure:
81
+
82
+ ```bash
83
+ agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/initial.png
84
+ agent-browser --session {SESSION} snapshot -i
85
+ ```
86
+
87
+ Identify the main navigation elements and map out the sections to visit.
88
+
89
+ ### 4. Explore
90
+
91
+ Read [references/issue-taxonomy.md](references/issue-taxonomy.md) for the full list of what to look for and the exploration checklist.
92
+
93
+ **Strategy -- work through the app systematically:**
94
+
95
+ - Start from the main navigation. Visit each top-level section.
96
+ - Within each section, test interactive elements: click buttons, fill forms, open dropdowns/modals.
97
+ - Check edge cases: empty states, error handling, boundary inputs.
98
+ - Try realistic end-to-end workflows (create, edit, delete flows).
99
+ - Check the browser console for errors periodically.
100
+
101
+ **At each page:**
102
+
103
+ ```bash
104
+ agent-browser --session {SESSION} snapshot -i
105
+ agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/{page-name}.png
106
+ agent-browser --session {SESSION} errors
107
+ agent-browser --session {SESSION} console
108
+ ```
109
+
110
+ Use your judgment on how deep to go. Spend more time on core features and less on peripheral pages. If you find a cluster of issues in one area, investigate deeper.
111
+
112
+ ### 5. Document Issues (Repro-First)
113
+
114
+ Steps 4 and 5 happen together -- explore and document in a single pass. When you find an issue, stop exploring and document it immediately before moving on. Do not explore the whole app first and document later.
115
+
116
+ Every issue must be reproducible. When you find something wrong, do not just note it -- prove it with evidence. The goal is that someone reading the report can see exactly what happened and replay it.
117
+
118
+ **Choose the right level of evidence for the issue:**
119
+
120
+ #### Interactive / behavioral issues (functional, ux, console errors on action)
121
+
122
+ These require user interaction to reproduce -- use full repro with video and step-by-step screenshots:
123
+
124
+ 1. **Start a repro video** _before_ reproducing:
125
+
126
+ ```bash
127
+ agent-browser --session {SESSION} record start {OUTPUT_DIR}/videos/issue-{NNN}-repro.webm
128
+ ```
129
+
130
+ 2. **Walk through the steps at human pace.** Pause 1-2 seconds between actions so the video is watchable. Take a screenshot at each step:
131
+
132
+ ```bash
133
+ agent-browser --session {SESSION} screenshot {OUTPUT_DIR}/screenshots/issue-{NNN}-step-1.png
134
+ sleep 1
135
+ # Perform action (click, fill, etc.)
136
+ sleep 1
137
+ agent-browser --session {SESSION} screenshot {OUTPUT_DIR}/screenshots/issue-{NNN}-step-2.png
138
+ sleep 1
139
+ # ...continue until the issue manifests
140
+ ```
141
+
142
+ 3. **Capture the broken state.** Pause so the viewer can see it, then take an annotated screenshot:
143
+
144
+ ```bash
145
+ sleep 2
146
+ agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/issue-{NNN}-result.png
147
+ ```
148
+
149
+ 4. **Stop the video:**
150
+
151
+ ```bash
152
+ agent-browser --session {SESSION} record stop
153
+ ```
154
+
155
+ 5. Write numbered repro steps in the report, each referencing its screenshot.
156
+
157
+ #### Static / visible-on-load issues (typos, placeholder text, clipped text, misalignment, console errors on load)
158
+
159
+ These are visible without interaction -- a single annotated screenshot is sufficient. No video, no multi-step repro:
160
+
161
+ ```bash
162
+ agent-browser --session {SESSION} screenshot --annotate {OUTPUT_DIR}/screenshots/issue-{NNN}.png
163
+ ```
164
+
165
+ Write a brief description and reference the screenshot in the report. Set **Repro Video** to `N/A`.
166
+
167
+ ---
168
+
169
+ **For all issues:**
170
+
171
+ 1. **Append to the report immediately.** Do not batch issues for later. Write each one as you find it so nothing is lost if the session is interrupted.
172
+
173
+ 2. **Increment the issue counter** (ISSUE-001, ISSUE-002, ...).
174
+
175
+ ### 6. Wrap Up
176
+
177
+ Aim to find **5-10 well-documented issues**, then wrap up. Depth of evidence matters more than total count -- 5 issues with full repro beats 20 with vague descriptions.
178
+
179
+ After exploring:
180
+
181
+ 1. Re-read the report and update the summary severity counts so they match the actual issues. Every `### ISSUE-` block must be reflected in the totals.
182
+ 2. Close the session:
183
+
184
+ ```bash
185
+ agent-browser --session {SESSION} close
186
+ ```
187
+
188
+ 3. Tell the user the report is ready and summarize findings: total issues, breakdown by severity, and the most critical items.
189
+
190
+ ## Guidance
191
+
192
+ - **Repro is everything.** Every issue needs proof -- but match the evidence to the issue. Interactive bugs need video and step-by-step screenshots. Static bugs (typos, placeholder text, visual glitches visible on load) only need a single annotated screenshot.
193
+ - **Verify reproducibility before collecting evidence.** Before recording video or taking screenshots, verify the issue is reproducible with at least one retry. If it can't be reproduced consistently, it's not a valid issue.
194
+ - **Don't record video for static issues.** A typo or clipped text doesn't benefit from a video. Save video for issues that involve user interaction, timing, or state changes.
195
+ - **For interactive issues, screenshot each step.** Capture the before, the action, and the after -- so someone can see the full sequence.
196
+ - **Write repro steps that map to screenshots.** Each numbered step in the report should reference its corresponding screenshot. A reader should be able to follow the steps visually without touching a browser.
197
+ - **Use the right snapshot command.**
198
+ - `snapshot -i` — for finding clickable/fillable elements (buttons, inputs, links)
199
+ - `snapshot` (no flag) — for reading page content (text, headings, data lists)
200
+ - **Be thorough but use judgment.** You are not following a test script -- you are exploring like a real user would. If something feels off, investigate.
201
+ - **Write findings incrementally.** Append each issue to the report as you discover it. If the session is interrupted, findings are preserved. Never batch all issues for the end.
202
+ - **Never delete output files.** Do not `rm` screenshots, videos, or the report mid-session. Do not close the session and restart. Work forward, not backward.
203
+ - **Never read the target app's source code.** You are testing as a user, not auditing code. Do not read HTML, JS, or config files of the app under test. All findings must come from what you observe in the browser.
204
+ - **Check the console.** Many issues are invisible in the UI but show up as JS errors or failed requests.
205
+ - **Test like a user, not a robot.** Try common workflows end-to-end. Click things a real user would click. Enter realistic data.
206
+ - **Type like a human.** When filling form fields during video recording, use `type` instead of `fill` -- it types character-by-character. Use `fill` only outside of video recording when speed matters.
207
+ - **Pace repro videos for humans.** Add `sleep 1` between actions and `sleep 2` before the final result screenshot. Videos should be watchable at 1x speed -- a human reviewing the report needs to see what happened, not a blur of instant state changes.
208
+ - **Be efficient with commands.** Batch multiple `agent-browser` commands in a single shell call when they are independent (e.g., `agent-browser ... screenshot ... && agent-browser ... console`). Use `agent-browser --session {SESSION} scroll down 300` for scrolling -- do not use `key` or `evaluate` to scroll.
209
+
210
+ ## References
211
+
212
+ | Reference | When to Read |
213
+ |-----------|--------------|
214
+ | [references/issue-taxonomy.md](references/issue-taxonomy.md) | Start of session -- calibrate what to look for, severity levels, exploration checklist |
215
+
216
+ ## Templates
217
+
218
+ | Template | Purpose |
219
+ |----------|---------|
220
+ | [templates/dogfood-report-template.md](templates/dogfood-report-template.md) | Copy into output directory as the report file |
@@ -0,0 +1,109 @@
1
+ # Issue Taxonomy
2
+
3
+ Reference for categorizing issues found during dogfooding. Read this at the start of a dogfood session to calibrate what to look for.
4
+
5
+ ## Contents
6
+
7
+ - [Severity Levels](#severity-levels)
8
+ - [Categories](#categories)
9
+ - [Exploration Checklist](#exploration-checklist)
10
+
11
+ ## Severity Levels
12
+
13
+ | Severity | Definition |
14
+ |----------|------------|
15
+ | **critical** | Blocks a core workflow, causes data loss, or crashes the app |
16
+ | **high** | Major feature broken or unusable, no workaround |
17
+ | **medium** | Feature works but with noticeable problems, workaround exists |
18
+ | **low** | Minor cosmetic or polish issue |
19
+
20
+ ## Categories
21
+
22
+ ### Visual / UI
23
+
24
+ - Layout broken or misaligned elements
25
+ - Overlapping or clipped text
26
+ - Inconsistent spacing, padding, or margins
27
+ - Missing or broken icons/images
28
+ - Dark mode / light mode rendering issues
29
+ - Responsive layout problems (viewport sizes)
30
+ - Z-index stacking issues (elements hidden behind others)
31
+ - Font rendering issues (wrong font, size, weight)
32
+ - Color contrast problems
33
+ - Animation glitches or jank
34
+
35
+ ### Functional
36
+
37
+ - Broken links (404, wrong destination)
38
+ - Buttons or controls that do nothing on click
39
+ - Form validation that rejects valid input or accepts invalid input
40
+ - Incorrect redirects
41
+ - Features that fail silently
42
+ - State not persisted when expected (lost on refresh, navigation)
43
+ - Race conditions (double-submit, stale data)
44
+ - Broken search or filtering
45
+ - Pagination issues
46
+ - File upload/download failures
47
+
48
+ ### UX
49
+
50
+ - Confusing or unclear navigation
51
+ - Missing loading indicators or feedback after actions
52
+ - Slow or unresponsive interactions (>300ms perceived delay)
53
+ - Unclear error messages
54
+ - Missing confirmation for destructive actions
55
+ - Dead ends (no way to go back or proceed)
56
+ - Inconsistent patterns across similar features
57
+ - Missing keyboard shortcuts or focus management
58
+ - Unintuitive defaults
59
+ - Missing empty states or unhelpful empty states
60
+
61
+ ### Content
62
+
63
+ - Typos or grammatical errors
64
+ - Outdated or incorrect text
65
+ - Placeholder or lorem ipsum content left in
66
+ - Truncated text without tooltip or expansion
67
+ - Missing or wrong labels
68
+ - Inconsistent terminology
69
+
70
+ ### Performance
71
+
72
+ - Slow page loads (>3s)
73
+ - Janky scrolling or animations
74
+ - Large layout shifts (content jumping)
75
+ - Excessive network requests (check via console/network)
76
+ - Memory leaks (page slows over time)
77
+ - Unoptimized images (large file sizes)
78
+
79
+ ### Console / Errors
80
+
81
+ - JavaScript exceptions in console
82
+ - Failed network requests (4xx, 5xx)
83
+ - Deprecation warnings
84
+ - CORS errors
85
+ - Mixed content warnings
86
+ - Unhandled promise rejections
87
+
88
+ ### Accessibility
89
+
90
+ - Missing alt text on images
91
+ - Unlabeled form inputs
92
+ - Poor keyboard navigation (can't tab to elements)
93
+ - Focus traps
94
+ - Insufficient color contrast
95
+ - Missing ARIA attributes on dynamic content
96
+ - Screen reader incompatible patterns
97
+
98
+ ## Exploration Checklist
99
+
100
+ Use this as a guide for what to test on each page/feature:
101
+
102
+ 1. **Visual scan** -- Take an annotated screenshot. Look for layout, alignment, and rendering issues.
103
+ 2. **Interactive elements** -- Click every button, link, and control. Do they work? Is there feedback?
104
+ 3. **Forms** -- Fill and submit. Test empty submission, invalid input, and edge cases.
105
+ 4. **Navigation** -- Follow all navigation paths. Check breadcrumbs, back button, deep links.
106
+ 5. **States** -- Check empty states, loading states, error states, and full/overflow states.
107
+ 6. **Console** -- Check for JS errors, failed requests, and warnings.
108
+ 7. **Responsiveness** -- If relevant, test at different viewport sizes.
109
+ 8. **Auth boundaries** -- Test what happens when not logged in, with different roles if applicable.
@@ -0,0 +1,53 @@
1
+ # Dogfood Report: {APP_NAME}
2
+
3
+ | Field | Value |
4
+ |-------|-------|
5
+ | **Date** | {DATE} |
6
+ | **App URL** | {URL} |
7
+ | **Session** | {SESSION_NAME} |
8
+ | **Scope** | {SCOPE} |
9
+
10
+ ## Summary
11
+
12
+ | Severity | Count |
13
+ |----------|-------|
14
+ | Critical | 0 |
15
+ | High | 0 |
16
+ | Medium | 0 |
17
+ | Low | 0 |
18
+ | **Total** | **0** |
19
+
20
+ ## Issues
21
+
22
+ <!-- Copy this block for each issue found. Interactive issues need video + step-by-step screenshots. Static issues (typos, visual glitches) only need a single screenshot -- set Repro Video to N/A. -->
23
+
24
+ ### ISSUE-001: {Short title}
25
+
26
+ | Field | Value |
27
+ |-------|-------|
28
+ | **Severity** | critical / high / medium / low |
29
+ | **Category** | visual / functional / ux / content / performance / console / accessibility |
30
+ | **URL** | {page URL where issue was found} |
31
+ | **Repro Video** | {path to video, or N/A for static issues} |
32
+
33
+ **Description**
34
+
35
+ {What is wrong, what was expected, and what actually happened.}
36
+
37
+ **Repro Steps**
38
+
39
+ <!-- Each step has a screenshot. A reader should be able to follow along visually. -->
40
+
41
+ 1. Navigate to {URL}
42
+ ![Step 1](screenshots/issue-001-step-1.png)
43
+
44
+ 2. {Action -- e.g., click "Settings" in the sidebar}
45
+ ![Step 2](screenshots/issue-001-step-2.png)
46
+
47
+ 3. {Action -- e.g., type "test" in the search field and press Enter}
48
+ ![Step 3](screenshots/issue-001-step-3.png)
49
+
50
+ 4. **Observe:** {what goes wrong -- e.g., the page shows a blank white screen instead of search results}
51
+ ![Result](screenshots/issue-001-result.png)
52
+
53
+ ---
@@ -0,0 +1,84 @@
1
+ ---
2
+ name: executing-plans
3
+ description: Use when you have a written implementation plan to execute in a separate session with review checkpoints
4
+ ---
5
+
6
+ # Executing Plans
7
+
8
+ ## Overview
9
+
10
+ Load plan, review critically, execute tasks in batches, report for review between batches.
11
+
12
+ **Core principle:** Batch execution with checkpoints for architect review.
13
+
14
+ **Announce at start:** "I'm using the executing-plans skill to implement this plan."
15
+
16
+ ## The Process
17
+
18
+ ### Step 1: Load and Review Plan
19
+ 1. Read plan file
20
+ 2. Review critically - identify any questions or concerns about the plan
21
+ 3. If concerns: Raise them with your human partner before starting
22
+ 4. If no concerns: Create a TodoWrite task list and proceed
23
+
24
+ ### Step 2: Execute Batch
25
+ **Default: First 3 tasks**
26
+
27
+ For each task:
28
+ 1. Mark as in_progress
29
+ 2. Follow each step exactly (plan has bite-sized steps)
30
+ 3. Run verifications as specified
31
+ 4. Mark as completed
32
+
33
+ ### Step 3: Report
34
+ When batch complete:
35
+ - Show what was implemented
36
+ - Show verification output
37
+ - Say: "Ready for feedback."
38
+
39
+ ### Step 4: Continue
40
+ Based on feedback:
41
+ - Apply changes if needed
42
+ - Execute next batch
43
+ - Repeat until complete
44
+
45
+ ### Step 5: Complete Development
46
+
47
+ After all tasks complete and verified:
48
+ - Announce: "I'm using the finishing-a-development-branch skill to complete this work."
49
+ - **REQUIRED SUB-SKILL:** Use superpowers:finishing-a-development-branch
50
+ - Follow that skill to verify tests, present options, execute choice
51
+
52
+ ## When to Stop and Ask for Help
53
+
54
+ **STOP executing immediately when:**
55
+ - Hit a blocker mid-batch (missing dependency, test fails, instruction unclear)
56
+ - The plan has critical gaps that prevent you from starting
57
+ - You don't understand an instruction
58
+ - Verification fails repeatedly
59
+
60
+ **Ask for clarification rather than guessing.**
61
+
62
+ ## When to Revisit Earlier Steps
63
+
64
+ **Return to Review (Step 1) when:**
65
+ - Partner updates the plan based on your feedback
66
+ - Fundamental approach needs rethinking
67
+
68
+ **Don't force through blockers** - stop and ask.
69
+
70
+ ## Remember
71
+ - Review plan critically first
72
+ - Follow plan steps exactly
73
+ - Don't skip verifications
74
+ - Reference skills when plan says to
75
+ - Between batches: just report and wait
76
+ - Stop when blocked, don't guess
77
+ - Never start implementation on main/master branch without explicit user consent
78
+
79
+ ## Integration
80
+
81
+ **Required workflow skills:**
82
+ - **superpowers:using-git-worktrees** - REQUIRED: Set up isolated workspace before starting
83
+ - **superpowers:writing-plans** - Creates the plan this skill executes
84
+ - **superpowers:finishing-a-development-branch** - Complete development after all tasks