@openrig/cli 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/daemon/assets/guidance/openrig-start.md +16 -1
  2. package/daemon/dist/adapters/claude-code-adapter.d.ts +12 -0
  3. package/daemon/dist/adapters/claude-code-adapter.d.ts.map +1 -1
  4. package/daemon/dist/adapters/claude-code-adapter.js +92 -3
  5. package/daemon/dist/adapters/claude-code-adapter.js.map +1 -1
  6. package/daemon/dist/adapters/codex-runtime-adapter.d.ts +5 -0
  7. package/daemon/dist/adapters/codex-runtime-adapter.d.ts.map +1 -1
  8. package/daemon/dist/adapters/codex-runtime-adapter.js +82 -2
  9. package/daemon/dist/adapters/codex-runtime-adapter.js.map +1 -1
  10. package/daemon/dist/domain/native-resume-probe.d.ts.map +1 -1
  11. package/daemon/dist/domain/native-resume-probe.js +24 -1
  12. package/daemon/dist/domain/native-resume-probe.js.map +1 -1
  13. package/daemon/dist/domain/runtime-adapter.d.ts +1 -0
  14. package/daemon/dist/domain/runtime-adapter.d.ts.map +1 -1
  15. package/daemon/dist/domain/runtime-adapter.js.map +1 -1
  16. package/daemon/dist/domain/spec-library-service.d.ts.map +1 -1
  17. package/daemon/dist/domain/spec-library-service.js +10 -0
  18. package/daemon/dist/domain/spec-library-service.js.map +1 -1
  19. package/daemon/dist/domain/startup-orchestrator.d.ts.map +1 -1
  20. package/daemon/dist/domain/startup-orchestrator.js +10 -1
  21. package/daemon/dist/domain/startup-orchestrator.js.map +1 -1
  22. package/daemon/specs/agents/design/{agent.yaml → product-designer/agent.yaml} +4 -3
  23. package/daemon/specs/agents/design/{guidance → product-designer/guidance}/role.md +13 -0
  24. package/daemon/specs/agents/{impl → development/implementer}/agent.yaml +4 -3
  25. package/daemon/specs/agents/development/implementer/guidance/role.md +47 -0
  26. package/daemon/specs/agents/{qa → development/qa}/agent.yaml +3 -2
  27. package/daemon/specs/agents/development/qa/guidance/role.md +78 -0
  28. package/daemon/specs/agents/{lead → orchestration/orchestrator}/agent.yaml +4 -3
  29. package/daemon/specs/agents/{lead → orchestration/orchestrator}/guidance/role.md +18 -0
  30. package/daemon/specs/agents/{analyst → research/analyst}/agent.yaml +2 -1
  31. package/daemon/specs/agents/{synthesizer → research/synthesizer}/agent.yaml +2 -1
  32. package/daemon/specs/agents/{reviewer → review/independent-reviewer}/agent.yaml +4 -3
  33. package/daemon/specs/agents/{reviewer → review/independent-reviewer}/guidance/role.md +13 -0
  34. package/daemon/specs/agents/shared/agent.yaml +29 -1
  35. package/daemon/specs/agents/shared/skills/core/openrig-user/SKILL.md +468 -0
  36. package/daemon/specs/agents/shared/skills/pods/development-team/SKILL.md +149 -0
  37. package/daemon/specs/agents/shared/skills/pods/orchestration-team/SKILL.md +234 -0
  38. package/daemon/specs/agents/shared/skills/pods/review-team/SKILL.md +210 -0
  39. package/daemon/specs/agents/shared/skills/process/agent-browser/LOCAL-INSIGHTS.md +189 -0
  40. package/daemon/specs/agents/shared/skills/process/agent-browser/SKILL.md +417 -0
  41. package/daemon/specs/agents/shared/skills/process/brainstorming/SKILL.md +96 -0
  42. package/daemon/specs/agents/shared/skills/process/containerized-e2e/SKILL.md +256 -0
  43. package/daemon/specs/agents/shared/skills/process/containerized-e2e/scripts/Dockerfile +39 -0
  44. package/daemon/specs/agents/shared/skills/process/containerized-e2e/scripts/build-e2e-image.sh +37 -0
  45. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/control-plane-test.yaml +40 -0
  46. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/e2e-report-template.md +94 -0
  47. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/expansion-collision-fragment.yaml +13 -0
  48. package/daemon/specs/agents/shared/skills/process/containerized-e2e/templates/expansion-pod-fragment.yaml +14 -0
  49. package/daemon/specs/agents/shared/skills/process/dogfood/SKILL.md +220 -0
  50. package/daemon/specs/agents/shared/skills/process/dogfood/references/issue-taxonomy.md +109 -0
  51. package/daemon/specs/agents/shared/skills/process/dogfood/templates/dogfood-report-template.md +53 -0
  52. package/daemon/specs/agents/shared/skills/process/executing-plans/SKILL.md +84 -0
  53. package/daemon/specs/agents/shared/skills/process/frontend-design/LICENSE.txt +177 -0
  54. package/daemon/specs/agents/shared/skills/process/frontend-design/SKILL.md +42 -0
  55. package/daemon/specs/agents/shared/skills/process/systematic-debugging/CREATION-LOG.md +119 -0
  56. package/daemon/specs/agents/shared/skills/process/systematic-debugging/SKILL.md +296 -0
  57. package/daemon/specs/agents/shared/skills/process/systematic-debugging/condition-based-waiting-example.ts +158 -0
  58. package/daemon/specs/agents/shared/skills/process/systematic-debugging/condition-based-waiting.md +115 -0
  59. package/daemon/specs/agents/shared/skills/process/systematic-debugging/defense-in-depth.md +122 -0
  60. package/daemon/specs/agents/shared/skills/process/systematic-debugging/find-polluter.sh +63 -0
  61. package/daemon/specs/agents/shared/skills/process/systematic-debugging/root-cause-tracing.md +169 -0
  62. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-academic.md +14 -0
  63. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-pressure-1.md +58 -0
  64. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-pressure-2.md +68 -0
  65. package/daemon/specs/agents/shared/skills/process/systematic-debugging/test-pressure-3.md +69 -0
  66. package/daemon/specs/agents/shared/skills/process/test-driven-development/SKILL.md +371 -0
  67. package/daemon/specs/agents/shared/skills/process/test-driven-development/testing-anti-patterns.md +299 -0
  68. package/daemon/specs/agents/shared/skills/process/using-superpowers/SKILL.md +95 -0
  69. package/daemon/specs/agents/shared/skills/process/verification-before-completion/SKILL.md +139 -0
  70. package/daemon/specs/agents/shared/skills/process/writing-plans/SKILL.md +116 -0
  71. package/daemon/specs/{adversarial-review.yaml → rigs/focused/adversarial-review/rig.yaml} +3 -3
  72. package/daemon/specs/{research-team.yaml → rigs/focused/research-team/rig.yaml} +3 -3
  73. package/daemon/specs/rigs/launch/demo/CULTURE.md +92 -0
  74. package/daemon/specs/{product-team.yaml → rigs/launch/demo/rig.yaml} +13 -12
  75. package/daemon/specs/{implementation-pair.yaml → rigs/launch/implementation-pair/rig.yaml} +5 -5
  76. package/daemon/specs/rigs/preview/product-team/CULTURE.md +137 -0
  77. package/daemon/specs/rigs/preview/product-team/rig.yaml +91 -0
  78. package/dist/client.d.ts +17 -7
  79. package/dist/client.d.ts.map +1 -1
  80. package/dist/client.js +33 -23
  81. package/dist/client.js.map +1 -1
  82. package/dist/commands/bootstrap.d.ts.map +1 -1
  83. package/dist/commands/bootstrap.js +2 -1
  84. package/dist/commands/bootstrap.js.map +1 -1
  85. package/dist/commands/daemon.d.ts.map +1 -1
  86. package/dist/commands/daemon.js +5 -1
  87. package/dist/commands/daemon.js.map +1 -1
  88. package/dist/commands/up.d.ts.map +1 -1
  89. package/dist/commands/up.js +4 -3
  90. package/dist/commands/up.js.map +1 -1
  91. package/dist/daemon-lifecycle.d.ts.map +1 -1
  92. package/dist/daemon-lifecycle.js +54 -7
  93. package/dist/daemon-lifecycle.js.map +1 -1
  94. package/dist/fetch-with-timeout.d.ts +9 -0
  95. package/dist/fetch-with-timeout.d.ts.map +1 -0
  96. package/dist/fetch-with-timeout.js +41 -0
  97. package/dist/fetch-with-timeout.js.map +1 -0
  98. package/dist/mcp-server.d.ts.map +1 -1
  99. package/dist/mcp-server.js +2 -1
  100. package/dist/mcp-server.js.map +1 -1
  101. package/package.json +1 -1
  102. package/daemon/specs/agents/impl/guidance/role.md +0 -27
  103. package/daemon/specs/agents/qa/guidance/role.md +0 -26
  104. package/daemon/specs/agents/shared/skills/openrig-user/SKILL.md +0 -264
  105. /package/daemon/specs/agents/{analyst → research/analyst}/guidance/role.md +0 -0
  106. /package/daemon/specs/agents/{synthesizer → research/synthesizer}/guidance/role.md +0 -0
@@ -0,0 +1,417 @@
1
+ ---
2
+ name: agent-browser
3
+ description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction.
4
+ allowed-tools: Bash(npx agent-browser:*), Bash(agent-browser:*)
5
+ ---
6
+
7
+ # Browser Automation with agent-browser
8
+
9
+ ## Core Workflow
10
+
11
+ Every browser automation follows this pattern:
12
+
13
+ 1. **Navigate**: `agent-browser open <url>`
14
+ 2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
15
+ 3. **Interact**: Use refs to click, fill, select
16
+ 4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
17
+
18
+ ```bash
19
+ agent-browser open https://example.com/form
20
+ agent-browser snapshot -i
21
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
22
+
23
+ agent-browser fill @e1 "user@example.com"
24
+ agent-browser fill @e2 "password123"
25
+ agent-browser click @e3
26
+ agent-browser wait --load networkidle
27
+ agent-browser snapshot -i # Check result
28
+ ```
29
+
30
+ ## Command Chaining
31
+
32
+ Commands can be chained with `&&` in a single shell invocation. The browser persists between commands via a background daemon, so chaining is safe and more efficient than separate calls.
33
+
34
+ ```bash
35
+ # Chain open + wait + snapshot in one call
36
+ agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i
37
+
38
+ # Chain multiple interactions
39
+ agent-browser fill @e1 "user@example.com" && agent-browser fill @e2 "password123" && agent-browser click @e3
40
+
41
+ # Navigate and capture
42
+ agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser screenshot page.png
43
+ ```
44
+
45
+ **When to chain:** Use `&&` when you don't need to read the output of an intermediate command before proceeding (e.g., open + wait + screenshot). Run commands separately when you need to parse the output first (e.g., snapshot to discover refs, then interact using those refs).
46
+
47
+ ## Essential Commands
48
+
49
+ ```bash
50
+ # Navigation
51
+ agent-browser open <url> # Navigate (aliases: goto, navigate)
52
+ agent-browser close # Close browser
53
+
54
+ # Snapshot
55
+ agent-browser snapshot -i # Interactive elements with refs (recommended)
56
+ agent-browser snapshot -i -C # Include cursor-interactive elements (divs with onclick, cursor:pointer)
57
+ agent-browser snapshot -s "#selector" # Scope to CSS selector
58
+
59
+ # Interaction (use @refs from snapshot)
60
+ agent-browser click @e1 # Click element
61
+ agent-browser click @e1 --new-tab # Click and open in new tab
62
+ agent-browser fill @e2 "text" # Clear and type text
63
+ agent-browser type @e2 "text" # Type without clearing
64
+ agent-browser select @e1 "option" # Select dropdown option
65
+ agent-browser check @e1 # Check checkbox
66
+ agent-browser press Enter # Press key
67
+ agent-browser scroll down 500 # Scroll page
68
+
69
+ # Get information
70
+ agent-browser get text @e1 # Get element text
71
+ agent-browser get url # Get current URL
72
+ agent-browser get title # Get page title
73
+
74
+ # Wait
75
+ agent-browser wait @e1 # Wait for element
76
+ agent-browser wait --load networkidle # Wait for network idle
77
+ agent-browser wait --url "**/page" # Wait for URL pattern
78
+ agent-browser wait 2000 # Wait milliseconds
79
+
80
+ # Capture
81
+ agent-browser screenshot # Screenshot to temp dir
82
+ agent-browser screenshot --full # Full page screenshot
83
+ agent-browser screenshot --annotate # Annotated screenshot with numbered element labels
84
+ agent-browser pdf output.pdf # Save as PDF
85
+
86
+ # Diff (compare page states)
87
+ agent-browser diff snapshot # Compare current vs last snapshot
88
+ agent-browser diff snapshot --baseline before.txt # Compare current vs saved file
89
+ agent-browser diff screenshot --baseline before.png # Visual pixel diff
90
+ agent-browser diff url <url1> <url2> # Compare two pages
91
+ agent-browser diff url <url1> <url2> --wait-until networkidle # Custom wait strategy
92
+ agent-browser diff url <url1> <url2> --selector "#main" # Scope to element
93
+ ```
94
+
95
+ ## Common Patterns
96
+
97
+ ### Form Submission
98
+
99
+ ```bash
100
+ agent-browser open https://example.com/signup
101
+ agent-browser snapshot -i
102
+ agent-browser fill @e1 "Jane Doe"
103
+ agent-browser fill @e2 "jane@example.com"
104
+ agent-browser select @e3 "California"
105
+ agent-browser check @e4
106
+ agent-browser click @e5
107
+ agent-browser wait --load networkidle
108
+ ```
109
+
110
+ ### Authentication with State Persistence
111
+
112
+ ```bash
113
+ # Login once and save state
114
+ agent-browser open https://app.example.com/login
115
+ agent-browser snapshot -i
116
+ agent-browser fill @e1 "$USERNAME"
117
+ agent-browser fill @e2 "$PASSWORD"
118
+ agent-browser click @e3
119
+ agent-browser wait --url "**/dashboard"
120
+ agent-browser state save auth.json
121
+
122
+ # Reuse in future sessions
123
+ agent-browser state load auth.json
124
+ agent-browser open https://app.example.com/dashboard
125
+ ```
126
+
127
+ ### Session Persistence
128
+
129
+ ```bash
130
+ # Auto-save/restore cookies and localStorage across browser restarts
131
+ agent-browser --session-name myapp open https://app.example.com/login
132
+ # ... login flow ...
133
+ agent-browser close # State auto-saved to ~/.agent-browser/sessions/
134
+
135
+ # Next time, state is auto-loaded
136
+ agent-browser --session-name myapp open https://app.example.com/dashboard
137
+
138
+ # Encrypt state at rest
139
+ export AGENT_BROWSER_ENCRYPTION_KEY=$(openssl rand -hex 32)
140
+ agent-browser --session-name secure open https://app.example.com
141
+
142
+ # Manage saved states
143
+ agent-browser state list
144
+ agent-browser state show myapp-default.json
145
+ agent-browser state clear myapp
146
+ agent-browser state clean --older-than 7
147
+ ```
148
+
149
+ ### Data Extraction
150
+
151
+ ```bash
152
+ agent-browser open https://example.com/products
153
+ agent-browser snapshot -i
154
+ agent-browser get text @e5 # Get specific element text
155
+ agent-browser get text body > page.txt # Get all page text
156
+
157
+ # JSON output for parsing
158
+ agent-browser snapshot -i --json
159
+ agent-browser get text @e1 --json
160
+ ```
161
+
162
+ ### Parallel Sessions
163
+
164
+ ```bash
165
+ agent-browser --session site1 open https://site-a.com
166
+ agent-browser --session site2 open https://site-b.com
167
+
168
+ agent-browser --session site1 snapshot -i
169
+ agent-browser --session site2 snapshot -i
170
+
171
+ agent-browser session list
172
+ ```
173
+
174
+ ### Connect to Existing Chrome
175
+
176
+ ```bash
177
+ # Auto-discover running Chrome with remote debugging enabled
178
+ agent-browser --auto-connect open https://example.com
179
+ agent-browser --auto-connect snapshot
180
+
181
+ # Or with explicit CDP port
182
+ agent-browser --cdp 9222 snapshot
183
+ ```
184
+
185
+ ### Visual Browser (Debugging)
186
+
187
+ ```bash
188
+ agent-browser --headed open https://example.com
189
+ agent-browser highlight @e1 # Highlight element
190
+ agent-browser record start demo.webm # Record session
191
+ agent-browser profiler start # Start Chrome DevTools profiling
192
+ agent-browser profiler stop trace.json # Stop and save profile (path optional)
193
+ ```
194
+
195
+ ### Local Files (PDFs, HTML)
196
+
197
+ ```bash
198
+ # Open local files with file:// URLs
199
+ agent-browser --allow-file-access open file:///path/to/document.pdf
200
+ agent-browser --allow-file-access open file:///path/to/page.html
201
+ agent-browser screenshot output.png
202
+ ```
203
+
204
+ ### iOS Simulator (Mobile Safari)
205
+
206
+ ```bash
207
+ # List available iOS simulators
208
+ agent-browser device list
209
+
210
+ # Launch Safari on a specific device
211
+ agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
212
+
213
+ # Same workflow as desktop - snapshot, interact, re-snapshot
214
+ agent-browser -p ios snapshot -i
215
+ agent-browser -p ios tap @e1 # Tap (alias for click)
216
+ agent-browser -p ios fill @e2 "text"
217
+ agent-browser -p ios swipe up # Mobile-specific gesture
218
+
219
+ # Take screenshot
220
+ agent-browser -p ios screenshot mobile.png
221
+
222
+ # Close session (shuts down simulator)
223
+ agent-browser -p ios close
224
+ ```
225
+
226
+ **Requirements:** macOS with Xcode, Appium (`npm install -g appium && appium driver install xcuitest`)
227
+
228
+ **Real devices:** Works with physical iOS devices if pre-configured. Use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`.
229
+
230
+ ## Diffing (Verifying Changes)
231
+
232
+ Use `diff snapshot` after performing an action to verify it had the intended effect. This compares the current accessibility tree against the last snapshot taken in the session.
233
+
234
+ ```bash
235
+ # Typical workflow: snapshot -> action -> diff
236
+ agent-browser snapshot -i # Take baseline snapshot
237
+ agent-browser click @e2 # Perform action
238
+ agent-browser diff snapshot # See what changed (auto-compares to last snapshot)
239
+ ```
240
+
241
+ For visual regression testing or monitoring:
242
+
243
+ ```bash
244
+ # Save a baseline screenshot, then compare later
245
+ agent-browser screenshot baseline.png
246
+ # ... time passes or changes are made ...
247
+ agent-browser diff screenshot --baseline baseline.png
248
+
249
+ # Compare staging vs production
250
+ agent-browser diff url https://staging.example.com https://prod.example.com --screenshot
251
+ ```
252
+
253
+ `diff snapshot` output uses `+` for additions and `-` for removals, similar to git diff. `diff screenshot` produces a diff image with changed pixels highlighted in red, plus a mismatch percentage.
254
+
255
+ ## Timeouts and Slow Pages
256
+
257
+ The default Playwright timeout is 60 seconds for local browsers. For slow websites or large pages, use explicit waits instead of relying on the default timeout:
258
+
259
+ ```bash
260
+ # Wait for network activity to settle (best for slow pages)
261
+ agent-browser wait --load networkidle
262
+
263
+ # Wait for a specific element to appear
264
+ agent-browser wait "#content"
265
+ agent-browser wait @e1
266
+
267
+ # Wait for a specific URL pattern (useful after redirects)
268
+ agent-browser wait --url "**/dashboard"
269
+
270
+ # Wait for a JavaScript condition
271
+ agent-browser wait --fn "document.readyState === 'complete'"
272
+
273
+ # Wait a fixed duration (milliseconds) as a last resort
274
+ agent-browser wait 5000
275
+ ```
276
+
277
+ When dealing with consistently slow websites, use `wait --load networkidle` after `open` to ensure the page is fully loaded before taking a snapshot. If a specific element is slow to render, wait for it directly with `wait <selector>` or `wait @ref`.
278
+
279
+ ## Session Management and Cleanup
280
+
281
+ When running multiple agents or automations concurrently, always use named sessions to avoid conflicts:
282
+
283
+ ```bash
284
+ # Each agent gets its own isolated session
285
+ agent-browser --session agent1 open https://site-a.com
285
+ agent-browser --session agent2 open https://site-b.com
287
+
288
+ # Check active sessions
289
+ agent-browser session list
290
+ ```
291
+
292
+ Always close your browser session when done to avoid leaked processes:
293
+
294
+ ```bash
295
+ agent-browser close # Close default session
296
+ agent-browser --session agent1 close # Close specific session
297
+ ```
298
+
299
+ If a previous session was not closed properly, the daemon may still be running. Use `agent-browser close` to clean it up before starting new work.
300
+
301
+ ## Ref Lifecycle (Important)
302
+
303
+ Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after:
304
+
305
+ - Clicking links or buttons that navigate
306
+ - Form submissions
307
+ - Dynamic content loading (dropdowns, modals)
308
+
309
+ ```bash
310
+ agent-browser click @e5 # Navigates to new page
311
+ agent-browser snapshot -i # MUST re-snapshot
312
+ agent-browser click @e1 # Use new refs
313
+ ```
314
+
315
+ ## Annotated Screenshots (Vision Mode)
316
+
317
+ Use `--annotate` to take a screenshot with numbered labels overlaid on interactive elements. Each label `[N]` maps to ref `@eN`. This also caches refs, so you can interact with elements immediately without a separate snapshot.
318
+
319
+ ```bash
320
+ agent-browser screenshot --annotate
321
+ # Output includes the image path and a legend:
322
+ # [1] @e1 button "Submit"
323
+ # [2] @e2 link "Home"
324
+ # [3] @e3 textbox "Email"
325
+ agent-browser click @e2 # Click using ref from annotated screenshot
326
+ ```
327
+
328
+ Use annotated screenshots when:
329
+ - The page has unlabeled icon buttons or visual-only elements
330
+ - You need to verify visual layout or styling
331
+ - Canvas or chart elements are present (invisible to text snapshots)
332
+ - You need spatial reasoning about element positions
333
+
334
+ ## Semantic Locators (Alternative to Refs)
335
+
336
+ When refs are unavailable or unreliable, use semantic locators:
337
+
338
+ ```bash
339
+ agent-browser find text "Sign In" click
340
+ agent-browser find label "Email" fill "user@test.com"
341
+ agent-browser find role button click --name "Submit"
342
+ agent-browser find placeholder "Search" type "query"
343
+ agent-browser find testid "submit-btn" click
344
+ ```
345
+
346
+ ## JavaScript Evaluation (eval)
347
+
348
+ Use `eval` to run JavaScript in the browser context. **Shell quoting can corrupt complex expressions** -- use `--stdin` or `-b` to avoid issues.
349
+
350
+ ```bash
351
+ # Simple expressions work with regular quoting
352
+ agent-browser eval 'document.title'
353
+ agent-browser eval 'document.querySelectorAll("img").length'
354
+
355
+ # Complex JS: use --stdin with heredoc (RECOMMENDED)
356
+ agent-browser eval --stdin <<'EVALEOF'
357
+ JSON.stringify(
358
+ Array.from(document.querySelectorAll("img"))
359
+ .filter(i => !i.alt)
360
+ .map(i => ({ src: i.src.split("/").pop(), width: i.width }))
361
+ )
362
+ EVALEOF
363
+
364
+ # Alternative: base64 encoding (avoids all shell escaping issues)
365
+ agent-browser eval -b "$(echo -n 'Array.from(document.querySelectorAll("a")).map(a => a.href)' | base64)"
366
+ ```
367
+
368
+ **Why this matters:** When the shell processes your command, inner double quotes, `!` characters (history expansion), backticks, and `$()` can all corrupt the JavaScript before it reaches agent-browser. The `--stdin` and `-b` flags bypass shell interpretation entirely.
369
+
370
+ **Rules of thumb:**
371
+ - Single-line, at most one level of double quotes inside -> regular `eval 'expression'` with single quotes is fine
372
+ - Nested quotes, arrow functions, template literals, or multiline -> use `eval --stdin <<'EVALEOF'`
373
+ - Programmatic/generated scripts -> use `eval -b` with base64
374
+
375
+ ## Configuration File
376
+
377
+ Create `agent-browser.json` in the project root for persistent settings:
378
+
379
+ ```json
380
+ {
381
+ "headed": true,
382
+ "proxy": "http://localhost:8080",
383
+ "profile": "./browser-data"
384
+ }
385
+ ```
386
+
387
+ Priority (lowest to highest): `~/.agent-browser/config.json` < `./agent-browser.json` < env vars < CLI flags. Use `--config <path>` or `AGENT_BROWSER_CONFIG` env var for a custom config file (exits with error if missing/invalid). All CLI options map to camelCase keys (e.g., `--executable-path` -> `"executablePath"`). Boolean flags accept `true`/`false` values (e.g., `--headed false` overrides config). Extensions from user and project configs are merged, not replaced.
388
+
389
+ ## Deep-Dive Documentation
390
+
391
+ | Reference | When to Use |
392
+ |-----------|-------------|
393
+ | [references/commands.md](references/commands.md) | Full command reference with all options |
394
+ | [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
395
+ | [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
396
+ | [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
397
+ | [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
398
+ | [references/profiling.md](references/profiling.md) | Chrome DevTools profiling for performance analysis |
399
+ | [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
400
+
401
+ ## Ready-to-Use Templates
402
+
403
+ | Template | Description |
404
+ |----------|-------------|
405
+ | [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation |
406
+ | [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state |
407
+ | [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots |
408
+
409
+ ```bash
410
+ ./templates/form-automation.sh https://example.com/form
411
+ ./templates/authenticated-session.sh https://app.example.com/login
412
+ ./templates/capture-workflow.sh https://example.com ./output
413
+ ```
414
+
415
+ ## Local Dev Insights
416
+
417
+ **IMPORTANT:** Read `LOCAL-INSIGHTS.md` in this skill directory for gotchas, corrections, and tested workflows discovered through hands-on use that this upstream skill doesn't cover.
@@ -0,0 +1,96 @@
1
+ ---
2
+ name: brainstorming
3
+ description: "You MUST use this before any creative work - creating features, building components, adding functionality, or modifying behavior. Explores user intent, requirements and design before implementation."
4
+ ---
5
+
6
+ # Brainstorming Ideas Into Designs
7
+
8
+ ## Overview
9
+
10
+ Help turn ideas into fully formed designs and specs through natural collaborative dialogue.
11
+
12
+ Start by understanding the current project context, then ask questions one at a time to refine the idea. Once you understand what you're building, present the design and get user approval.
13
+
14
+ <HARD-GATE>
15
+ Do NOT invoke any implementation skill, write any code, scaffold any project, or take any implementation action until you have presented a design and the user has approved it. This applies to EVERY project regardless of perceived simplicity.
16
+ </HARD-GATE>
17
+
18
+ ## Anti-Pattern: "This Is Too Simple To Need A Design"
19
+
20
+ Every project goes through this process. A todo list, a single-function utility, a config change — all of them. "Simple" projects are where unexamined assumptions cause the most wasted work. The design can be short (a few sentences for truly simple projects), but you MUST present it and get approval.
21
+
22
+ ## Checklist
23
+
24
+ You MUST create a task for each of these items and complete them in order:
25
+
26
+ 1. **Explore project context** — check files, docs, recent commits
27
+ 2. **Ask clarifying questions** — one at a time, understand purpose/constraints/success criteria
28
+ 3. **Propose 2-3 approaches** — with trade-offs and your recommendation
29
+ 4. **Present design** — in sections scaled to their complexity, get user approval after each section
30
+ 5. **Write design doc** — save to `docs/plans/YYYY-MM-DD-<topic>-design.md` and commit
31
+ 6. **Transition to implementation** — invoke the writing-plans skill to create an implementation plan
32
+
33
+ ## Process Flow
34
+
35
+ ```dot
36
+ digraph brainstorming {
37
+ "Explore project context" [shape=box];
38
+ "Ask clarifying questions" [shape=box];
39
+ "Propose 2-3 approaches" [shape=box];
40
+ "Present design sections" [shape=box];
41
+ "User approves design?" [shape=diamond];
42
+ "Write design doc" [shape=box];
43
+ "Invoke writing-plans skill" [shape=doublecircle];
44
+
45
+ "Explore project context" -> "Ask clarifying questions";
46
+ "Ask clarifying questions" -> "Propose 2-3 approaches";
47
+ "Propose 2-3 approaches" -> "Present design sections";
48
+ "Present design sections" -> "User approves design?";
49
+ "User approves design?" -> "Present design sections" [label="no, revise"];
50
+ "User approves design?" -> "Write design doc" [label="yes"];
51
+ "Write design doc" -> "Invoke writing-plans skill";
52
+ }
53
+ ```
54
+
55
+ **The terminal state is invoking writing-plans.** Do NOT invoke frontend-design, mcp-builder, or any other implementation skill. The ONLY skill you invoke after brainstorming is writing-plans.
56
+
57
+ ## The Process
58
+
59
+ **Understanding the idea:**
60
+ - Check out the current project state first (files, docs, recent commits)
61
+ - Ask questions one at a time to refine the idea
62
+ - Prefer multiple choice questions when possible, but open-ended is fine too
63
+ - Only one question per message - if a topic needs more exploration, break it into multiple questions
64
+ - Focus on understanding: purpose, constraints, success criteria
65
+
66
+ **Exploring approaches:**
67
+ - Propose 2-3 different approaches with trade-offs
68
+ - Present options conversationally with your recommendation and reasoning
69
+ - Lead with your recommended option and explain why
70
+
71
+ **Presenting the design:**
72
+ - Once you believe you understand what you're building, present the design
73
+ - Scale each section to its complexity: a few sentences if straightforward, up to 200-300 words if nuanced
74
+ - Ask after each section whether it looks right so far
75
+ - Cover: architecture, components, data flow, error handling, testing
76
+ - Be ready to go back and clarify if something doesn't make sense
77
+
78
+ ## After the Design
79
+
80
+ **Documentation:**
81
+ - Write the validated design to `docs/plans/YYYY-MM-DD-<topic>-design.md`
82
+ - Use the elements-of-style:writing-clearly-and-concisely skill if available
83
+ - Commit the design document to git
84
+
85
+ **Implementation:**
86
+ - Invoke the writing-plans skill to create a detailed implementation plan
87
+ - Do NOT invoke any other skill. writing-plans is the next step.
88
+
89
+ ## Key Principles
90
+
91
+ - **One question at a time** - Don't overwhelm with multiple questions
92
+ - **Multiple choice preferred** - Easier to answer than open-ended when possible
93
+ - **YAGNI ruthlessly** - Remove unnecessary features from all designs
94
+ - **Explore alternatives** - Always propose 2-3 approaches before settling
95
+ - **Incremental validation** - Present design, get approval before moving on
96
+ - **Be flexible** - Go back and clarify when something doesn't make sense