browserctl 0.3.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +47 -0
  3. data/README.md +120 -214
  4. data/bin/browserctl +35 -13
  5. data/bin/browserd +7 -1
  6. data/bin/setup +7 -3
  7. data/examples/cloudflare_hitl.rb +1 -1
  8. data/examples/smoke/params_file.rb +35 -0
  9. data/examples/smoke/store_fetch.rb +39 -0
  10. data/examples/the_internet/add_remove_elements.rb +3 -3
  11. data/examples/the_internet/checkboxes.rb +3 -3
  12. data/examples/the_internet/dropdown.rb +3 -3
  13. data/examples/the_internet/dynamic_loading.rb +3 -3
  14. data/examples/the_internet/login.rb +5 -5
  15. data/lib/browserctl/client.rb +38 -2
  16. data/lib/browserctl/commands/export_cookies.rb +18 -0
  17. data/lib/browserctl/commands/import_cookies.rb +23 -0
  18. data/lib/browserctl/commands/init.rb +11 -0
  19. data/lib/browserctl/commands/{pause_resume.rb → pause.rb} +2 -12
  20. data/lib/browserctl/commands/record.rb +2 -0
  21. data/lib/browserctl/commands/resume.rb +21 -0
  22. data/lib/browserctl/commands/snapshot.rb +5 -5
  23. data/lib/browserctl/commands/status.rb +30 -0
  24. data/lib/browserctl/constants.rb +9 -2
  25. data/lib/browserctl/detectors.rb +23 -0
  26. data/lib/browserctl/errors.rb +25 -0
  27. data/lib/browserctl/logger.rb +40 -5
  28. data/lib/browserctl/policy.rb +36 -0
  29. data/lib/browserctl/recording.rb +81 -15
  30. data/lib/browserctl/runner.rb +23 -4
  31. data/lib/browserctl/server/command_dispatcher.rb +31 -234
  32. data/lib/browserctl/server/handlers/cookies.rb +57 -0
  33. data/lib/browserctl/server/handlers/daemon_control.rb +29 -0
  34. data/lib/browserctl/server/handlers/devtools.rb +22 -0
  35. data/lib/browserctl/server/handlers/hitl.rb +30 -0
  36. data/lib/browserctl/server/handlers/navigation.rb +72 -0
  37. data/lib/browserctl/server/handlers/observation.rb +113 -0
  38. data/lib/browserctl/server/handlers/page_lifecycle.rb +29 -0
  39. data/lib/browserctl/server.rb +18 -2
  40. data/lib/browserctl/version.rb +1 -1
  41. data/lib/browserctl/workflow.rb +41 -3
  42. data/lib/browserctl.rb +12 -2
  43. metadata +48 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1626673b3046c133aa7e1d63ee7d1886ed3071970227c184ab412a8f3cf48a4f
4
- data.tar.gz: 4773002a9052247ec8afe259abd0d4c38cc3aa308be81bbeca756626bb206d44
3
+ metadata.gz: ec75744264ce56f8c3f94ab95518a106c8225ec1dc11dd35a97f43186b590502
4
+ data.tar.gz: 973c3b270b5d1bba3dfa900623790e3c5cb9d5bd8ebe8faa50c28d09e48fbfff
5
5
  SHA512:
6
- metadata.gz: 8e241d9fc064c419285b2e83b5e5154c6ccb639aeffb2179ace9b6ef3ec180069b76772cf5f6485af3cea4ef4616265a87dbbc43d7879cd0a71eec946e022ce8
7
- data.tar.gz: b396184b74f6804e3b84105c4b75a89ef5a63751ad5577e2dc8008ed1c3111614f2d579ac9c3a12f7943968b3c4cab59fab69dfe392f4af51ba59fff0e8684ba
6
+ metadata.gz: 16cd84c58a070de0b9d340ed2bb4e3a951216526dd77a9f18e0f9ea7292311eb51eddf1989c93c11d488cf4bf29163a60a13510f5949599d6027142cf808c1a0
7
+ data.tar.gz: 615213ac9ba1e694c9fb4597a348abebab9d8f2ba04801dd7d59c1dc00829111f0cce9f49391a7883e7502422ed0b246e86d8c553c62b9be406e0f1e23946e8c
data/CHANGELOG.md CHANGED
@@ -1,10 +1,57 @@
1
1
  # Changelog
2
2
 
3
+ > **Do not edit this file manually.** It is generated automatically by
4
+ > [release-please](https://github.com/googleapis/release-please) on every merge to `main`.
5
+ > To include a change in the next release, write a
6
+ > [Conventional Commit](https://www.conventionalcommits.org/) message (`feat:`, `fix:`, `chore:`, etc.).
7
+
3
8
  All notable changes to this project will be documented in this file.
4
9
 
5
10
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
11
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
12
 
13
+ ## [0.5.0](https://github.com/patrick204nqh/browserctl/compare/v0.4.0...v0.5.0) (2026-04-25)
14
+
15
+
16
+ ### Features
17
+
18
+ * add cookie export/import commands and refine interaction guidance ([1dc8b2c](https://github.com/patrick204nqh/browserctl/commit/1dc8b2c4c744a5f0930c28d3bcf93fd017c368b1))
19
+ * rename snapshot format 'ai' to 'elements' ([#22](https://github.com/patrick204nqh/browserctl/issues/22)) ([9fde6af](https://github.com/patrick204nqh/browserctl/commit/9fde6afb9e53b9556c84b4c24987777a5c266adf))
20
+ * v0.5 architecture & protocol lock ([#20](https://github.com/patrick204nqh/browserctl/issues/20)) ([1224f2f](https://github.com/patrick204nqh/browserctl/commit/1224f2fe2fb05119053831e7901b286fe93ad4fc))
21
+
22
+
23
+ ### Bug Fixes
24
+
25
+ * add rake as development dependency ([13902d8](https://github.com/patrick204nqh/browserctl/commit/13902d81fdb986c6ca754fcb16ce61ee850e27ce))
26
+ * improve browser GIF quality and fix terminal font rendering ([17afdb2](https://github.com/patrick204nqh/browserctl/commit/17afdb210916d8ebdabd237130da1fc534cdbd3e))
27
+ * open PR for demo assets instead of pushing directly to main ([f051316](https://github.com/patrick204nqh/browserctl/commit/f051316962d308419c08f60550cfb9910b148f14))
28
+ * quote filtergraph and add -update 1 for palette in browser GIF pipeline ([053d074](https://github.com/patrick204nqh/browserctl/commit/053d074b5fdd3e0ea6fc51589593d72d5f7fcd74))
29
+ * replace undefined REGISTRY constant with Browserctl.registry_snapshot ([d19a6bf](https://github.com/patrick204nqh/browserctl/commit/d19a6bf7ded8a5211f5b2a75511a9532f94e3845))
30
+ * update README for improved clarity and add Quick Start section ([881d914](https://github.com/patrick204nqh/browserctl/commit/881d914c2bf46625ee4ff5c1ca8ef21b462c533d))
31
+ * use app-slug output instead of gh api /app in assets workflow ([ae6a0f8](https://github.com/patrick204nqh/browserctl/commit/ae6a0f80ba2b66c76d681b1f258de5d72bc34c72))
32
+ * use CSS selectors for browser GIF and add full login flow frames ([717b86b](https://github.com/patrick204nqh/browserctl/commit/717b86befa9af6bab1823f984668ebdc29f2d7f8))
33
+
34
+ ## [0.4.0](https://github.com/patrick204nqh/browserctl/compare/v0.3.1...v0.4.0) (2026-04-25)
35
+
36
+
37
+ ### Features
38
+
39
+ * add Claude Code plugin support with installation instructions and plugin metadata ([b568ed2](https://github.com/patrick204nqh/browserctl/commit/b568ed29015d77bc521e4484aada1054b73362e6))
40
+ * add PRODUCT and standardization plan documentation; update VISION, README, and command references ([5594b52](https://github.com/patrick204nqh/browserctl/commit/5594b520f65d19f6c0c022e7944f9a560044549f))
41
+ * allow .browserctl/screenshots/ as project-scoped screenshot directory ([b6914f4](https://github.com/patrick204nqh/browserctl/commit/b6914f42ead76865b8c6e8e8c0e0f88516cb53e3))
42
+ * browserctl v0.4 hardening (security, cookie I/O, store/fetch, params file) ([#15](https://github.com/patrick204nqh/browserctl/issues/15)) ([c702661](https://github.com/patrick204nqh/browserctl/commit/c702661196e0784179afdbd824e6064c1db047bc))
43
+ * DX improvements, doc refresh, and hardening ([#16](https://github.com/patrick204nqh/browserctl/issues/16)) ([6e32e00](https://github.com/patrick204nqh/browserctl/commit/6e32e0038abfa28be9e92ec94f9a7642e29803c4))
44
+
45
+
46
+ ### Bug Fixes
47
+
48
+ * add enabledPlugins section to settings.json for browserctl plugin activation ([924dbee](https://github.com/patrick204nqh/browserctl/commit/924dbeef9c86ea2c2486d4d93118045acc042833))
49
+ * add marketplace.json and correct plugin metadata ([5cfc1d6](https://github.com/patrick204nqh/browserctl/commit/5cfc1d621bc162fb200150c71ad4f5b62b4bdcc7))
50
+ * allow any path within daemon CWD for screenshots, not just .browserctl/screenshots ([817a492](https://github.com/patrick204nqh/browserctl/commit/817a49211e991493b6018b663fc198c4a0ff3742))
51
+ * save login screenshot to ~/.browserctl/screenshots/ instead of docs/ path ([246688e](https://github.com/patrick204nqh/browserctl/commit/246688e5c586d2028b90c178994d0eb7f1e9c626))
52
+ * update marketplace.json to set strict mode to true for browserctl plugin ([99db2ad](https://github.com/patrick204nqh/browserctl/commit/99db2ada2aae08f8bceb78658e15a267246df973))
53
+ * use screenshot_path param in all examples; CI passes docs/screenshots/ explicitly ([64e073d](https://github.com/patrick204nqh/browserctl/commit/64e073d74979d869560d814b9a986c001b400238))
54
+
8
55
  ## [0.3.1](https://github.com/patrick204nqh/browserctl/compare/v0.3.0...v0.3.1) (2026-04-20)
9
56
 
10
57
 
data/README.md CHANGED
@@ -2,287 +2,178 @@
2
2
  <img src=".github/logo.svg" width="96" height="96" alt="browserctl logo"/>
3
3
  </p>
4
4
 
5
- # browserctl
5
+ <h1 align="center">browserctl</h1>
6
6
 
7
- [![CI](https://github.com/patrick204nqh/browserctl/actions/workflows/ci.yml/badge.svg)](https://github.com/patrick204nqh/browserctl/actions/workflows/ci.yml)
8
- [![Gem Version](https://badge.fury.io/rb/browserctl.svg)](https://badge.fury.io/rb/browserctl)
9
- [![Downloads](https://img.shields.io/gem/dt/browserctl)](https://rubygems.org/gems/browserctl)
7
+ <p align="center">
8
+ A persistent browser daemon for AI agents and iterative dev workflows — the session stays alive between commands.
9
+ </p>
10
10
 
11
- A persistent browser automation daemon and CLI, purpose-built for AI agents and developer workflows.
11
+ <p align="center">
12
+ <a href="https://github.com/patrick204nqh/browserctl/actions/workflows/ci.yml"><img src="https://github.com/patrick204nqh/browserctl/actions/workflows/ci.yml/badge.svg" alt="CI"/></a>
13
+ <a href="https://badge.fury.io/rb/browserctl"><img src="https://badge.fury.io/rb/browserctl.svg" alt="Gem Version"/></a>
14
+ <a href="https://rubygems.org/gems/browserctl"><img src="https://img.shields.io/gem/dt/browserctl" alt="Downloads"/></a>
15
+ </p>
12
16
 
13
- Unlike tools that restart the browser on every script run, **browserctl keeps a named browser session alive** — preserving cookies, localStorage, open tabs, and page state across discrete commands.
17
+ ---
18
+
19
+ Every browser automation tool restarts the browser when your script ends. That means re-authenticating, re-navigating, re-loading state — on every run. browserctl doesn't restart. The session stays alive between commands, so you pick up exactly where you left off.
14
20
 
15
21
  ```bash
16
- browserd & # start the daemon (headless)
22
+ browserd & # start the daemon (headless)
17
23
  browserctl open login --url https://example.com/login
18
- browserctl snap login # AI-friendly JSON snapshot with ref IDs
19
- browserctl fill login --ref e1 --value me@example.com # interact by ref
24
+ browserctl snap login # AI-friendly JSON snapshot with ref IDs
25
+ browserctl fill login --ref e1 --value me@example.com # interact by ref, no selectors needed
20
26
  browserctl click login --ref e2
21
27
  browserctl shutdown
22
28
  ```
23
29
 
24
- ![browserctl capturing a login flow](docs/screenshots/the_internet_login.png)
25
- <p align="center"><sub>Login flow captured with <code>browserctl shot</code></sub></p>
26
-
27
30
  ---
28
31
 
29
- ## Why browserctl?
32
+ ## See it in action
30
33
 
31
- Most automation tools are stateless — every script spins up a fresh browser and tears it down. browserctl doesn't.
34
+ <table align="center"><tr>
35
+ <td align="center" width="50%">
32
36
 
33
- | | browserctl | Playwright / Selenium |
34
- |---|---|---|
35
- | Session persists across commands | ✓ | ✗ (per-script lifecycle) |
36
- | Named page handles | ✓ | ✗ |
37
- | AI-friendly DOM snapshot | ✓ | ✗ |
38
- | Lightweight CLI interface | ✓ | ✗ |
39
- | Full browser automation API | — | ✓ |
40
- | Parallel multi-browser testing | — | ✓ |
37
+ **Terminal**<br/>
38
+ <sub>CLI commands, live output, session persistence proof</sub>
41
39
 
42
- **Use browserctl when** you need a browser that stays alive and remembers state — for AI agents, iterative dev workflows, or lightweight smoke tests.
40
+ <img src="docs/assets/terminal.webp" alt="browserctl terminal demo"/>
43
41
 
44
- **Use Playwright/Selenium when** you need parallel test suites, multi-browser support, or a full programmatic API.
42
+ </td>
43
+ <td align="center" width="50%">
45
44
 
46
- ---
45
+ **Browser**<br/>
46
+ <sub>What the browser sees as those commands run</sub>
47
+
48
+ <img src="docs/assets/browser_demo.gif" alt="browserctl browser demo"/>
47
49
 
48
- ## Requirements
50
+ </td>
51
+ </tr></table>
49
52
 
50
- - Ruby >= 3.2
51
- - Chrome or Chromium installed and on `PATH`
53
+ > Demo assets are regenerated automatically on every push to `main` that touches `demo/` or the login example. To regenerate locally:
54
+ >
55
+ > ```bash
56
+ > rake demo # full pipeline: screenshots + browser GIF + terminal GIF
57
+ > rake demo:screenshots # smoke test screenshots only
58
+ > rake demo:browser_gif # browser animation only (requires: ffmpeg)
59
+ > rake demo:terminal # terminal GIF only (requires: vhs)
60
+ > ```
52
61
 
53
62
  ---
54
63
 
55
- ## Installation
64
+ ## Quick Start
56
65
 
57
66
  ```bash
67
+ # 1. Install
58
68
  gem install browserctl
59
- ```
60
-
61
- Or in your `Gemfile`:
62
-
63
- ```ruby
64
- gem "browserctl"
65
- ```
66
69
 
67
- ---
70
+ # 2. Start the daemon
71
+ browserd &
68
72
 
69
- ## Quick Start
73
+ # 3. Open a named page
74
+ browserctl open main --url https://the-internet.herokuapp.com/login
70
75
 
71
- **1. Start the daemon**
76
+ # 4. Snapshot the page — get AI-friendly JSON with ref IDs
77
+ browserctl snap main
72
78
 
73
- ```bash
74
- browserd # headless (default)
75
- browserd --headed # visible browser window
76
- ```
79
+ # 5. Interact using refs
80
+ browserctl fill main --ref e1 --value tomsmith
81
+ browserctl fill main --ref e2 --value SuperSecretPassword!
82
+ browserctl click main --ref e3
77
83
 
78
- **2. Open a named page**
84
+ # 6. Observe
85
+ browserctl url main
86
+ browserctl snap main --diff # only what changed
79
87
 
80
- ```bash
81
- browserctl open login --url https://app.example.com/login
82
- ```
83
-
84
- **3. Snapshot the page to discover refs**
85
-
86
- ```bash
87
- browserctl snap login # AI-friendly JSON with ref IDs (default)
88
- browserctl snap login --format html
88
+ # 7. Done
89
+ browserctl shutdown
89
90
  ```
90
91
 
91
- **4. Interact using refs or selectors**
92
+ [Full Getting Started guide](docs/getting-started.md)
92
93
 
93
- ```bash
94
- browserctl fill login --ref e1 --value user@example.com
95
- browserctl fill login --ref e2 --value s3cr3t
96
- browserctl click login --ref e3
94
+ ---
97
95
 
98
- # or using explicit CSS selectors
99
- browserctl fill login "input[name=email]" user@example.com
100
- browserctl click login "button[type=submit]"
101
- ```
96
+ ## Use cases
102
97
 
103
- **5. Observe the result**
98
+ **AI coding agent authenticating into a staging environment** — the agent logs in once, the session persists, subsequent commands run inside the authenticated context without re-authenticating between steps.
104
99
 
105
- ```bash
106
- browserctl snap login --diff # only changed elements since last snap
107
- browserctl shot login --out /tmp/after-login.png --full
108
- browserctl url login
109
- ```
100
+ **Developer reproducing a multi-step bug report** — navigate to the failure point once, then iterate on the fix with the browser already in the right state; no restarting from the home page each run.
110
101
 
111
- **6. Manage pages and daemon**
112
-
113
- ```bash
114
- browserctl pages
115
- browserctl close login
116
- browserctl ping
117
- browserctl shutdown
118
- ```
102
+ **Automated smoke test that needs human sign-off** — the test runs until it hits something ambiguous, calls `browserctl pause`, lets a human inspect and act, then `browserctl resume` hands control back to the script with all state intact.
119
103
 
120
104
  ---
121
105
 
122
- ## All Commands
106
+ ## Why browserctl?
123
107
 
124
- ### Browser commands _(require `browserd` running)_
108
+ Most automation tools are stateless — every script spins up a fresh browser and tears it down. browserctl doesn't.
125
109
 
126
- | Command | Description |
127
- |---|---|
128
- | `open <page> [--url URL]` | Open or focus a named page |
129
- | `close <page>` | Close a named page |
130
- | `pages` | List open pages |
131
- | `goto <page> <url>` | Navigate a page to a URL |
132
- | `fill <page> <selector> <value>` | Fill an input field by CSS selector |
133
- | `fill <page> --ref <id> --value <v>` | Fill an input field by snapshot ref |
134
- | `click <page> <selector>` | Click an element by CSS selector |
135
- | `click <page> --ref <id>` | Click an element by snapshot ref |
136
- | `snap <page> [--format ai\|html] [--diff]` | Snapshot DOM; `--diff` returns only changed elements |
137
- | `watch <page> <selector> [--timeout N]` | Poll until selector appears (default timeout: 30s) |
138
- | `shot <page> [--out PATH] [--full]` | Take a screenshot |
139
- | `url <page>` | Print current URL |
140
- | `eval <page> <expression>` | Evaluate a JS expression |
141
- | `pause <page>` | Pause automation — browser stays live for manual interaction |
142
- | `resume <page>` | Resume automation after manual action |
143
- | `inspect <page>` | Open Chrome DevTools for a named page |
144
- | `cookies <page>` | List all cookies as JSON |
145
- | `set_cookie <page> <name> <value> <domain>` | Set a cookie (path defaults to `/`) |
146
- | `clear_cookies <page>` | Clear all cookies for a page |
147
- | `record start <name>` | Begin recording commands as a replayable workflow |
148
- | `record stop [--out path]` | End recording; saves to `.browserctl/workflows/` or custom path |
149
- | `record status` | Show whether a recording is active |
150
-
151
- ### Daemon commands
152
-
153
- | Command | Description |
154
- |---|---|
155
- | `ping` | Check if `browserd` is alive |
156
- | `shutdown` | Stop `browserd` |
110
+ | | browserctl | Playwright / Selenium |
111
+ |---|---|---|
112
+ | Session persists across commands | ✓ | ✗ (per-script lifecycle) |
113
+ | Named page handles | ✓ | ✗ |
114
+ | AI-friendly DOM snapshot | ✓ | ✗ |
115
+ | Human-in-the-loop pause/resume | ✓ | ✗ |
116
+ | Lightweight CLI interface | ✓ | ✗ |
117
+ | Full browser automation API | — | ✓ |
118
+ | Parallel multi-browser testing | — | ✓ |
157
119
 
158
- ### Workflow commands
120
+ **Use browserctl when** you need a browser that stays alive and remembers state — for AI agents, iterative dev workflows, or tasks that mix automation with human judgment.
159
121
 
160
- | Command | Description |
161
- |---|---|
162
- | `run <name\|file.rb> [--key value ...]` | Run a named workflow or workflow file |
163
- | `workflows` | List available workflows |
164
- | `describe <name>` | Show workflow params and steps |
122
+ **Use Playwright/Selenium when** you need parallel test suites, multi-browser support, or a full programmatic API.
165
123
 
166
124
  ---
167
125
 
168
- ## AI Snapshot Format
169
-
170
- `browserctl snap <page>` returns a compact JSON array of interactable elements — designed to be token-efficient for AI agents:
171
-
172
- ```json
173
- [
174
- {
175
- "ref": "e1",
176
- "tag": "input",
177
- "text": "",
178
- "selector": "form > input[name=email]",
179
- "attrs": {
180
- "type": "email",
181
- "name": "email",
182
- "placeholder": "Enter email"
183
- }
184
- },
185
- {
186
- "ref": "e2",
187
- "tag": "button",
188
- "text": "Sign in",
189
- "selector": "form > button",
190
- "attrs": {
191
- "type": "submit"
192
- }
193
- }
194
- ]
195
- ```
196
-
197
- Use `ref` values directly with `--ref` for zero-fragility interactions, or use `selector` values with `fill` and `click`.
198
-
199
- ### Ref-based interaction
126
+ ## Installation
200
127
 
201
- After a `snap`, use ref IDs instead of CSS selectors — no selector knowledge required:
128
+ **Requirements:** Ruby >= 3.3 · Chrome or Chromium on your `PATH`
202
129
 
203
130
  ```bash
204
- browserctl fill login --ref e1 --value user@example.com
205
- browserctl click login --ref e2
131
+ gem install browserctl
206
132
  ```
207
133
 
208
- ### Diff snapshots
209
-
210
- Track only what changed since the last snapshot — useful for AI agents monitoring async updates:
134
+ Or in your `Gemfile`:
211
135
 
212
- ```bash
213
- browserctl snap login --diff
136
+ ```ruby
137
+ gem "browserctl"
214
138
  ```
215
139
 
216
140
  ---
217
141
 
218
- ## Workflows
142
+ ## Claude Code Plugin
219
143
 
220
- Workflows are Ruby files using the `Browserctl.workflow` DSL. Place them in any of:
144
+ browserctl ships as a Claude Code plugin. Install it once and Claude automatically knows how to use the daemon, ref-based interaction, HITL patterns, and workflow authoring.
221
145
 
222
- - `./.browserctl/workflows/`
223
- - `~/.browserctl/workflows/`
146
+ **Interactive install**
224
147
 
225
- ### Example
226
-
227
- ```ruby
228
- # .browserctl/workflows/smoke_login.rb
229
- Browserctl.workflow "smoke_login" do
230
- desc "Log in and confirm the dashboard loads"
231
-
232
- param :email, required: true
233
- param :password, required: true, secret: true
234
- param :base_url, default: "https://app.example.com"
235
-
236
- step "open login page" do
237
- page(:login).goto("#{base_url}/login")
238
- end
239
-
240
- step "submit credentials" do
241
- page(:login).fill("input[name=email]", email)
242
- page(:login).fill("input[name=password]", password)
243
- page(:login).click("button[type=submit]")
244
- end
245
-
246
- step "verify dashboard" do
247
- page(:login).wait_for("[data-test=dashboard]", timeout: 10)
248
- assert page(:login).url.include?("/dashboard")
249
- end
250
- end
251
148
  ```
252
-
253
- ```bash
254
- browserctl run smoke_login --email me@example.com --password s3cr3t
149
+ /plugin marketplace add patrick204nqh/browserctl
150
+ /plugin install browserctl@browserctl
255
151
  ```
256
152
 
257
- ### Workflow DSL reference
258
-
259
- | Method | Description |
260
- |---|---|
261
- | `desc "text"` | Human-readable description |
262
- | `param :name, required:, secret:, default:` | Declare a parameter |
263
- | `step "label" { }` | Add a step (runs in order, halts on failure) |
264
- | `step "label", retry_count: N, timeout: S { }` | Step with retry and/or timeout |
265
- | `page(:name)` | Returns a `PageProxy` for the named page |
266
- | `invoke "other_workflow", **overrides` | Call another workflow |
267
- | `assert condition, "message"` | Raise `WorkflowError` if condition is false |
268
-
269
- ### PageProxy methods
270
-
271
- `goto(url)` · `fill(selector, value)` · `click(selector)` · `snapshot(**opts)` · `screenshot(**opts)` · `wait_for(selector, timeout: 10)` · `url` · `evaluate(expression)` · `pause` · `resume` · `inspect_page` · `cookies` · `set_cookie(name, value, domain, path: "/")` · `clear_cookies`
272
-
273
- ---
274
-
275
- ## Examples
153
+ **Project settings** — commit `.claude/settings.json` to share with your team:
276
154
 
277
- Ready-to-run smoke tests against [the-internet.herokuapp.com](https://the-internet.herokuapp.com) are included in `examples/the_internet/`. See [docs/smoke-testing-the-internet.md](docs/smoke-testing-the-internet.md) for annotated output and auto-generated screenshots of each scenario.
155
+ ```json
156
+ {
157
+ "extraKnownMarketplaces": {
158
+ "browserctl": {
159
+ "source": { "source": "github", "repo": "patrick204nqh/browserctl" }
160
+ }
161
+ },
162
+ "enabledPlugins": {
163
+ "browserctl@browserctl": true
164
+ }
165
+ }
166
+ ```
278
167
 
279
- For a full guide on building your own workflows, see [docs/writing-workflows.md](docs/writing-workflows.md).
168
+ Once installed, Claude Code loads the `browserctl` skill automatically — no `/invoke` needed.
280
169
 
281
170
  ---
282
171
 
283
172
  ## How it works
284
173
 
285
- `browserd` runs as a background process, listening on a Unix socket at `~/.browserctl/browserd.sock`. Start multiple named instances for agent isolation:
174
+ `browserd` runs as a background process, listening on a Unix socket at `~/.browserctl/browserd.sock`. It manages a Ferrum (Chrome DevTools Protocol) browser instance with named page handles. `browserctl` sends JSON-RPC commands over the socket and prints the result.
175
+
176
+ Start multiple named instances for agent isolation:
286
177
 
287
178
  ```bash
288
179
  browserd --name agent-a &
@@ -290,11 +181,22 @@ browserd --name agent-b &
290
181
  browserctl --daemon agent-a open main --url https://app.example.com
291
182
  ```
292
183
 
293
- It manages a Ferrum (Chrome DevTools Protocol) browser instance with named page handles.
184
+ The daemon shuts itself down after 30 minutes of inactivity.
294
185
 
295
- `browserctl` sends JSON-RPC commands over the socket and prints the result. Workflows run in-process through the same client.
186
+ ---
296
187
 
297
- The daemon shuts itself down after 30 minutes of inactivity.
188
+ ## Documentation
189
+
190
+ | | |
191
+ |---|---|
192
+ | [Getting Started](docs/getting-started.md) | Install, first session, first snapshot |
193
+ | [Concepts](docs/concepts/) | Sessions, snapshots, human-in-the-loop |
194
+ | [Guides](docs/guides/) | Writing workflows, handling challenges, smoke testing |
195
+ | [Command Reference](docs/reference/commands.md) | Every command and flag |
196
+ | [API Stability](docs/reference/api-stability.md) | Wire protocol contract and stability zones |
197
+ | [Product](docs/product.md) | What browserctl is and who it's for |
198
+ | [Vision & Roadmap](docs/vision.md) | Philosophy and release roadmap |
199
+ | [vs. agent-browser](docs/vs-agent-browser.md) | How browserctl differs from Vercel's agent-browser |
298
200
 
299
201
  ---
300
202
 
@@ -303,10 +205,14 @@ The daemon shuts itself down after 30 minutes of inactivity.
303
205
  ```bash
304
206
  git clone https://github.com/patrick204nqh/browserctl
305
207
  cd browserctl
306
- bin/setup # install deps + check for Chrome
208
+ bin/setup # brew bundle (macOS) + bundle install + Chrome check
307
209
 
308
210
  bundle exec rspec # run tests
309
211
  bundle exec rubocop # lint
212
+
213
+ rake demo # full pipeline: screenshots + browser GIF + terminal GIF
214
+ rake demo:screenshots # screenshots only (no VHS required)
215
+ rake demo:terminal # terminal GIF only
310
216
  ```
311
217
 
312
218
  ---
data/bin/browserctl CHANGED
@@ -21,9 +21,13 @@ require "browserctl/commands/snapshot"
21
21
  require "browserctl/commands/screenshot"
22
22
  require "browserctl/commands/watch"
23
23
  require "browserctl/commands/record"
24
- require "browserctl/commands/pause_resume"
24
+ require "browserctl/commands/pause"
25
+ require "browserctl/commands/resume"
25
26
  require "browserctl/commands/init"
26
27
  require "browserctl/commands/inspect"
28
+ require "browserctl/commands/export_cookies"
29
+ require "browserctl/commands/import_cookies"
30
+ require "browserctl/commands/status"
27
31
 
28
32
  def print_result(res)
29
33
  if res.is_a?(Hash) && res[:error]
@@ -59,8 +63,10 @@ def usage
59
63
  resume <page> Resume automation after manual action
60
64
  inspect <page> Open Chrome DevTools for a named page
61
65
  cookies <page> List all cookies as JSON
62
- set_cookie <page> <name> <value> <domain> Set a cookie (path defaults to /)
63
- clear_cookies <page> Clear all cookies for a page
66
+ set-cookie <page> <name> <value> <domain> Set a cookie (path defaults to /)
67
+ clear-cookies <page> Clear all cookies for a page
68
+ export-cookies <page> <path> Export cookies to a JSON file
69
+ import-cookies <page> <path> Import cookies from a JSON file
64
70
 
65
71
  Recording commands:
66
72
  record start <name> Start recording browser commands
@@ -68,12 +74,13 @@ def usage
68
74
  record status Show active recording name
69
75
 
70
76
  Workflow commands:
71
- run <name|file> [--key value ...] Run a workflow
77
+ run <name|file> [--params file] [--key value ...] Run a workflow
72
78
  workflows List available workflows
73
79
  describe <name> Describe a workflow
74
80
 
75
81
  Daemon commands:
76
82
  ping Check if browserd is alive
83
+ status Show daemon status, PID, and open pages
77
84
  shutdown Stop browserd
78
85
 
79
86
  Options:
@@ -101,15 +108,27 @@ case cmd
101
108
  when "run"
102
109
  name = args.shift or abort "usage: browserctl run <workflow_name|file.rb> [--key value ...]"
103
110
  if File.exist?(name)
104
- before = Browserctl::REGISTRY.keys.dup
111
+ before = Browserctl.registry_snapshot.keys
105
112
  load File.expand_path(name)
106
- name = (Browserctl::REGISTRY.keys - before).first || File.basename(name, ".rb")
113
+ name = (Browserctl.registry_snapshot.keys - before).first || File.basename(name, ".rb")
107
114
  end
108
- params = {}
115
+ params_file_idx = args.index("--params")
116
+ file_params = {}
117
+ if params_file_idx
118
+ params_path = args.delete_at(params_file_idx + 1)
119
+ args.delete_at(params_file_idx)
120
+ begin
121
+ file_params = Browserctl::Runner.load_params_file(params_path)
122
+ rescue StandardError => e
123
+ abort "Error loading params file: #{e.message}"
124
+ end
125
+ end
126
+ cli_params = {}
109
127
  args.each_slice(2) do |flag, val|
110
128
  key = flag.sub(/\A--/, "").to_sym
111
- params[key] = val
129
+ cli_params[key] = val
112
130
  end
131
+ params = file_params.merge(cli_params)
113
132
  success = runner.run_workflow(name, **params)
114
133
  exit(success ? 0 : 1)
115
134
 
@@ -139,13 +158,16 @@ else
139
158
  when "url" then print_result(client.url(args[0]))
140
159
  when "eval" then print_result(client.evaluate(args[0], args[1]))
141
160
  when "watch" then Browserctl::Commands::Watch.run(client, args)
142
- when "pause" then Browserctl::Commands::PauseResume.pause(client, args)
143
- when "resume" then Browserctl::Commands::PauseResume.resume(client, args)
161
+ when "pause" then Browserctl::Commands::Pause.run(client, args)
162
+ when "resume" then Browserctl::Commands::Resume.run(client, args)
144
163
  when "inspect" then Browserctl::Commands::Inspect.run(client, args)
145
- when "cookies" then print_result(client.cookies(args[0]))
146
- when "set_cookie" then print_result(client.set_cookie(args[0], args[1], args[2], args[3]))
147
- when "clear_cookies" then print_result(client.clear_cookies(args[0]))
164
+ when "cookies" then print_result(client.cookies(args[0]))
165
+ when "set-cookie" then print_result(client.set_cookie(args[0], args[1], args[2], args[3]))
166
+ when "clear-cookies" then print_result(client.clear_cookies(args[0]))
167
+ when "export-cookies" then Browserctl::Commands::ExportCookies.run(client, args)
168
+ when "import-cookies" then Browserctl::Commands::ImportCookies.run(client, args)
148
169
  when "ping" then print_result(client.ping)
170
+ when "status" then Browserctl::Commands::Status.run(client)
149
171
  when "shutdown" then print_result(client.shutdown)
150
172
  else
151
173
  abort "unknown command: #{cmd}\nRun 'browserctl --help' for usage."
data/bin/browserd CHANGED
@@ -16,7 +16,13 @@ opts = Optimist.options do
16
16
  opt :name, "Daemon instance name for multi-agent use", default: nil, short: "-n", type: :string
17
17
  end
18
18
 
19
- Browserctl.logger = Browserctl.build_logger(opts[:log_level])
19
+ if opts[:name] && opts[:name] !~ /\A[a-zA-Z0-9_-]{1,64}\z/
20
+ abort "Invalid daemon name #{opts[:name].inspect} — use only letters, digits, _ or -"
21
+ end
22
+
23
+ log_path = Browserctl.log_path(opts[:name])
24
+ warn "browserd starting — log: #{log_path}"
25
+ Browserctl.logger = Browserctl.build_logger(opts[:log_level], log_path: log_path)
20
26
  Browserctl::Server.new(
21
27
  headless: !opts[:headed],
22
28
  socket_path: Browserctl.socket_path(opts[:name]),
data/bin/setup CHANGED
@@ -1,14 +1,18 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
 
4
- echo "==> Installing dependencies..."
4
+ if [[ "$(uname)" == "Darwin" ]] && command -v brew &>/dev/null; then
5
+ echo "==> Installing Homebrew dependencies (Brewfile)..."
6
+ brew bundle --no-upgrade
7
+ fi
8
+
9
+ echo "==> Installing gem dependencies..."
5
10
  bundle install
6
11
 
7
12
  echo "==> Checking for Chrome/Chromium..."
8
13
  if ! command -v google-chrome &>/dev/null && ! command -v chromium-browser &>/dev/null && ! command -v chromium &>/dev/null; then
9
14
  echo "WARNING: Chrome/Chromium not found on PATH."
10
- echo " Install it before running browserctl."
11
- echo " macOS: brew install --cask google-chrome"
15
+ echo " macOS: brew bundle (includes google-chrome)"
12
16
  echo " Ubuntu: sudo apt-get install -y chromium-browser"
13
17
  fi
14
18
 
@@ -55,7 +55,7 @@ Browserctl.workflow "cloudflare_hitl" do
55
55
 
56
56
  step "wait for content and snapshot" do
57
57
  page(:main).wait_for(selector, timeout: 15)
58
- result = page(:main).snapshot(format: "ai")
58
+ result = page(:main).snapshot(format: "elements")
59
59
  $stdout.puts " Snapshot: #{result[:snapshot]&.length || 0} elements captured"
60
60
  end
61
61