chromeflow 0.7.1 → 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,36 +1,50 @@
1
1
  {
2
2
  "name": "chromeflow",
3
- "version": "0.7.1",
4
- "description": "Browser guidance MCP server for Claude Code highlights, clicks, fills, and captures from the web so you don't have to.",
3
+ "version": "0.9.8",
4
+ "description": "MCP server for chromeflow — lets Claude Code or Codex CLI drive your real Chrome browser with sessions intact. Plugin install recommended; npx chromeflow for manual MCP wiring.",
5
5
  "type": "module",
6
+ "main": "./bin/chromeflow.mjs",
6
7
  "bin": {
7
- "chromeflow": "dist/index.js"
8
+ "chromeflow": "./bin/chromeflow.mjs"
8
9
  },
9
10
  "files": [
10
- "dist",
11
- "CLAUDE.md",
12
- "README.md"
11
+ "bin/",
12
+ "README.md",
13
+ "LICENSE"
13
14
  ],
14
- "repository": {
15
- "type": "git",
16
- "url": "git+https://github.com/NeoDrew/chromeflow.git"
15
+ "engines": {
16
+ "node": ">=20"
17
+ },
18
+ "scripts": {
19
+ "build": "../plugin/scripts/build-server.sh && mkdir -p bin && cp ../plugin/server/chromeflow.mjs bin/chromeflow.mjs",
20
+ "dev": "tsc --watch"
17
21
  },
18
- "homepage": "https://github.com/NeoDrew/chromeflow",
19
- "license": "MIT",
20
22
  "keywords": [
21
23
  "mcp",
22
- "claude",
24
+ "model-context-protocol",
23
25
  "claude-code",
24
- "browser",
25
- "automation",
26
- "chrome"
26
+ "codex-cli",
27
+ "browser-automation",
28
+ "chrome-extension",
29
+ "ai-agent",
30
+ "agent-tooling",
31
+ "playwright-alternative",
32
+ "browser-use-alternative",
33
+ "puppeteer-alternative",
34
+ "web-automation",
35
+ "stripe",
36
+ "canvas",
37
+ "real-chrome-session"
27
38
  ],
28
- "scripts": {
29
- "prebuild": "cp ../../CLAUDE.md ./CLAUDE.md && cp ../../README.md ./README.md",
30
- "build": "tsc",
31
- "postbuild": "node --input-type=commonjs -e \"const fs=require('fs');const f='dist/index.js';fs.writeFileSync(f,'#!/usr/bin/env node\\n'+fs.readFileSync(f,'utf8'));\"",
32
- "dev": "tsc --watch"
39
+ "repository": {
40
+ "type": "git",
41
+ "url": "git+https://gitlab.com/NeoDrew/chromeflow.git",
42
+ "directory": "packages/mcp-server"
33
43
  },
44
+ "homepage": "https://chromeflow.run",
45
+ "bugs": "https://gitlab.com/NeoDrew/chromeflow/-/issues",
46
+ "license": "MIT",
47
+ "author": "Andrew Robertson (NeoDrewX)",
34
48
  "dependencies": {
35
49
  "@modelcontextprotocol/sdk": "^1.0.0",
36
50
  "ws": "^8.18.0",
@@ -39,6 +53,7 @@
39
53
  "devDependencies": {
40
54
  "@types/node": "^22.0.0",
41
55
  "@types/ws": "^8.5.0",
56
+ "esbuild": "^0.24.0",
42
57
  "typescript": "^5.5.0"
43
58
  }
44
59
  }
package/CLAUDE.md DELETED
@@ -1,356 +0,0 @@
1
- # Chromeflow — Claude Instructions
2
-
3
- ## What chromeflow is
4
- Chromeflow is a browser guidance tool. When a task requires the user to interact with a
5
- website (create accounts, set up billing, retrieve API keys, configure third-party services),
6
- use chromeflow to guide them through it visually instead of giving text instructions.
7
-
8
- ## When to use chromeflow (be proactive)
9
- Use chromeflow automatically whenever a task requires:
10
- - Creating or configuring a third-party account (Stripe, SendGrid, Supabase, Vercel, etc.)
11
- - Retrieving API keys, secrets, or credentials to place in `.env`
12
- - Setting up pricing tiers, webhooks, or service configuration in a web UI
13
- - Any browser-based step that is blocking code work
14
-
15
- Do NOT ask "should I open the browser?" — just do it. The user expects seamless handoff.
16
-
17
- **Never end a response with a "you still need to" list of browser tasks.** If code changes are done and browser steps remain (e.g. creating a Stripe product, adding an env var), continue immediately with chromeflow — don't hand them back to the user.
18
-
19
- ## HARD RULES — never break these
20
-
21
- 1. **Never use Bash as a fallback for browser tasks.** If `click_element` fails, use
22
- `scroll_page` then retry, or use `highlight_region` to show the user. Never use
23
- `osascript`, `applescript`, or any shell command to control the browser.
24
-
25
- 2. **Never use `take_screenshot` to read page content.** After `scroll_page`, after
26
- `click_element`, after navigation — always call `get_page_text`, not `take_screenshot`.
27
- `get_page_text` returns up to 10,000 characters; if truncated it tells you the next
28
- `startIndex` to paginate. When you only need to confirm a specific phrase is present,
29
- prefer `find_text("phrase")` — it returns matches with context and selectors instead of
30
- dumping the whole page. Screenshots are only for locating an element's pixel position
31
- when DOM queries have already failed. Never take more than 1–2 screenshots in a row.
32
-
33
- 3. **Use `wait_for_selector` to wait for async page changes** (build completion, modals,
34
- toasts). Never poll with repeated `take_screenshot` calls.
35
-
36
- ## Guided flow pattern
37
-
38
- ```
39
- 1. open_page(url) — navigate to the right page (add new_tab=true to keep current tab open; add background=true to keep the current tab focused if its form auto-saves on blur)
40
- 2. For each step:
41
- a. Claude acts directly:
42
- click_element("Save") — press buttons/links Claude can press
43
- click_element("Save", until_selector=".success-toast") — when synthetic clicks may silently no-op on a React-heavy site, require an observable post-click condition (or until_url_contains / until_text_contains)
44
- get_page_text() or wait_for_selector(".success") — confirm after click without an until-clause; click_element returns after 600ms regardless of outcome unless until_* was used
45
- fill_form([{label, value}, ...], exact=true) — fill multiple fields in one call; pass exact=true on dense forms to refuse fuzzy text-walk matches
46
- fill_input("Product name", "Pro") — fill a single field (works on React, CodeMirror, and contenteditable). Always check the response — it names the matched element so you can spot wrong-field matches
47
- fill_input("Rate", "5", exact=true) — exact-match mode for short generic labels that may collide with neighbouring fields
48
- react_set_input("input[name=email]", "x@y") — for inputs where fill_input fails (or for iframe-hosted inputs via frame=...) — handles the prototype-from-instance gotcha automatically
49
- type_text("hello world") — type via trusted keyboard events (use when fill_input fails isTrusted checks)
50
- type_text("description", frame="iframe.se-rte") — type into a same-origin iframe's contenteditable (eBay description editor pattern)
51
- set_file_input("Upload", "/abs/path/to/file.zip") — upload a file; returns success only after the upload is observably committed (no manual sleep needed between rapid uploads)
52
- clear_overlays() — call this immediately after fill_input/fill_form succeeds
53
- scroll_to_element("label text") — jump directly to a known field; prefer this over scroll_page when the target is known
54
- scroll_page("down") — reveal off-screen content when target location is unknown
55
- b. Check results with text, not vision:
56
- get_page_text() — read errors/status after actions
57
- wait_for_selector(".success") — wait for a new element to appear
58
- wait_for_change(".toast") — wait for an existing element's content to mutate, then read it (uses MutationObserver, cheaper than polling)
59
- execute_script("return await fetch('/api/x').then(r => r.json())") — top-level await is supported, no window.__variable + sleep dance needed
60
- c. When an element can't be found or clicked:
61
- scroll_page("down") and retry — always try this first
62
- get_elements() — get EXACT DOM coords when needed
63
- highlight_region(selector,msg) — highlight by CSS selector (preferred; scrolls element into view automatically)
64
- highlight_region(x,y,w,h,msg) — highlight by coords only if no selector available (coords go stale on scroll)
65
- [absolute last resort] take_screenshot() — only if you genuinely can't identify the element from DOM
66
- d. Pause for the user when needed:
67
- find_and_highlight(text, msg) — show the user what to do
68
- wait_for_click() — wait for user interaction
69
- [after fill_input] clear_overlays() — always clear after filling
70
- 3. clear_overlays() — clean up when done
71
- ```
72
-
73
- **Default to automation.** Only pause for human input when the step genuinely requires
74
- personal data or a human decision.
75
-
76
- ## What to do automatically vs pause for the user
77
-
78
- **Claude acts directly** (`click_element` / `fill_input`):
79
- - Any button: Save, Continue, Create, Add, Confirm, Next, Submit, Update
80
- - Product names, descriptions, feature lists
81
- - Prices and amounts specified in the task
82
- - URLs, redirect URIs, webhook endpoints
83
- - Selecting billing period, currency, or other known options
84
- - Dismissing cookie banners, cookie dialogs, "not now" prompts
85
-
86
- **Pause for the user** (`find_and_highlight` + `wait_for_click`):
87
- - Email address / username / login
88
- - Password or passphrase
89
- - Payment method / billing / card details
90
- - Phone number / 2FA / OTP codes
91
- - Any legal consent the user must personally accept
92
- - Choices that depend on user preference Claude wasn't told
93
-
94
- ## Capturing credentials
95
- After a secret key or API key is revealed:
96
- 1. `read_element(hint)` — capture the value
97
- 2. `write_to_env(KEY_NAME, value, envPath)` — write to `.env`
98
- 3. Tell the user what was written
99
-
100
- Use the absolute path for `envPath` — it's the Claude Code working directory + `/.env`.
101
-
102
- To capture and share a screenshot (e.g. for uploading to a form or pasting into a chat),
103
- use `take_screenshot(copy_to_clipboard=true, save_to="downloads")` — saves a PNG to ~/Downloads
104
- and copies it to the clipboard. The defaults (`copy_to_clipboard=false, save_to="none"`) return
105
- the image to Claude only.
106
-
107
- ## Working with complex forms
108
- - Before filling a large or unfamiliar form, call `get_form_fields()` to get a full inventory
109
- of every field (type, label, current value, vertical position, and section heading). Use
110
- `get_elements()` when you need pixel coordinates of visible elements; use `get_form_fields()`
111
- when you need to understand the full structure of a form including fields below the fold.
112
- If you only need one or two specific fields, use `find_input("hint")` instead — targeted
113
- lookup is much cheaper than the full inventory and returns labels you can pipe straight
114
- into `fill_input`.
115
- - `get_form_fields()` includes `[type=file]` fields even when they are visually hidden behind
116
- custom drag-and-drop zones. Use `set_file_input(hint, filePath)` to upload a file — provide
117
- the label/hint text and the absolute path to the file on disk.
118
- - For forms with multiple fields, use `fill_form([{label, value}, ...])` to fill them all
119
- in a single call. It returns a per-field success/failure report so you can immediately see
120
- which fields weren't found. Use `fill_input` only for a single field.
121
- - `fill_input` and `fill_form` work on React-controlled inputs, contenteditable (Stripe,
122
- Notion), and **CodeMirror 6 editors** — auto-detected. After filling, the value is read
123
- back and a warning is shown if React did not accept it.
124
- - **Monaco editors** (VS Code-style code editors on DataAnnotation, etc.) appear in
125
- `get_form_fields()` as type "monaco". They cannot be filled via `fill_input` — use
126
- `execute_script` with the Monaco API instead:
127
- ```js
128
- // Read content from the first Monaco model
129
- monaco.editor.getModels()[0].getValue()
130
- // Write content to the first Monaco model
131
- monaco.editor.getModels()[0].setValue('new content here')
132
- ```
133
- - `set_file_input` accepts CSS selectors as the hint (e.g. `#import-problem-file`,
134
- `.upload-input`) in addition to label text. Use selectors when file inputs are hidden
135
- behind custom UIs and have no visible label.
136
- - **Replacing an already-uploaded file**: after `set_file_input` succeeds, the input
137
- becomes invisible and a "Remove" span/button typically appears near the upload area.
138
- To replace the file: `click_element("Remove", nth=N)` (the right `nth` if there are
139
- multiple), then call `set_file_input(hint, newPath)` again — the same hidden input is
140
- recycled and accepts the new file. Verify with `get_form_fields()` between the two
141
- steps so you're sure the input has reappeared.
142
- - **Forcing auto-save on idempotent text edits** (e.g. keep-alive loop on an
143
- auto-saving DataAnnotation form): some auto-save logic diffs against the last-saved
144
- value and skips no-op writes. To force a real save on each tick without changing
145
- visible content, toggle a trailing space — add when absent, remove when present.
146
- `fill_input` value comparison handles both directions transparently.
147
- - After any radio/checkbox click that reveals new fields, call `get_form_fields()` again —
148
- the inventory will include the new fields and warn if more hidden ones still exist.
149
- - If a form has collapsible sections, expand them all before calling `get_form_fields()` so
150
- the field list is complete. Use the `[under: "section name"]` context in each field's entry
151
- to identify fields by section rather than by index — indices shift when sections expand.
152
- - Prefer `scroll_to_element("label text or #selector")` over `scroll_page` whenever you know
153
- which field or section you need — it scrolls precisely and confirms the matched element.
154
- - For multi-session tasks (long forms that may exceed context), call `save_page_state()` as a
155
- checkpoint. A future session can call `restore_page_state()` to reload all field values.
156
-
157
- ## Discovery — find without dumping the whole page
158
-
159
- Three lightweight tools save tokens vs `get_page_text` / `get_form_fields` when you don't need the full content:
160
-
161
- - `find_text("Saved successfully")` — grep the DOM. Returns surrounding context, a CSS selector, and a `clickable` flag for each match. Use this instead of `get_page_text` when you're checking whether a specific phrase is present, or to locate a button by its visible text. If `clickable=true`, pipe the matched text straight into `click_element`.
162
- - `find_input("Email")` — fuzzy form-field lookup, top-N. Returns labels you can pipe straight into `fill_input(label, value)` — both tools share the same match ranks (`aria-eq` → `placeholder-eq` → `label-text-eq` → `name-eq` → `id-eq` → `*-includes` → `fuzzy-text-walk`). Cheaper than `get_form_fields` when you just need a couple of specific fields. Pass `type_filter="email"` to restrict to a specific input type.
163
- - `wait_for_text("Saved")` — wait for text to appear without knowing the selector ahead of time. Complements `wait_for_selector` for the case where you only know the post-action message.
164
-
165
- All three pierce open shadow roots and accept `frame="iframe.selector"` for same-origin iframes. Pass `regex=true` on `find_text` / `wait_for_text` for case-insensitive regex matching. Pass `exact=true` on `find_input` to refuse fuzzy text-walk matches.
166
-
167
- ```
168
- find_text("Build complete", scope_selector=".log-output") — only check the build log section
169
- find_input("Card number", type_filter="text") — find Stripe's card-number field
170
- wait_for_text("Deploy successful", timeout_ms=30000) — wait up to 30s after clicking Deploy
171
- ```
172
-
173
- Reach for these BEFORE `get_page_text` / `get_form_fields` when the goal is "is X here?" or "where is X?". Reserve `get_page_text` for reading actual content, and `get_form_fields` for understanding a whole form's structure.
174
-
175
- ## Working with multiple tabs
176
- - Before opening a new tab, call `list_tabs()` to check if the target URL is already open —
177
- use `switch_to_tab` to return to it instead of opening a duplicate.
178
- - `open_page(url, new_tab=true)` opens a URL without losing the current tab. Use sparingly —
179
- prefer switching to an existing tab over opening a new one.
180
- - `switch_to_tab("1")` switches by tab number; `switch_to_tab("form")` matches by URL or title substring.
181
- - Before navigating away from a partially-filled form, call `save_page_state()` so the form
182
- can be restored if the tab reloads or the page loses its state on return.
183
- - **In long-lived self-rescheduling loops**, the active tab can silently drift mid-session
184
- (the user navigates manually while AFK, or another tab steals focus). At the start of
185
- every loop iteration, call `list_tabs` and verify the active tab's URL matches your
186
- expected target — if not, `switch_to_tab(<URL or title substring>)` before running
187
- `execute_script` or any other tab-scoped tool. Without this guard, scripts run on the
188
- wrong tab and fail with confusing "undefined" errors that look like page bugs.
189
-
190
- ## Error handling
191
-
192
- **After any action**, confirm with `get_page_text()` or `wait_for_selector` — never take a
193
- screenshot to check what happened.
194
-
195
- **`click_element` not found:**
196
- 1. `scroll_page("down")` then retry `click_element`
197
- 2. `get_elements()` to get exact coords → `highlight_region(x,y,w,h,msg)`
198
- 3. `take_screenshot()` only if you still can't identify the element from DOM queries
199
-
200
- **Multiple elements with the same label** (e.g. many "Remove" buttons):
201
- `click_element("Remove", nth=3)` — use `nth` (1-based) to target the specific one by order top-to-bottom. Check `get_form_fields` or `get_page_text` first to determine which index corresponds to the right section.
202
-
203
- **`fill_input` matched the wrong field** (always read the response — it names the matched element):
204
- - If you wanted "Ad rate" and got back `<input name="title">`, the fuzzy text walker latched onto a neighbour. Retry with `exact=true` and a more specific hint, or use `react_set_input(selector, value)` with a precise CSS selector.
205
- - The match strength is reported as `aria-eq`, `placeholder-eq`, `label-text-eq`, `name-eq`, `id-eq`, or a fuzzier kind (listed strongest first). Matches labeled `*-includes` or `fuzzy-text-walk` are the lowest-confidence kinds — verify that the matched element really is the one you wanted.
206
-
207
- **`fill_input` not found or rejected by the page:**
208
- 1. `click_element(hint)` to focus the field, then retry `fill_input`
209
- 2. `react_set_input("input[name=...]", value)` — uses the input's own prototype to set the value, dispatches input/change. Handles the "Illegal invocation" iframe gotcha and the prototype-from-instance ceremony for you.
210
- 3. If the site rejects programmatic input (isTrusted check, shadow DOM, custom editors):
211
- - `click_element(hint)` to focus the field
212
- - `execute_script("document.execCommand('selectAll')")` to select the existing content so the typed text replaces it
213
- - `type_text("new value")` — uses CDP trusted keyboard events that pass isTrusted checks
214
- 4. For iframe-hosted contenteditable rich-text editors (eBay's description, etc.):
215
- - `type_text("body content", frame="iframe.selector")` — same-origin only. Focuses the iframe's contenteditable, types via CDP, dispatches input/change in the iframe's context so React reads the new value.
216
- 5. `find_and_highlight(hint, "Click here — I'll fill it in")` (no `valueToType`) then
217
- `wait_for_click()` — the user's click focuses the field and `fill_input`'s active-element
218
- fallback fills it automatically
219
- 6. Call `clear_overlays()` after `fill_input` succeeds
220
- 7. Only use `valueToType` when the user must personally type the value (password, personal data)
221
-
222
- **`click_element` returned success but the page didn't change** (common on React-heavy sites where synthetic clicks no-op):
223
- Pass an `until_*` clause to require an observable post-click condition. `click_element` returns success=false if the condition isn't met within `until_timeout_ms` (default 5000):
224
- ```
225
- click_element("List with displayed fees", until_url_contains="/listing-published")
226
- click_element("Save", until_selector=".success-toast")
227
- click_element("Confirm", until_text_contains="Order placed")
228
- ```
229
- If success=false: try `react_set_input` to fire the click via the page's own React handler, or use `execute_script("document.querySelector(...).click()")` directly.
230
-
231
- **`set_file_input` not committing on rapid back-to-back uploads:**
232
- The default 3000ms commit-wait is enough for most uploaders. For batch photo uploads on slow React file handlers (eBay's 25-photo carousel, Stripe Connect document upload), increase `wait_ms` to 6000–8000 or pass `verify_selector` pointing at the thumbnail/Remove-button that should appear:
233
- ```
234
- set_file_input("Photos", "/path/1.jpg", verify_selector=".photo-thumbnail:nth-of-type(1)")
235
- set_file_input("Photos", "/path/2.jpg", verify_selector=".photo-thumbnail:nth-of-type(2)")
236
- ```
237
- The page-level file count is reported in the response — use it to spot uploaders that consume-and-reset the input vs uploaders that keep the file there.
238
-
239
- **Waiting for async results** (build, save, deploy): `wait_for_selector(selector, timeout)` — never poll with screenshots. `wait_for_selector` pierces open shadow roots, so a selector inside a web component (Outlier task UI, Lit/Stencil widget) matches without ceremony.
240
-
241
- **Waiting for a shadow host's tree to attach** (e.g. SPA route flips where `<my-host>` appears 10s before its shadow content hydrates, and `wait_for_selector("my-host")` resolves while `host.shadowRoot` is still null): pass `shadow_root=true`. The wait then requires the matched element's `.shadowRoot` to be non-null, not just for the host element to exist.
242
- ```
243
- wait_for_selector("my-host", shadow_root=true) — wait until the host element both exists AND has an attached shadowRoot
244
- ```
245
-
246
- **Waiting for an existing region to update** (e.g. click Save, then get the confirmation toast; send a chat message, then get the reply): `wait_for_change(selector)` uses a MutationObserver on the element's subtree and returns its new text content as soon as the mutation settles. Prefer this over `wait_for_selector` + `get_page_text` when the element already exists and you just need its next state — one call instead of two, no polling.
247
-
248
- **Pre-filling `prompt()` and `confirm()` dialogs**: When a page action will trigger a JS
249
- dialog (e.g. "Save As" calling `prompt()`), call `set_dialog_response` BEFORE the action:
250
- ```
251
- set_dialog_response(type="prompt", value="my-filename") — next prompt() returns "my-filename"
252
- set_dialog_response(type="confirm", value="true") — next confirm() returns true
253
- ```
254
- Then trigger the action (e.g. `click_element("Save As")`). The response is consumed once.
255
-
256
- **React Select / custom styled dropdowns** (e.g. "Select..." components on DataAnnotation):
257
- `click_element` and `fill_input` do NOT work on these — they intercept native events. The cleanest path is `react_set_input` (which handles the prototype-from-instance setter for you) followed by a click on the filtered option:
258
-
259
- ```
260
- 1. react_set_input('input[id*="react-select-3-input"]', "Target Option")
261
- — sets the hidden combobox input via its own prototype's value-setter and dispatches the input event React's onChange listens for
262
- 2. (300ms pause for the dropdown to filter)
263
- 3. execute_script("document.querySelector('[id*=\"react-select-3-option-0\"]').click()")
264
- 4. Verify the control shows the selected value:
265
- execute_script("document.querySelector('[class*=\"singleValue\"]').textContent.trim()")
266
- ```
267
-
268
- If you must hand-roll this with `execute_script` (older React-Select versions, weird custom wrappers), prefer reading the prototype FROM the instance to avoid "Illegal invocation" inside iframes:
269
-
270
- ```js
271
- var input = document.querySelector('input[id*="react-select-3-input"]');
272
- input.focus();
273
- var setter = Object.getOwnPropertyDescriptor(Object.getPrototypeOf(input), 'value').set;
274
- setter.call(input, 'Target Option');
275
- input.dispatchEvent(new Event('input', { bubbles: true }));
276
- ```
277
-
278
- Fallback if the combobox approach doesn't work (older React Select versions):
279
- ```js
280
- var controls = document.querySelectorAll('[class*="control"]');
281
- controls[N].click();
282
- var allEls = document.querySelectorAll('*');
283
- for (var i = 0; i < allEls.length; i++) {
284
- if (allEls[i].textContent.trim() === 'Target Option' && allEls[i].children.length === 0) {
285
- allEls[i].dispatchEvent(new MouseEvent('mousedown', {bubbles: true}));
286
- allEls[i].click();
287
- break;
288
- }
289
- }
290
- ```
291
-
292
- **Page text with large embedded content** (e.g. uploaded log files previewed inline): full-page `get_page_text()` pagination becomes unwieldy. Scope to a specific section instead:
293
- ```
294
- get_page_text(selector=".section-3") — scope to a CSS selector
295
- get_page_text(selector="#upload-form") — scope to an id
296
- ```
297
- Use `execute_script("document.querySelectorAll('section').length")` to find structural selectors first.
298
-
299
- **Page content rendered as images** (e.g. qualification "Examples" tabs that show PNG screenshots
300
- instead of DOM text): `get_page_text()` returns nothing useful. Zoom out and screenshot instead:
301
-
302
- ```js
303
- // Shrink to fit wide content, then screenshot
304
- document.body.style.zoom = '0.4';
305
- // use take_screenshot() to read it
306
- // restore afterward:
307
- document.body.style.zoom = '1';
308
- ```
309
-
310
- **Downloads via `execute_script`**: Creating a Blob URL and clicking an anchor via
311
- `execute_script` sometimes fails due to CSP or timing. If a download doesn't trigger:
312
- 1. Retry the exact same `execute_script` call
313
- 2. If still failing, use `find_and_highlight` to show the user a download button to click manually
314
-
315
- **React-controlled native radios/checkboxes that don't update `checked`**: `click_element`
316
- auto-handles this for native `<input type=radio>` and `<input type=checkbox>` inputs
317
- (including labels that wrap or `for=` reference them). The flow:
318
- - If a radio is already `checked=true`, `click_element` skips the click — re-clicking can
319
- toggle it OFF on React forms whose `onChange` interprets the click as a deselect. The
320
- response says `"X — radio already checked, click skipped"`.
321
- - If the standard click fires but the input's `checked` state didn't change as expected
322
- (radio still unchecked, or checkbox didn't toggle), `click_element` automatically
323
- dispatches the full pointer-event chain (`pointerdown → mousedown → pointerup → mouseup
324
- → click`) on the input. The response says `"now checked (after pointer-chain fallback)"`.
325
-
326
- You only need to drop into `execute_script` for the no-native-input case below.
327
-
328
- **Shadow DOM `[role=radio]` / role-only custom radios silently no-op**: On sites like
329
- Outlier where the radio is a `[role=radio]` div with no underlying `<input>`,
330
- `click_element`'s native-input fallback can't help — the click target has no `.checked`
331
- property to verify. Two things must be true: (a) the element must be scrolled into view
332
- FIRST (`scrollIntoView({block:'center'})`), and (b) the full pointer-event chain must
333
- fire — not just `click()`:
334
- ```js
335
- ['pointerdown','mousedown','pointerup','mouseup','click'].forEach(t =>
336
- el.dispatchEvent(new MouseEvent(t, {bubbles: true, cancelable: true}))
337
- );
338
- ```
339
- After scroll, re-query the radio list — its length may change as more content becomes
340
- visible. Then verify `aria-checked === "true"` before moving on.
341
-
342
- **Visibility-detection overlays** (e.g. Multimango's "Content Hidden" black overlay):
343
- Some sites render a full-screen overlay when the tab loses focus, triggered by
344
- `document.visibilityState` / `document.hidden`. Chromeflow tab-switching triggers it.
345
- Workaround — remove the overlay and patch the APIs:
346
- ```js
347
- document.querySelectorAll('[style*="z-index: 99999"]').forEach(el => el.remove());
348
- Object.defineProperty(document, 'hidden', { get: () => false, configurable: true });
349
- Object.defineProperty(document, 'visibilityState', { get: () => 'visible', configurable: true });
350
- ['visibilitychange','blur'].forEach(t =>
351
- document.addEventListener(t, e => e.stopImmediatePropagation(), true)
352
- );
353
- ```
354
- Re-apply after every navigation.
355
-
356
- **Never use Bash to work around a stuck browser interaction.**
package/dist/index.js DELETED
@@ -1,92 +0,0 @@
1
- #!/usr/bin/env node
2
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
- import { WsBridge } from "./ws-bridge.js";
5
- import { registerBrowserTools } from "./tools/browser.js";
6
- import { registerHighlightTools } from "./tools/highlight.js";
7
- import { registerCaptureTools } from "./tools/capture.js";
8
- import { registerFlowTools } from "./tools/flow.js";
9
- import { runSetup, runUpdate, runUninstall } from "./setup.js";
10
- if (process.argv[2] === "setup") {
11
- runSetup().catch((err) => {
12
- console.error(err);
13
- process.exit(1);
14
- });
15
- } else if (process.argv[2] === "update") {
16
- runUpdate().catch((err) => {
17
- console.error(err);
18
- process.exit(1);
19
- });
20
- } else if (process.argv[2] === "uninstall") {
21
- runUninstall().catch((err) => {
22
- console.error(err);
23
- process.exit(1);
24
- });
25
- } else {
26
- main().catch((err) => {
27
- console.error("[chromeflow] Fatal error:", err);
28
- process.exit(1);
29
- });
30
- }
31
- async function main() {
32
- const bridge = new WsBridge();
33
- const server = new McpServer({
34
- name: "chromeflow",
35
- version: "0.1.14"
36
- });
37
- registerBrowserTools(server, bridge);
38
- registerHighlightTools(server, bridge);
39
- registerCaptureTools(server, bridge);
40
- registerFlowTools(server, bridge);
41
- server.prompt(
42
- "chromeflow-status",
43
- "Check if the chromeflow Chrome extension is connected and which tab is active",
44
- async () => {
45
- const connected = bridge.isConnected();
46
- if (!connected) {
47
- return {
48
- messages: [{
49
- role: "user",
50
- content: {
51
- type: "text",
52
- text: "Check chromeflow status. The Chrome extension is NOT connected. Tell the user to reload the chromeflow extension in chrome://extensions."
53
- }
54
- }]
55
- };
56
- }
57
- try {
58
- const response = await bridge.request({ type: "list_tabs" }, 3e3);
59
- const tabs = response.tabs;
60
- const active = tabs.find((t) => t.active);
61
- const tabList = tabs.map((t) => `${t.index}. ${t.active ? "[active] " : ""}${t.title} \u2014 ${t.url}`).join("\n");
62
- return {
63
- messages: [{
64
- role: "user",
65
- content: {
66
- type: "text",
67
- text: `Check chromeflow status.
68
-
69
- Extension: Connected
70
- Active tab: ${active?.title ?? "none"} \u2014 ${active?.url ?? ""}
71
- All tabs:
72
- ${tabList}`
73
- }
74
- }]
75
- };
76
- } catch {
77
- return {
78
- messages: [{
79
- role: "user",
80
- content: {
81
- type: "text",
82
- text: "Check chromeflow status. Extension is connected but not responding. The user may need to reload it."
83
- }
84
- }]
85
- };
86
- }
87
- }
88
- );
89
- const transport = new StdioServerTransport();
90
- await server.connect(transport);
91
- console.error("[chromeflow] MCP server running. Waiting for Claude...");
92
- }