chromeflow 0.8.0 → 0.9.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +43 -148
- package/bin/chromeflow.mjs +25916 -0
- package/package.json +35 -20
- package/CLAUDE.md +0 -370
- package/dist/index.js +0 -115
- package/dist/setup.js +0 -493
- package/dist/tools/browser.js +0 -404
- package/dist/tools/capture.js +0 -216
- package/dist/tools/flow.js +0 -436
- package/dist/tools/highlight.js +0 -70
- package/dist/types.js +0 -0
- package/dist/ws-bridge.js +0 -116
package/package.json
CHANGED
|
@@ -1,36 +1,50 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "chromeflow",
|
|
3
|
-
"version": "0.8
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.9.8",
|
|
4
|
+
"description": "MCP server for chromeflow — lets Claude Code or Codex CLI drive your real Chrome browser with sessions intact. Plugin install recommended; npx chromeflow for manual MCP wiring.",
|
|
5
5
|
"type": "module",
|
|
6
|
+
"main": "./bin/chromeflow.mjs",
|
|
6
7
|
"bin": {
|
|
7
|
-
"chromeflow": "
|
|
8
|
+
"chromeflow": "./bin/chromeflow.mjs"
|
|
8
9
|
},
|
|
9
10
|
"files": [
|
|
10
|
-
"
|
|
11
|
-
"
|
|
12
|
-
"
|
|
11
|
+
"bin/",
|
|
12
|
+
"README.md",
|
|
13
|
+
"LICENSE"
|
|
13
14
|
],
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
|
|
15
|
+
"engines": {
|
|
16
|
+
"node": ">=20"
|
|
17
|
+
},
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "../plugin/scripts/build-server.sh && mkdir -p bin && cp ../plugin/server/chromeflow.mjs bin/chromeflow.mjs",
|
|
20
|
+
"dev": "tsc --watch"
|
|
17
21
|
},
|
|
18
|
-
"homepage": "https://github.com/NeoDrew/chromeflow",
|
|
19
|
-
"license": "MIT",
|
|
20
22
|
"keywords": [
|
|
21
23
|
"mcp",
|
|
22
|
-
"
|
|
24
|
+
"model-context-protocol",
|
|
23
25
|
"claude-code",
|
|
24
|
-
"
|
|
25
|
-
"automation",
|
|
26
|
-
"chrome"
|
|
26
|
+
"codex-cli",
|
|
27
|
+
"browser-automation",
|
|
28
|
+
"chrome-extension",
|
|
29
|
+
"ai-agent",
|
|
30
|
+
"agent-tooling",
|
|
31
|
+
"playwright-alternative",
|
|
32
|
+
"browser-use-alternative",
|
|
33
|
+
"puppeteer-alternative",
|
|
34
|
+
"web-automation",
|
|
35
|
+
"stripe",
|
|
36
|
+
"canvas",
|
|
37
|
+
"real-chrome-session"
|
|
27
38
|
],
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"dev": "tsc --watch"
|
|
39
|
+
"repository": {
|
|
40
|
+
"type": "git",
|
|
41
|
+
"url": "git+https://gitlab.com/NeoDrew/chromeflow.git",
|
|
42
|
+
"directory": "packages/mcp-server"
|
|
33
43
|
},
|
|
44
|
+
"homepage": "https://chromeflow.run",
|
|
45
|
+
"bugs": "https://gitlab.com/NeoDrew/chromeflow/-/issues",
|
|
46
|
+
"license": "MIT",
|
|
47
|
+
"author": "Andrew Robertson (NeoDrewX)",
|
|
34
48
|
"dependencies": {
|
|
35
49
|
"@modelcontextprotocol/sdk": "^1.0.0",
|
|
36
50
|
"ws": "^8.18.0",
|
|
@@ -39,6 +53,7 @@
|
|
|
39
53
|
"devDependencies": {
|
|
40
54
|
"@types/node": "^22.0.0",
|
|
41
55
|
"@types/ws": "^8.5.0",
|
|
56
|
+
"esbuild": "^0.24.0",
|
|
42
57
|
"typescript": "^5.5.0"
|
|
43
58
|
}
|
|
44
59
|
}
|
package/CLAUDE.md
DELETED
|
@@ -1,370 +0,0 @@
|
|
|
1
|
-
# Chromeflow — Claude Instructions
|
|
2
|
-
|
|
3
|
-
## What chromeflow is
|
|
4
|
-
Chromeflow is a browser guidance tool. When a task requires the user to interact with a
|
|
5
|
-
website (create accounts, set up billing, retrieve API keys, configure third-party services),
|
|
6
|
-
use chromeflow to guide them through it visually instead of giving text instructions.
|
|
7
|
-
|
|
8
|
-
## When to use chromeflow (be proactive)
|
|
9
|
-
Use chromeflow automatically whenever a task requires:
|
|
10
|
-
- Creating or configuring a third-party account (Stripe, SendGrid, Supabase, Vercel, etc.)
|
|
11
|
-
- Retrieving API keys, secrets, or credentials to place in `.env`
|
|
12
|
-
- Setting up pricing tiers, webhooks, or service configuration in a web UI
|
|
13
|
-
- Any browser-based step that is blocking code work
|
|
14
|
-
|
|
15
|
-
Do NOT ask "should I open the browser?" — just do it. The user expects seamless handoff.
|
|
16
|
-
|
|
17
|
-
**Never end a response with a "you still need to" list of browser tasks.** If code changes are done and browser steps remain (e.g. creating a Stripe product, adding an env var), continue immediately with chromeflow — don't hand them back to the user.
|
|
18
|
-
|
|
19
|
-
## HARD RULES — never break these
|
|
20
|
-
|
|
21
|
-
1. **Never use Bash as a fallback for browser tasks.** If `click_element` fails, use
|
|
22
|
-
`scroll_page` then retry, or use `highlight_region` to show the user. Never use
|
|
23
|
-
`osascript`, `applescript`, or any shell command to control the browser.
|
|
24
|
-
|
|
25
|
-
2. **Never use `take_screenshot` to read page content.** After `scroll_page`, after
|
|
26
|
-
`click_element`, after navigation — always call `get_page_text`, not `take_screenshot`.
|
|
27
|
-
`get_page_text` returns up to 10,000 characters; if truncated it tells you the next
|
|
28
|
-
`startIndex` to paginate. When you only need to confirm a specific phrase is present,
|
|
29
|
-
prefer `find_text("phrase")` — it returns matches with context and selectors instead of
|
|
30
|
-
dumping the whole page. Screenshots are only for locating an element's pixel position
|
|
31
|
-
when DOM queries have already failed. Never take more than 1–2 screenshots in a row.
|
|
32
|
-
|
|
33
|
-
3. **Use `wait_for_selector` to wait for async page changes** (build completion, modals,
|
|
34
|
-
toasts). Never poll with repeated `take_screenshot` calls.
|
|
35
|
-
|
|
36
|
-
## Guided flow pattern
|
|
37
|
-
|
|
38
|
-
```
|
|
39
|
-
1. open_page(url) — navigate to the right page (add new_tab=true to keep current tab open; add background=true to keep the current tab focused if its form auto-saves on blur)
|
|
40
|
-
2. For each step:
|
|
41
|
-
a. Claude acts directly:
|
|
42
|
-
click_element("Save") — press buttons/links Claude can press
|
|
43
|
-
click_element("Save", until_selector=".success-toast") — when synthetic clicks may silently no-op on a React-heavy site, require an observable post-click condition (or until_url_contains / until_text_contains)
|
|
44
|
-
get_page_text() or wait_for_selector(".success") — confirm after click without an until-clause; click_element returns after 600ms regardless of outcome unless until_* was used
|
|
45
|
-
fill_form([{label, value}, ...], exact=true) — fill multiple fields in one call; pass exact=true on dense forms to refuse fuzzy text-walk matches
|
|
46
|
-
fill_input("Product name", "Pro") — fill a single field (works on React, CodeMirror, and contenteditable). Always check the response — it names the matched element so you can spot wrong-field matches
|
|
47
|
-
fill_input("Rate", "5", exact=true) — exact-match mode for short generic labels that may collide with neighbouring fields
|
|
48
|
-
react_set_input("input[name=email]", "x@y") — for inputs where fill_input fails (or for iframe-hosted inputs via frame=...) — handles the prototype-from-instance gotcha automatically
|
|
49
|
-
type_text("hello world") — type via trusted keyboard events (use when fill_input fails isTrusted checks)
|
|
50
|
-
type_text("description", frame="iframe.se-rte") — type into a same-origin iframe's contenteditable (eBay description editor pattern)
|
|
51
|
-
set_file_input("Upload", "/abs/path/to/file.zip") — upload a file; returns success only after the upload is observably committed (no manual sleep needed between rapid uploads)
|
|
52
|
-
clear_overlays() — call this immediately after fill_input/fill_form succeeds
|
|
53
|
-
scroll_to_element("label text") — jump directly to a known field; prefer this over scroll_page when the target is known
|
|
54
|
-
scroll_page("down") — reveal off-screen content when target location is unknown
|
|
55
|
-
b. Check results with text, not vision:
|
|
56
|
-
get_page_text() — read errors/status after actions
|
|
57
|
-
wait_for_selector(".success") — wait for a new element to appear
|
|
58
|
-
wait_for_change(".toast") — wait for an existing element's content to mutate, then read it (uses MutationObserver, cheaper than polling)
|
|
59
|
-
execute_script("return await fetch('/api/x').then(r => r.json())") — top-level await is supported, no window.__variable + sleep dance needed
|
|
60
|
-
c. When an element can't be found or clicked:
|
|
61
|
-
scroll_page("down") and retry — always try this first
|
|
62
|
-
get_elements() — get EXACT DOM coords when needed
|
|
63
|
-
highlight_region(selector,msg) — highlight by CSS selector (preferred; scrolls element into view automatically)
|
|
64
|
-
highlight_region(x,y,w,h,msg) — highlight by coords only if no selector available (coords go stale on scroll)
|
|
65
|
-
[absolute last resort] take_screenshot() — only if you genuinely can't identify the element from DOM
|
|
66
|
-
d. Pause for the user when needed:
|
|
67
|
-
find_and_highlight(text, msg) — show the user what to do
|
|
68
|
-
wait_for_click() — wait for user interaction
|
|
69
|
-
[after fill_input] clear_overlays() — always clear after filling
|
|
70
|
-
3. clear_overlays() — clean up when done
|
|
71
|
-
```
|
|
72
|
-
|
|
73
|
-
**Default to automation.** Only pause for human input when the step genuinely requires
|
|
74
|
-
personal data or a human decision.
|
|
75
|
-
|
|
76
|
-
## What to do automatically vs pause for the user
|
|
77
|
-
|
|
78
|
-
**Claude acts directly** (`click_element` / `fill_input`):
|
|
79
|
-
- Any button: Save, Continue, Create, Add, Confirm, Next, Submit, Update
|
|
80
|
-
- Product names, descriptions, feature lists
|
|
81
|
-
- Prices and amounts specified in the task
|
|
82
|
-
- URLs, redirect URIs, webhook endpoints
|
|
83
|
-
- Selecting billing period, currency, or other known options
|
|
84
|
-
- Dismissing cookie banners, cookie dialogs, "not now" prompts
|
|
85
|
-
|
|
86
|
-
**Pause for the user** (`find_and_highlight` + `wait_for_click`):
|
|
87
|
-
- Email address / username / login
|
|
88
|
-
- Password or passphrase
|
|
89
|
-
- Payment method / billing / card details
|
|
90
|
-
- Phone number / 2FA / OTP codes
|
|
91
|
-
- Any legal consent the user must personally accept
|
|
92
|
-
- Choices that depend on user preference Claude wasn't told
|
|
93
|
-
|
|
94
|
-
## Capturing credentials
|
|
95
|
-
After a secret key or API key is revealed:
|
|
96
|
-
1. `read_element(hint)` — capture the value
|
|
97
|
-
2. `write_to_env(KEY_NAME, value, envPath)` — write to `.env`
|
|
98
|
-
3. Tell the user what was written
|
|
99
|
-
|
|
100
|
-
Use the absolute path for `envPath` — it's the Claude Code working directory + `/.env`.
|
|
101
|
-
|
|
102
|
-
To capture and share a screenshot (e.g. for uploading to a form or pasting into a chat),
|
|
103
|
-
use `take_screenshot(copy_to_clipboard=true, save_to="downloads")` — saves a PNG to ~/Downloads
|
|
104
|
-
and copies it to the clipboard. The defaults (`copy_to_clipboard=false, save_to="none"`) return
|
|
105
|
-
the image to Claude only.
|
|
106
|
-
|
|
107
|
-
## Working with complex forms
|
|
108
|
-
- Before filling a large or unfamiliar form, call `get_form_fields()` to get a full inventory
|
|
109
|
-
of every field (type, label, current value, vertical position, and section heading). Use
|
|
110
|
-
`get_elements()` when you need pixel coordinates of visible elements; use `get_form_fields()`
|
|
111
|
-
when you need to understand the full structure of a form including fields below the fold.
|
|
112
|
-
If you only need one or two specific fields, use `find_input("hint")` instead — targeted
|
|
113
|
-
lookup is much cheaper than the full inventory and returns labels you can pipe straight
|
|
114
|
-
into `fill_input`.
|
|
115
|
-
- `get_form_fields()` includes `[type=file]` fields even when they are visually hidden behind
|
|
116
|
-
custom drag-and-drop zones. Use `set_file_input(hint, filePath)` to upload a file — provide
|
|
117
|
-
the label/hint text and the absolute path to the file on disk.
|
|
118
|
-
- For forms with multiple fields, use `fill_form([{label, value}, ...])` to fill them all
|
|
119
|
-
in a single call. It returns a per-field success/failure report so you can immediately see
|
|
120
|
-
which fields weren't found. Use `fill_input` only for a single field.
|
|
121
|
-
- `fill_input` and `fill_form` work on React-controlled inputs, contenteditable (Stripe,
|
|
122
|
-
Notion), and **CodeMirror 6 editors** — auto-detected. After filling, the value is read
|
|
123
|
-
back and a warning is shown if React did not accept it.
|
|
124
|
-
- **Monaco editors** (VS Code-style code editors on DataAnnotation, etc.) appear in
|
|
125
|
-
`get_form_fields()` as type "monaco". They cannot be filled via `fill_input` — use
|
|
126
|
-
`execute_script` with the Monaco API instead:
|
|
127
|
-
```js
|
|
128
|
-
// Read content from the first Monaco model
|
|
129
|
-
monaco.editor.getModels()[0].getValue()
|
|
130
|
-
// Write content to the first Monaco model
|
|
131
|
-
monaco.editor.getModels()[0].setValue('new content here')
|
|
132
|
-
```
|
|
133
|
-
- `set_file_input` accepts CSS selectors as the hint (e.g. `#import-problem-file`,
|
|
134
|
-
`.upload-input`) in addition to label text. Use selectors when file inputs are hidden
|
|
135
|
-
behind custom UIs and have no visible label.
|
|
136
|
-
- **Replacing an already-uploaded file**: after `set_file_input` succeeds, the input
|
|
137
|
-
becomes invisible and a "Remove" span/button typically appears near the upload area.
|
|
138
|
-
To replace the file: `click_element("Remove", nth=N)` (the right `nth` if there are
|
|
139
|
-
multiple), then call `set_file_input(hint, newPath)` again — the same hidden input is
|
|
140
|
-
recycled and accepts the new file. Verify with `get_form_fields()` between the two
|
|
141
|
-
steps so you're sure the input has reappeared.
|
|
142
|
-
- **Forcing auto-save on idempotent text edits** (e.g. keep-alive loop on an
|
|
143
|
-
auto-saving DataAnnotation form): some auto-save logic diffs against the last-saved
|
|
144
|
-
value and skips no-op writes. To force a real save on each tick without changing
|
|
145
|
-
visible content, toggle a trailing space — add when absent, remove when present.
|
|
146
|
-
`fill_input` value comparison handles both directions transparently. **Caveat:**
|
|
147
|
-
long-running heartbeats that toggle whitespace on a real form field have been
|
|
148
|
-
observed to drift other fields' React state out of sync (the re-render reset a
|
|
149
|
-
separate radio's checked state to the form-level store value). For heartbeat
|
|
150
|
-
loops, prefer writing to `localStorage` via `execute_script` instead — the
|
|
151
|
-
auto-save handler usually fires on any input event, but `localStorage` writes
|
|
152
|
-
don't perturb React state at all.
|
|
153
|
-
- After any radio/checkbox click that reveals new fields, call `get_form_fields()` again —
|
|
154
|
-
the inventory will include the new fields and warn if more hidden ones still exist.
|
|
155
|
-
- If a form has collapsible sections, expand them all before calling `get_form_fields()` so
|
|
156
|
-
the field list is complete. Use the `[under: "section name"]` context in each field's entry
|
|
157
|
-
to identify fields by section rather than by index — indices shift when sections expand.
|
|
158
|
-
- Prefer `scroll_to_element("label text or #selector")` over `scroll_page` whenever you know
|
|
159
|
-
which field or section you need — it scrolls precisely and confirms the matched element.
|
|
160
|
-
- For multi-session tasks (long forms that may exceed context), call `save_page_state()` as a
|
|
161
|
-
checkpoint. A future session can call `restore_page_state()` to reload all field values.
|
|
162
|
-
|
|
163
|
-
## Discovery — find without dumping the whole page
|
|
164
|
-
|
|
165
|
-
Three lightweight tools save tokens vs `get_page_text` / `get_form_fields` when you don't need the full content:
|
|
166
|
-
|
|
167
|
-
- `find_text("Saved successfully")` — grep the DOM. Returns surrounding context, a CSS selector, and a `clickable` flag for each match. Use this instead of `get_page_text` when you're checking whether a specific phrase is present, or to locate a button by its visible text. If `clickable=true`, pipe the matched text straight into `click_element`.
|
|
168
|
-
- `find_input("Email")` — fuzzy form-field lookup, top-N. Returns labels you can pipe straight into `fill_input(label, value)` — both tools share the same match ranks (`aria-eq` → `placeholder-eq` → `label-text-eq` → `name-eq` → `id-eq` → `*-includes` → `fuzzy-text-walk`). Cheaper than `get_form_fields` when you just need a couple of specific fields. Pass `type_filter="email"` to restrict to a specific input type.
|
|
169
|
-
- `wait_for_text("Saved")` — wait for text to appear without knowing the selector ahead of time. Complements `wait_for_selector` for the case where you only know the post-action message.
|
|
170
|
-
|
|
171
|
-
All three pierce open shadow roots and accept `frame="iframe.selector"` for same-origin iframes. Pass `regex=true` on `find_text` / `wait_for_text` for case-insensitive regex matching. Pass `exact=true` on `find_input` to refuse fuzzy text-walk matches.
|
|
172
|
-
|
|
173
|
-
```
|
|
174
|
-
find_text("Build complete", scope_selector=".log-output") — only check the build log section
|
|
175
|
-
find_input("Card number", type_filter="text") — find Stripe's card-number field
|
|
176
|
-
wait_for_text("Deploy successful", timeout_ms=30000) — wait up to 30s after clicking Deploy
|
|
177
|
-
```
|
|
178
|
-
|
|
179
|
-
Reach for these BEFORE `get_page_text` / `get_form_fields` when the goal is "is X here?" or "where is X?". Reserve `get_page_text` for reading actual content, and `get_form_fields` for understanding a whole form's structure.
|
|
180
|
-
|
|
181
|
-
## Working with multiple tabs
|
|
182
|
-
- Before opening a new tab, call `list_tabs()` to check if the target URL is already open —
|
|
183
|
-
use `switch_to_tab` to return to it instead of opening a duplicate.
|
|
184
|
-
- `open_page(url, new_tab=true)` opens a URL without losing the current tab. Use sparingly —
|
|
185
|
-
prefer switching to an existing tab over opening a new one.
|
|
186
|
-
- `switch_to_tab("1")` switches by tab number; `switch_to_tab("form")` matches by URL or title substring.
|
|
187
|
-
- Before navigating away from a partially-filled form, call `save_page_state()` so the form
|
|
188
|
-
can be restored if the tab reloads or the page loses its state on return.
|
|
189
|
-
- **In long-lived self-rescheduling loops**, the active tab can silently drift mid-session
|
|
190
|
-
(the user navigates manually while AFK, or another tab steals focus). At the start of
|
|
191
|
-
every loop iteration, call `list_tabs` and verify the active tab's URL matches your
|
|
192
|
-
expected target — if not, `switch_to_tab(<URL or title substring>)` before running
|
|
193
|
-
`execute_script` or any other tab-scoped tool. Without this guard, scripts run on the
|
|
194
|
-
wrong tab and fail with confusing "undefined" errors that look like page bugs.
|
|
195
|
-
|
|
196
|
-
## Error handling
|
|
197
|
-
|
|
198
|
-
**After any action**, confirm with `get_page_text()` or `wait_for_selector` — never take a
|
|
199
|
-
screenshot to check what happened.
|
|
200
|
-
|
|
201
|
-
**`click_element` not found:**
|
|
202
|
-
1. `scroll_page("down")` then retry `click_element`
|
|
203
|
-
2. `get_elements()` to get exact coords → `highlight_region(x,y,w,h,msg)`
|
|
204
|
-
3. `take_screenshot()` only if you still can't identify the element from DOM queries
|
|
205
|
-
|
|
206
|
-
**Multiple elements with the same label** (e.g. many "Remove" buttons):
|
|
207
|
-
`click_element("Remove", nth=3)` — use `nth` (1-based) to target the specific one by order top-to-bottom. Check `get_form_fields` or `get_page_text` first to determine which index corresponds to the right section.
|
|
208
|
-
|
|
209
|
-
**`fill_input` matched the wrong field** (always read the response — it names the matched element):
|
|
210
|
-
- If you wanted "Ad rate" and got back `<input name="title">`, the fuzzy text walker latched onto a neighbour. Retry with `exact=true` and a more specific hint, or use `react_set_input(selector, value)` with a precise CSS selector.
|
|
211
|
-
- The match-strength is reported as `aria-eq`, `placeholder-eq`, `name-eq`, `id-eq`, `label-text-eq`, or fuzzier kinds. Anything labeled `fuzzy-text-walk` or `*-includes` is the lowest-confidence kind — verify the matched element really was what you wanted.
|
|
212
|
-
|
|
213
|
-
**`fill_input` not found or rejected by the page:**
|
|
214
|
-
1. `click_element(hint)` to focus the field, then retry `fill_input`
|
|
215
|
-
2. `react_set_input("input[name=...]", value)` — uses the input's own prototype to set the value, dispatches input/change. Handles the "Illegal invocation" iframe gotcha and the prototype-from-instance ceremony for you.
|
|
216
|
-
3. If the site rejects programmatic input (isTrusted check, shadow DOM, custom editors):
|
|
217
|
-
- `click_element(hint)` to focus the field
|
|
218
|
-
- `execute_script("document.execCommand('selectAll')")` to clear existing content
|
|
219
|
-
- `type_text("new value")` — uses CDP trusted keyboard events that pass isTrusted checks
|
|
220
|
-
4. For iframe-hosted contenteditable rich-text editors (eBay's description, etc.):
|
|
221
|
-
- `type_text("body content", frame="iframe.selector")` — same-origin only. Focuses the iframe's contenteditable, types via CDP, dispatches input/change in the iframe's context so React reads the new value.
|
|
222
|
-
5. `find_and_highlight(hint, "Click here — I'll fill it in")` (no `valueToType`) then
|
|
223
|
-
`wait_for_click()` — the user's click focuses the field and `fill_input`'s active-element
|
|
224
|
-
fallback fills it automatically
|
|
225
|
-
6. Call `clear_overlays()` after `fill_input` succeeds
|
|
226
|
-
7. Only use `valueToType` when the user must personally type the value (password, personal data)
|
|
227
|
-
|
|
228
|
-
**`click_element` returned success but the page didn't change** (common on React-heavy sites where synthetic clicks no-op):
|
|
229
|
-
Pass an `until_*` clause to require an observable post-click condition. `click_element` returns success=false if the condition isn't met within `until_timeout_ms` (default 5000):
|
|
230
|
-
```
|
|
231
|
-
click_element("List with displayed fees", until_url_contains="/listing-published")
|
|
232
|
-
click_element("Save", until_selector=".success-toast")
|
|
233
|
-
click_element("Confirm", until_text_contains="Order placed")
|
|
234
|
-
```
|
|
235
|
-
If success=false: try `react_call_prop` to call the page's own React handler directly (`react_set_input` only sets input values — it does not click), or use `execute_script("document.querySelector(...).click()")` directly.
|
|
236
|
-
|
|
237
|
-
**`click_element` timed out (the WS request, not until-polling)**: the message will say "the click MAY have already fired". During a busy React reconciliation, the click can land while reading the response takes longer than the 30s WS timeout. Don't blindly retry — re-clicking can toggle React radios OFF or fire a duplicate submit. Verify with `get_page_text`, `wait_for_selector`, or `wait_for_text` first; only retry if the page state confirms the click never took effect.
|
|
238
|
-
|
|
239
|
-
**Modal never opens / submit handler swallowed by stale validation state**: when a Submit button's onClick opens a modal that never renders (e.g. validation thinks the form is incomplete because the form-level React state is stale, but DOM inputs look filled), use `react_call_prop` to call the bypass handler directly:
|
|
240
|
-
```
|
|
241
|
-
react_call_prop("input[name=justification]", "handleForceSubmitConfirmation", ["my justification text"])
|
|
242
|
-
```
|
|
243
|
-
Walks up the React fiber from the selector, finds the nearest component with a prop function of the given name, and calls it with the JSON-serializable args. Returns the component name and stringified return value so you can verify the right handler ran.
|
|
244
|
-
|
|
245
|
-
**`set_file_input` not committing on rapid back-to-back uploads:**
|
|
246
|
-
The default 3000ms commit-wait is enough for most uploaders. For batch photo uploads on slow React file handlers (eBay's 25-photo carousel, Stripe Connect document upload), increase `wait_ms` to 6000–8000 OR pass `verify_selector` pointing at the thumbnail/Remove-button that should appear:
|
|
247
|
-
```
|
|
248
|
-
set_file_input("Photos", "/path/1.jpg", verify_selector=".photo-thumbnail:nth-of-type(1)")
|
|
249
|
-
set_file_input("Photos", "/path/2.jpg", verify_selector=".photo-thumbnail:nth-of-type(2)")
|
|
250
|
-
```
|
|
251
|
-
The page-level file count is reported in the response — use it to spot uploaders that consume-and-reset the input vs uploaders that keep the file there.
|
|
252
|
-
|
|
253
|
-
**Waiting for async results** (build, save, deploy): `wait_for_selector(selector, timeout)` — never poll with screenshots. `wait_for_selector` pierces open shadow roots, so a selector inside a web component (Outlier task UI, Lit/Stencil widget) matches without ceremony.
|
|
254
|
-
|
|
255
|
-
**Waiting for a shadow host's tree to attach** (e.g. SPA route flips where `<my-host>` appears 10s before its shadow content hydrates, and `wait_for_selector("my-host")` resolves while `host.shadowRoot` is still null): pass `shadow_root=true`. The wait then requires the matched element's `.shadowRoot` to be non-null, not just for the host element to exist.
|
|
256
|
-
```
|
|
257
|
-
wait_for_selector("my-host", shadow_root=true) — wait until the host element both exists AND has an attached shadowRoot
|
|
258
|
-
```
|
|
259
|
-
|
|
260
|
-
**Waiting for an existing region to update** (e.g. click Save, then get the confirmation toast; send a chat message, then get the reply): `wait_for_change(selector)` uses a MutationObserver on the element's subtree and returns its new text content as soon as the mutation settles. Prefer this over `wait_for_selector` + `get_page_text` when the element already exists and you just need its next state — one call instead of two, no polling.
|
|
261
|
-
|
|
262
|
-
**Pre-filling `prompt()` and `confirm()` dialogs**: When a page action will trigger a JS
|
|
263
|
-
dialog (e.g. "Save As" calling `prompt()`), call `set_dialog_response` BEFORE the action:
|
|
264
|
-
```
|
|
265
|
-
set_dialog_response(type="prompt", value="my-filename") — next prompt() returns "my-filename"
|
|
266
|
-
set_dialog_response(type="confirm", value="true") — next confirm() returns true
|
|
267
|
-
```
|
|
268
|
-
Then trigger the action (e.g. `click_element("Save As")`). The response is consumed once.
|
|
269
|
-
|
|
270
|
-
**React Select / custom styled dropdowns** (e.g. "Select..." components on DataAnnotation):
|
|
271
|
-
`click_element` and `fill_input` do NOT work on these — the dropdown components intercept native events. The cleanest path is `react_set_input` (which handles the prototype-from-instance setter for you) followed by a click on the filtered option:
|
|
272
|
-
|
|
273
|
-
```
|
|
274
|
-
1. react_set_input('input[id*="react-select-3-input"]', "Target Option")
|
|
275
|
-
— sets the hidden combobox input via its own prototype's value-setter and dispatches the input event React's onChange listens for
|
|
276
|
-
2. (300ms pause for the dropdown to filter)
|
|
277
|
-
3. execute_script("document.querySelector('[id*=\"react-select-3-option-0\"]').click()")
|
|
278
|
-
4. Verify the control shows the selected value:
|
|
279
|
-
execute_script("document.querySelector('[class*=\"singleValue\"]').textContent.trim()")
|
|
280
|
-
```
|
|
281
|
-
|
|
282
|
-
If you must hand-roll this with `execute_script` (older React-Select versions, weird custom wrappers), prefer reading the prototype FROM the instance to avoid "Illegal invocation" inside iframes:
|
|
283
|
-
|
|
284
|
-
```js
|
|
285
|
-
var input = document.querySelector('input[id*="react-select-3-input"]');
|
|
286
|
-
input.focus();
|
|
287
|
-
var setter = Object.getOwnPropertyDescriptor(Object.getPrototypeOf(input), 'value').set;
|
|
288
|
-
setter.call(input, 'Target Option');
|
|
289
|
-
input.dispatchEvent(new Event('input', { bubbles: true }));
|
|
290
|
-
```
|
|
291
|
-
|
|
292
|
-
Fallback if the combobox approach doesn't work (older React Select versions):
|
|
293
|
-
```js
|
|
294
|
-
var controls = document.querySelectorAll('[class*="control"]');
|
|
295
|
-
controls[N].click();
|
|
296
|
-
var allEls = document.querySelectorAll('*');
|
|
297
|
-
for (var i = 0; i < allEls.length; i++) {
|
|
298
|
-
if (allEls[i].textContent.trim() === 'Target Option' && allEls[i].children.length === 0) {
|
|
299
|
-
allEls[i].dispatchEvent(new MouseEvent('mousedown', {bubbles: true}));
|
|
300
|
-
allEls[i].click();
|
|
301
|
-
break;
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
```
|
|
305
|
-
|
|
306
|
-
**Page text with large embedded content** (e.g. uploaded log files previewed inline): full-page `get_page_text()` pagination becomes unwieldy. Scope to a specific section instead:
|
|
307
|
-
```
|
|
308
|
-
get_page_text(selector=".section-3") — scope to a CSS selector
|
|
309
|
-
get_page_text(selector="#upload-form") — scope to an id
|
|
310
|
-
```
|
|
311
|
-
Use `execute_script("document.querySelectorAll('section').length")` to find structural selectors first.
|
|
312
|
-
|
|
313
|
-
**Page content rendered as images** (e.g. qualification "Examples" tabs that show PNG screenshots
|
|
314
|
-
instead of DOM text): `get_page_text()` returns nothing useful. Zoom out and screenshot instead:
|
|
315
|
-
|
|
316
|
-
```js
|
|
317
|
-
// Shrink to fit wide content, then screenshot
|
|
318
|
-
document.body.style.zoom = '0.4';
|
|
319
|
-
// use take_screenshot() to read it
|
|
320
|
-
// restore afterward:
|
|
321
|
-
document.body.style.zoom = '1';
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
**Downloads via `execute_script`**: Creating a Blob URL and clicking an anchor via
|
|
325
|
-
`execute_script` sometimes fails due to CSP or timing. If a download doesn't trigger:
|
|
326
|
-
1. Retry the exact same `execute_script` call
|
|
327
|
-
2. If still failing, use `find_and_highlight` to show the user a download button to click manually
|
|
328
|
-
|
|
329
|
-
**React-controlled native radios/checkboxes that don't update `checked`**: `click_element`
|
|
330
|
-
auto-handles this for native `<input type=radio>` and `<input type=checkbox>` inputs
|
|
331
|
-
(including labels that wrap or `for=` reference them). The flow:
|
|
332
|
-
- If a radio is already `checked=true`, `click_element` skips the click — re-clicking can
|
|
333
|
-
toggle it OFF on React forms whose `onChange` interprets the click as a deselect. The
|
|
334
|
-
response says `"X — radio already checked, click skipped"`.
|
|
335
|
-
- If the standard click fires but the input's `checked` state didn't change as expected
|
|
336
|
-
(radio still unchecked, or checkbox didn't toggle), `click_element` automatically
|
|
337
|
-
dispatches the full pointer-event chain (`pointerdown → mousedown → pointerup → mouseup
|
|
338
|
-
→ click`) on the input. The response says `"now checked (after pointer-chain fallback)"`.
|
|
339
|
-
|
|
340
|
-
You only need to drop into `execute_script` for the no-native-input case below.
|
|
341
|
-
|
|
342
|
-
**Shadow DOM `[role=radio]` / role-only custom radios silently no-op**: On sites like
|
|
343
|
-
Outlier where the radio is a `[role=radio]` div with no underlying `<input>`,
|
|
344
|
-
`click_element`'s native-input fallback can't help — the click target has no `.checked`
|
|
345
|
-
property to verify. Two things must be true: (a) the element must be scrolled into view
|
|
346
|
-
FIRST (`scrollIntoView({block:'center'})`), and (b) the full pointer-event chain must
|
|
347
|
-
fire — not just `click()`:
|
|
348
|
-
```js
|
|
349
|
-
['pointerdown','mousedown','pointerup','mouseup','click'].forEach(t =>
|
|
350
|
-
el.dispatchEvent(new MouseEvent(t, {bubbles: true, cancelable: true}))
|
|
351
|
-
);
|
|
352
|
-
```
|
|
353
|
-
After scroll, re-query the radio list — its length may change as more content becomes
|
|
354
|
-
visible. Then verify `aria-checked === "true"` before moving on.
|
|
355
|
-
|
|
356
|
-
**Visibility-detection overlays** (e.g. Multimango's "Content Hidden" black overlay):
|
|
357
|
-
Some sites render a full-screen overlay when the tab loses focus, triggered by
|
|
358
|
-
`document.visibilityState` / `document.hidden`. Chromeflow tab-switching triggers it.
|
|
359
|
-
Workaround — remove the overlay and patch the APIs:
|
|
360
|
-
```js
|
|
361
|
-
document.querySelectorAll('[style*="z-index: 99999"]').forEach(el => el.remove());
|
|
362
|
-
Object.defineProperty(document, 'hidden', { get: () => false, configurable: true });
|
|
363
|
-
Object.defineProperty(document, 'visibilityState', { get: () => 'visible', configurable: true });
|
|
364
|
-
['visibilitychange','blur'].forEach(t =>
|
|
365
|
-
document.addEventListener(t, e => e.stopImmediatePropagation(), true)
|
|
366
|
-
);
|
|
367
|
-
```
|
|
368
|
-
Re-apply after every navigation.
|
|
369
|
-
|
|
370
|
-
**Never use Bash to work around a stuck browser interaction.**
|
package/dist/index.js
DELETED
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
3
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
4
|
-
import { readFileSync } from "node:fs";
|
|
5
|
-
import { fileURLToPath } from "node:url";
|
|
6
|
-
import { WsBridge } from "./ws-bridge.js";
|
|
7
|
-
import { registerBrowserTools } from "./tools/browser.js";
|
|
8
|
-
import { registerHighlightTools } from "./tools/highlight.js";
|
|
9
|
-
import { registerCaptureTools } from "./tools/capture.js";
|
|
10
|
-
import { registerFlowTools } from "./tools/flow.js";
|
|
11
|
-
import { runSetup, runUpdate, runUninstall, runDoctor } from "./setup.js";
|
|
12
|
-
const PACKAGE_VERSION = (() => {
|
|
13
|
-
try {
|
|
14
|
-
const pkgPath = fileURLToPath(new URL("../package.json", import.meta.url));
|
|
15
|
-
return JSON.parse(readFileSync(pkgPath, "utf8")).version;
|
|
16
|
-
} catch {
|
|
17
|
-
return "unknown";
|
|
18
|
-
}
|
|
19
|
-
})();
|
|
20
|
-
if (process.argv[2] === "setup") {
|
|
21
|
-
runSetup().catch((err) => {
|
|
22
|
-
console.error(err);
|
|
23
|
-
process.exit(1);
|
|
24
|
-
});
|
|
25
|
-
} else if (process.argv[2] === "update") {
|
|
26
|
-
runUpdate().catch((err) => {
|
|
27
|
-
console.error(err);
|
|
28
|
-
process.exit(1);
|
|
29
|
-
});
|
|
30
|
-
} else if (process.argv[2] === "uninstall") {
|
|
31
|
-
runUninstall().catch((err) => {
|
|
32
|
-
console.error(err);
|
|
33
|
-
process.exit(1);
|
|
34
|
-
});
|
|
35
|
-
} else if (process.argv[2] === "doctor") {
|
|
36
|
-
runDoctor(PACKAGE_VERSION).catch((err) => {
|
|
37
|
-
console.error(err);
|
|
38
|
-
process.exit(1);
|
|
39
|
-
});
|
|
40
|
-
} else {
|
|
41
|
-
main().catch((err) => {
|
|
42
|
-
console.error("[chromeflow] Fatal error:", err);
|
|
43
|
-
process.exit(1);
|
|
44
|
-
});
|
|
45
|
-
}
|
|
46
|
-
async function main() {
|
|
47
|
-
const bridge = new WsBridge();
|
|
48
|
-
const server = new McpServer({
|
|
49
|
-
name: "chromeflow",
|
|
50
|
-
version: PACKAGE_VERSION
|
|
51
|
-
});
|
|
52
|
-
registerBrowserTools(server, bridge);
|
|
53
|
-
registerHighlightTools(server, bridge);
|
|
54
|
-
registerCaptureTools(server, bridge);
|
|
55
|
-
registerFlowTools(server, bridge);
|
|
56
|
-
const registered = server._registeredTools ?? {};
|
|
57
|
-
const toolNames = Object.keys(registered).sort();
|
|
58
|
-
console.error(`[chromeflow] v${PACKAGE_VERSION} \u2014 registered ${toolNames.length} tools`);
|
|
59
|
-
if (toolNames.length > 0) {
|
|
60
|
-
console.error(`[chromeflow] tools: ${toolNames.join(", ")}`);
|
|
61
|
-
} else {
|
|
62
|
-
console.error(`[chromeflow] WARNING: no tools registered. Try \`npx chromeflow doctor\`.`);
|
|
63
|
-
}
|
|
64
|
-
server.prompt(
|
|
65
|
-
"chromeflow-status",
|
|
66
|
-
"Check if the chromeflow Chrome extension is connected and which tab is active",
|
|
67
|
-
async () => {
|
|
68
|
-
const connected = bridge.isConnected();
|
|
69
|
-
if (!connected) {
|
|
70
|
-
return {
|
|
71
|
-
messages: [{
|
|
72
|
-
role: "user",
|
|
73
|
-
content: {
|
|
74
|
-
type: "text",
|
|
75
|
-
text: "Check chromeflow status. The Chrome extension is NOT connected. Tell the user to reload the chromeflow extension in chrome://extensions."
|
|
76
|
-
}
|
|
77
|
-
}]
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
try {
|
|
81
|
-
const response = await bridge.request({ type: "list_tabs" }, 3e3);
|
|
82
|
-
const tabs = response.tabs;
|
|
83
|
-
const active = tabs.find((t) => t.active);
|
|
84
|
-
const tabList = tabs.map((t) => `${t.index}. ${t.active ? "[active] " : ""}${t.title} \u2014 ${t.url}`).join("\n");
|
|
85
|
-
return {
|
|
86
|
-
messages: [{
|
|
87
|
-
role: "user",
|
|
88
|
-
content: {
|
|
89
|
-
type: "text",
|
|
90
|
-
text: `Check chromeflow status.
|
|
91
|
-
|
|
92
|
-
Extension: Connected
|
|
93
|
-
Active tab: ${active?.title ?? "none"} \u2014 ${active?.url ?? ""}
|
|
94
|
-
All tabs:
|
|
95
|
-
${tabList}`
|
|
96
|
-
}
|
|
97
|
-
}]
|
|
98
|
-
};
|
|
99
|
-
} catch {
|
|
100
|
-
return {
|
|
101
|
-
messages: [{
|
|
102
|
-
role: "user",
|
|
103
|
-
content: {
|
|
104
|
-
type: "text",
|
|
105
|
-
text: "Check chromeflow status. Extension is connected but not responding. The user may need to reload it."
|
|
106
|
-
}
|
|
107
|
-
}]
|
|
108
|
-
};
|
|
109
|
-
}
|
|
110
|
-
}
|
|
111
|
-
);
|
|
112
|
-
const transport = new StdioServerTransport();
|
|
113
|
-
await server.connect(transport);
|
|
114
|
-
console.error(`[chromeflow] v${PACKAGE_VERSION} MCP server running. Waiting for Claude...`);
|
|
115
|
-
}
|