chromeflow 0.9.11 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/bin/chromeflow.mjs +202 -54
  2. package/package.json +1 -1
@@ -24664,6 +24664,11 @@ var WsBridge = class {
24664
24664
  } catch {
24665
24665
  return;
24666
24666
  }
24667
+ if (msg.type === "progress") {
24668
+ const pending = this.pending.get(msg.requestId);
24669
+ if (pending) pending.refresh();
24670
+ return;
24671
+ }
24667
24672
  if (msg.type === "ready") {
24668
24673
  console.error("[chromeflow] Extension ready");
24669
24674
  const cwd = process.cwd();
@@ -24677,14 +24682,14 @@ var WsBridge = class {
24677
24682
  }));
24678
24683
  return;
24679
24684
  }
24680
- const pending = this.pending.get(msg.requestId);
24681
- if (pending) {
24682
- clearTimeout(pending.timer);
24685
+ const pending2 = this.pending.get(msg.requestId);
24686
+ if (pending2) {
24687
+ clearTimeout(pending2.timer);
24683
24688
  this.pending.delete(msg.requestId);
24684
24689
  if (msg.type === "error") {
24685
- pending.reject(new Error(msg.message));
24690
+ pending2.reject(new Error(msg.message));
24686
24691
  } else {
24687
- pending.resolve(msg);
24692
+ pending2.resolve(msg);
24688
24693
  }
24689
24694
  }
24690
24695
  });
@@ -24720,11 +24725,23 @@ var WsBridge = class {
24720
24725
  }
24721
24726
  const requestId = crypto.randomUUID();
24722
24727
  return new Promise((resolve2, reject) => {
24723
- const timer = setTimeout(() => {
24728
+ let lastProgressAt = Date.now();
24729
+ const fire = () => {
24724
24730
  this.pending.delete(requestId);
24725
- reject(new Error(`Request timed out after ${timeoutMs}ms`));
24726
- }, timeoutMs);
24727
- this.pending.set(requestId, { resolve: resolve2, reject, timer });
24731
+ reject(new Error(`Request timed out after ${timeoutMs}ms (last progress ${Date.now() - lastProgressAt}ms ago). The operation may have completed on the page; verify state before retrying.`));
24732
+ };
24733
+ let timer = setTimeout(fire, timeoutMs);
24734
+ const refresh = () => {
24735
+ clearTimeout(timer);
24736
+ lastProgressAt = Date.now();
24737
+ timer = setTimeout(fire, timeoutMs);
24738
+ };
24739
+ this.pending.set(requestId, {
24740
+ resolve: resolve2,
24741
+ reject,
24742
+ timer,
24743
+ refresh
24744
+ });
24728
24745
  this.client.send(JSON.stringify({ ...message, requestId }));
24729
24746
  });
24730
24747
  }
@@ -24832,9 +24849,11 @@ Examples: switch_to_tab({tab: 1}) for the first tab, switch_to_tab({tab: "form"}
24832
24849
  };
24833
24850
  }
24834
24851
  const q = String(raw);
24835
- await bridge.request({ type: "switch_to_tab", query: q });
24852
+ const response = await bridge.request({ type: "switch_to_tab", query: q });
24853
+ const r = response;
24854
+ const echo = r.url ? ` \u2192 "${r.title ?? ""}" (${r.url})` : "";
24836
24855
  return {
24837
- content: [{ type: "text", text: `Switched to tab matching "${q}"` }]
24856
+ content: [{ type: "text", text: `Switched to tab matching "${q}"${echo}` }]
24838
24857
  };
24839
24858
  }
24840
24859
  );
@@ -24892,15 +24911,18 @@ ${keptList}` }]
24892
24911
  );
24893
24912
  server.tool(
24894
24913
  "take_screenshot",
24895
- `Capture a screenshot of the active tab. By default the image is returned to the agent inline UNLESS it exceeds ~500KB base64, in which case it's saved to a temp file and the path is returned instead (preserves the agent's context window). Set inline="always" to force inline regardless of size, or inline="never" to always write to a file. Set save_to or copy_to_clipboard to also share the image with the user. Reserved for cases where DOM lookup has already failed \u2014 use get_page_text and find_text for reading content.`,
24914
+ `Capture a screenshot of the active tab. By default the image is returned to the agent inline UNLESS it exceeds ~500KB base64, in which case it's saved to a temp file and the path is returned instead (preserves the agent's context window). Set inline="always" to force inline regardless of size, or inline="never" to always write to a file. Set save_to or copy_to_clipboard to also share the image with the user. Reserved for cases where DOM lookup has already failed \u2014 use get_page_text and find_text for reading content.
24915
+
24916
+ Refuses fast on pages that are in fullscreen mode (captureVisibleTab hangs there). Exit fullscreen first with execute_script("document.exitFullscreen()") or pass allow_fullscreen: true if you really must try anyway.`,
24896
24917
  {
24897
24918
  copy_to_clipboard: external_exports.boolean().optional().describe("Copy the PNG to the system clipboard (macOS only). Default false."),
24898
24919
  save_to: external_exports.enum(["downloads", "cwd", "none"]).optional().describe(`Save the PNG to disk: "downloads" (~/Downloads), "cwd" (the agent's working directory), or "none" (default \u2014 image returned only to the agent, no disk artifact).`),
24899
- inline: external_exports.enum(["auto", "always", "never"]).optional().describe('Whether to return the image base64 inline to the agent. "auto" (default): inline if under 500KB base64, otherwise write to a temp file and return the path. "always": inline regardless of size \u2014 large images may exceed the MCP token ceiling. "never": always return the path, never inline.')
24920
+ inline: external_exports.enum(["auto", "always", "never"]).optional().describe('Whether to return the image base64 inline to the agent. "auto" (default): inline if under 500KB base64, otherwise write to a temp file and return the path. "always": inline regardless of size \u2014 large images may exceed the MCP token ceiling. "never": always return the path, never inline.'),
24921
+ allow_fullscreen: external_exports.boolean().optional().describe("Bypass the fullscreen fast-fail. Default false. captureVisibleTab usually hangs in fullscreen mode and the request times out \u2014 set this only when you've confirmed the page can produce a screenshot in fullscreen.")
24900
24922
  },
24901
- async ({ copy_to_clipboard = false, save_to = "none", inline = "auto" }) => {
24923
+ async ({ copy_to_clipboard = false, save_to = "none", inline = "auto", allow_fullscreen }) => {
24902
24924
  const sharing = copy_to_clipboard || save_to !== "none";
24903
- const response = await bridge.request({ type: "screenshot", grid: !sharing });
24925
+ const response = await bridge.request({ type: "screenshot", grid: !sharing, allow_fullscreen });
24904
24926
  if (response.type !== "screenshot_response") {
24905
24927
  throw new Error("Unexpected response from extension");
24906
24928
  }
@@ -24928,8 +24950,10 @@ ${keptList}` }]
24928
24950
  } catch {
24929
24951
  }
24930
24952
  }
24953
+ const r = response;
24954
+ const meta = r.viewport && r.page && r.scroll ? ` viewport=${r.viewport.width}x${r.viewport.height}, page=${r.page.width}x${r.page.height}, scroll=(${r.scroll.x},${r.scroll.y}).` : "";
24931
24955
  if (shouldInline) {
24932
- const msg = notes.length ? notes.join(". ") + "." : `Screenshot captured (${response.width}x${response.height}, ${base64Len} base64 chars). Analyze the image to identify element positions for highlighting.`;
24956
+ const msg = notes.length ? notes.join(". ") + "." + meta : `Screenshot captured (${response.width}x${response.height}, ${base64Len} base64 chars).${meta} Analyze the image to identify element positions for highlighting.`;
24933
24957
  return {
24934
24958
  content: [
24935
24959
  { type: "image", data: response.image, mimeType: "image/png" },
@@ -24937,7 +24961,7 @@ ${keptList}` }]
24937
24961
  ]
24938
24962
  };
24939
24963
  }
24940
- notes.push(`Image saved to ${landedPath} (${response.width}x${response.height}, ~${Math.round(imageBuffer.byteLength / 1024)}KB) \u2014 Read the file or use OS image viewer. To force inline despite size, pass inline="always".`);
24964
+ notes.push(`Image saved to ${landedPath} (${response.width}x${response.height}, ~${Math.round(imageBuffer.byteLength / 1024)}KB).${meta} Read the file or use OS image viewer. To force inline despite size, pass inline="always".`);
24941
24965
  return {
24942
24966
  content: [{ type: "text", text: notes.join(". ") + "." }]
24943
24967
  };
@@ -25021,34 +25045,37 @@ The saved file path can be passed directly to set_file_input(hint, file_path) to
25021
25045
  "get_form_fields",
25022
25046
  `Inventory form fields on the active page (inputs, textareas, selects, CodeMirror editors). Sorted top-to-bottom by y-position; includes fields below the fold.
25023
25047
 
25024
- Pass \`query\` to filter+rank by label/placeholder/aria-label/name/id (the old find_input behavior \u2014 match strength reported as aria-eq / placeholder-eq / label-text-eq / name-eq / id-eq / *-includes / fuzzy-text-walk). Pass \`exact: true\` to refuse fuzzy text-walk matches.`,
25048
+ Pass \`query\` to filter+rank by label/placeholder/aria-label/name/id (the old find_input behavior \u2014 match strength reported as aria-eq / placeholder-eq / label-text-eq / name-eq / id-eq / *-includes / fuzzy-text-walk). Pass \`exact: true\` to refuse fuzzy text-walk matches.
25049
+
25050
+ Pass \`only_empty: true\` to filter the inventory to required-but-empty fields. This is the "why is Submit disabled" diagnostic: it returns just the required fields that haven't been filled yet (or radios/checkboxes still unchecked) and skips everything that's already populated. Required-ness is detected via the \`required\` attribute, \`aria-required\`, or a trailing \`*\` in the associated label text.`,
25025
25051
  {
25026
25052
  query: external_exports.string().optional().describe("If set, filter+rank fields by hint matching label/placeholder/aria-label/name/id."),
25027
25053
  max: external_exports.number().int().min(1).optional().describe("Maximum fields to return when query is set (default 5). Ignored without query (full inventory)."),
25028
25054
  type_filter: external_exports.string().optional().describe('Restrict to a specific input type (e.g. "email", "checkbox", "file"). Only with query.'),
25029
25055
  exact: external_exports.boolean().optional().describe("Refuse fuzzy text-walk and *-includes matches. Only with query."),
25030
- frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside. Cross-origin iframes are not supported.")
25056
+ frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside. Cross-origin iframes are not supported."),
25057
+ only_empty: external_exports.boolean().optional().describe("Filter inventory to required-but-empty fields only. Use as a Submit-disabled diagnostic. Ignored when query is set.")
25031
25058
  },
25032
- async ({ query, max, type_filter, exact, frame }) => {
25059
+ async ({ query, max, type_filter, exact, frame, only_empty }) => {
25033
25060
  if (query !== void 0) {
25034
25061
  const response2 = await bridge.request({ type: "find_input", query, type_filter, max, exact, frame });
25035
25062
  if (response2.type !== "find_input_response") throw new Error("Unexpected response");
25036
25063
  const r2 = response2;
25037
25064
  if (r2.frame_error) return { content: [{ type: "text", text: r2.frame_error }] };
25038
25065
  if (r2.fields.length === 0) return { content: [{ type: "text", text: `No form fields matched "${query}".` }] };
25039
- const header = `Found ${r2.fields.length}${r2.truncated ? ` of ${r2.total_matches}` : ""} input(s) for "${query}":`;
25066
+ const header2 = `Found ${r2.fields.length}${r2.truncated ? ` of ${r2.total_matches}` : ""} input(s) for "${query}":`;
25040
25067
  const lines2 = r2.fields.map((f, i) => {
25041
25068
  const ph = f.placeholder ? ` placeholder="${f.placeholder}"` : "";
25042
25069
  const val = f.value ? ` value="${f.value}"` : "";
25043
25070
  const under = f.under ? ` [under: "${f.under}"]` : "";
25044
25071
  return ` ${i + 1}. "${f.label}" type=${f.type}${ph}${val}${under} \u2014 match: ${f.match_kind}`;
25045
25072
  });
25046
- return { content: [{ type: "text", text: `${header}
25073
+ return { content: [{ type: "text", text: `${header2}
25047
25074
  ${lines2.join("\n")}
25048
25075
 
25049
25076
  To fill: fill_input("${r2.fields[0].label}", "<value>")` }] };
25050
25077
  }
25051
- const response = await bridge.request({ type: "get_form_fields" });
25078
+ const response = await bridge.request({ type: "get_form_fields", only_empty });
25052
25079
  if (response.type !== "form_fields_response") throw new Error("Unexpected response");
25053
25080
  const r = response;
25054
25081
  const fields = r.fields;
@@ -25059,20 +25086,25 @@ To fill: fill_input("${r2.fields[0].label}", "<value>")` }] };
25059
25086
 
25060
25087
  \u2139 OAuth providers detected on this form: ${r.oauthIndicators.join(", ")}. If the user wants to sign in via one of these, click it instead of filling email/password.` : "";
25061
25088
  if (fields.length === 0) {
25062
- return { content: [{ type: "text", text: "No form fields found on page." + (r.warning ?? "") + captchaLine + oauthLine }] };
25089
+ const empty = only_empty ? "No required-but-empty fields detected." : "No form fields found on page.";
25090
+ return { content: [{ type: "text", text: empty + (r.warning ?? "") + captchaLine + oauthLine }] };
25063
25091
  }
25064
25092
  const lines = fields.map((f) => {
25065
25093
  const val = f.value ? ` [currently: "${f.value}"]` : "";
25066
25094
  const ctx = f.context ? ` [under: "${f.context}"]` : "";
25067
- return `${f.index}. [${f.type}] "${f.label}"${val}${ctx} \u2014 y:${f.y}`;
25095
+ const req = f.required ? " *required" : "";
25096
+ return `${f.index}. [${f.type}] "${f.label}"${req}${val}${ctx} \u2014 y:${f.y}`;
25068
25097
  });
25069
- return { content: [{ type: "text", text: `Form fields (${fields.length} total, sorted top-to-bottom):
25098
+ const header = only_empty ? `Required-but-empty fields (${fields.length}):` : `Form fields (${fields.length} total, sorted top-to-bottom):`;
25099
+ return { content: [{ type: "text", text: `${header}
25070
25100
  ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
25071
25101
  }
25072
25102
  );
25073
25103
  server.tool(
25074
25104
  "type_text",
25075
- `Type text into the currently focused element via CDP keystrokes (produces isTrusted=true events). Use when fill_input fails because the page validates isTrusted (CodeMirror/Monaco/Ace editors, shadow DOM inputs, isTrusted-gated forms). Pass \`into_selector\` to focus the target before typing (shadow-piercing CSS) \u2014 combined with \`clear_first: true\`, this collapses the old "wait_for_click \u2192 execute_script selectAll \u2192 type_text" pattern into a single call. Pass \`frame: "iframe.selector"\` to type into a same-origin iframe's first editable element.`,
25105
+ `Type text into the currently focused element via CDP keystrokes (produces isTrusted=true events). Use when fill_input fails because the page validates isTrusted (CodeMirror/Monaco/Ace editors, shadow DOM inputs, isTrusted-gated forms). Pass \`into_selector\` to focus the target before typing (shadow-piercing CSS) \u2014 combined with \`clear_first: true\`, this collapses the old "wait_for_click \u2192 execute_script selectAll \u2192 type_text" pattern into a single call. Pass \`frame: "iframe.selector"\` to type into a same-origin iframe's first editable element.
25106
+
25107
+ **TipTap / ProseMirror auto-recovery**: when \`into_selector\` targets a contenteditable inside a \`.tiptap\` / \`.ProseMirror\` / \`[data-tiptap-editor]\` ancestor, type_text verifies post-type that the text actually landed. If tiptap's internal state machine silently dropped the CDP keystrokes (a known failure mode where the editor reverts to placeholder a few seconds later), type_text automatically re-fills via \`document.execCommand('insertText', ...)\` which tiptap accepts. The response message records "TipTap/ProseMirror silently dropped..., recovered via execCommand insertText" when this fired.`,
25076
25108
  {
25077
25109
  text: external_exports.string().describe("The text to type into the focused element"),
25078
25110
  into_selector: external_exports.string().optional().describe(
@@ -25086,7 +25118,7 @@ ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
25086
25118
  )
25087
25119
  },
25088
25120
  async ({ text, frame, into_selector, clear_first }) => {
25089
- const timeoutMs = Math.max(3e4, text.length * 90 + 15e3);
25121
+ const timeoutMs = Math.max(3e4, text.length * 110 + 15e3);
25090
25122
  const response = await bridge.request(
25091
25123
  { type: "type_text", text, frame, into_selector, clear_first },
25092
25124
  timeoutMs
@@ -25122,6 +25154,15 @@ ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
25122
25154
  "execute_script",
25123
25155
  `Execute JavaScript in a tab's MAIN world (the page's own context, not the extension's isolated world). Use for reading framework state or DOM properties not visible in text \u2014 prefer get_page_text for visible content. Top-level \`return\` and \`await\` are supported.
25124
25156
 
25157
+ **Object returns are auto-stringified** \u2014 return an object/array and the response carries its JSON. No need to wrap return values in JSON.stringify yourself.
25158
+
25159
+ **Shadow-piercing helpers are pre-injected** into every script:
25160
+ - \`$deep(selector, root?)\` \u2014 querySelector that walks open shadow roots
25161
+ - \`$deepAll(selector, root?)\` \u2014 querySelectorAll equivalent, returns an array
25162
+ - \`shadowDocument\` \u2014 the first attached open shadow root on the page, or \`document\` if none. Useful when an SPA mounts ALL of its UI inside a single root shadow host (Outlier-style annotation dashboards): replace every \`document.querySelector*\` call with \`shadowDocument.querySelector*\` and the same code now reaches the SPA's content.
25163
+
25164
+ The helpers pierce OPEN shadow roots only \u2014 MAIN world can't reach closed roots. For closed roots, use find_text / get_page_text / click_element / fill_input which pierce both kinds via chrome.dom.openOrClosedShadowRoot.
25165
+
25125
25166
  MAIN-world means the page's Content-Security-Policy applies: \`fetch()\` against authenticated APIs is often blocked by the page's connect-src directive. When that happens, switch to fetch_url \u2014 it runs in the extension's privileged context (full host_permissions, automatic cookie jar, no page CSP).
25126
25167
 
25127
25168
  CSP-strict pages that disallow eval (Stripe, GitHub) silently fall through to a CDP eval path. Page alerts (alert/confirm/prompt) fired since the last script appear as PAGE ALERT in the result.
@@ -25316,11 +25357,48 @@ Never use take_screenshot just to read page content \u2014 paginate with startIn
25316
25357
 
25317
25358
  ` + text;
25318
25359
  }
25360
+ if ((startIndex ?? 0) === 0 && r.viewport && r.page && r.scroll) {
25361
+ const footer = `
25362
+
25363
+ ---
25364
+ viewport: ${r.viewport.width}x${r.viewport.height}, page: ${r.page.width}x${r.page.height}, scroll: (${r.scroll.x}, ${r.scroll.y})`;
25365
+ text = text + footer;
25366
+ }
25319
25367
  return {
25320
25368
  content: [{ type: "text", text: text || "(no text found on page)" }]
25321
25369
  };
25322
25370
  }
25323
25371
  );
25372
+ server.tool(
25373
+ "get_page_html",
25374
+ `Get the raw HTML of the current page or a scoped element. Use when you need to parse structure (tables, attribute values, nested data) and \`get_page_text\` strips too much, or when you're extracting structured data from a page Claude can't easily reason about from text alone.
25375
+
25376
+ Pierces open AND closed shadow roots for the \`selector\` lookup (Radix portals, Stencil/Lit web components). \`<script>\`, \`<style>\`, \`<noscript>\` are stripped before returning.
25377
+
25378
+ Default \`max_chars\` is 50,000. If the page is bigger, the response carries \`truncated: true\` and \`total_chars\` so you can decide whether to scope further with \`selector\`.
25379
+
25380
+ When the goal is "is X on this page?" or "find clickable Y", use \`find_text\` instead \u2014 it returns a focused match list rather than a wall of HTML.`,
25381
+ {
25382
+ selector: external_exports.string().optional().describe(
25383
+ "CSS selector to scope the HTML to. Pierces closed shadow roots. Omit to return the main content area (or body)."
25384
+ ),
25385
+ max_chars: external_exports.number().int().min(1e3).optional().describe("Truncate after this many chars (default 50000). The response includes total_chars so you know if you missed anything.")
25386
+ },
25387
+ async ({ selector, max_chars }) => {
25388
+ const response = await bridge.request({ type: "get_page_html", selector, max_chars });
25389
+ if (response.type !== "page_html_response") throw new Error("Unexpected response");
25390
+ const r = response;
25391
+ const notes = [];
25392
+ if (r.selector_missed) notes.push(`selector "${selector}" not found, returning full body HTML`);
25393
+ if (r.selector_in_shadow) notes.push(`selector matched inside a closed shadow root`);
25394
+ if (r.truncated) notes.push(`truncated at ${max_chars ?? 5e4} of ${r.total_chars} chars; scope further with selector to see more`);
25395
+ const header = notes.length > 0 ? `[${notes.join("; ")}]
25396
+ ` : "";
25397
+ return {
25398
+ content: [{ type: "text", text: header + r.html }]
25399
+ };
25400
+ }
25401
+ );
25324
25402
  server.tool(
25325
25403
  "get_console_logs",
25326
25404
  `Read the browser console output (log, warn, error, info) captured since the page loaded.
@@ -25554,7 +25632,13 @@ ${r.body_text}` : "";
25554
25632
  function registerFlowTools(server, bridge) {
25555
25633
  server.tool(
25556
25634
  "click_element",
25557
- `Click an interactive element by its visible text or aria-label. Optionally pass an until_* clause to verify the click took effect:
25635
+ `Click an interactive element by its visible text/aria-label (textHint) OR by direct CSS selector (selector). Pass exactly one.
25636
+
25637
+ \`textHint\` mode: fuzzy-rank against visible text, aria-label, button content. Ranks visible candidates ahead of hidden.
25638
+
25639
+ \`selector\` mode: pierces open AND closed shadow roots via queryAllDeep. Use when the target has no visible text (icon buttons, custom-element placeholders like Reddit's collapsed comment composer, drop-zone overlays). Skips the textHint matcher entirely. \`nth\` still picks the Nth match.
25640
+
25641
+ Optionally pass an until_* clause to verify the click took effect:
25558
25642
  - until_selector \u2014 CSS selector that should appear after the click
25559
25643
  - until_url_contains \u2014 substring that should appear in the URL (requires an actual URL change if the substring was already in the pre-click URL)
25560
25644
  - until_text_contains \u2014 substring that should appear in page text
@@ -25563,14 +25647,17 @@ function registerFlowTools(server, bridge) {
25563
25647
 
25564
25648
  Returns {success, message, before_url, after_url, navigated}. \`navigated\` is true when the post-click URL differs from the pre-click URL \u2014 surfaces silent redirects without a second list_tabs call. Refuses to click 0\xD70 elements and now ranks visible candidates above hidden when text/aria match; when forced to refuse a hidden element it surfaces the next visible candidate in the error message.
25565
25649
 
25566
- Scope matching with \`within_selector\` or \`near_text\` restricts where matches are searched \u2014 useful for long forms with repeated labels per section (e.g. one "Minor Issue(s)" radio per evaluation axis). \`within_selector\` is a CSS selector; \`near_text\` finds the nearest container whose heading starts with the given text.
25650
+ Scope matching with \`within_selector\` or \`near_text\` restricts where matches are searched \u2014 useful for long forms with repeated labels per section (e.g. one "Approve" radio per row). \`within_selector\` is a CSS selector; \`near_text\` finds the nearest container whose heading starts with the given text.
25567
25651
 
25568
25652
  Shadow DOM (open AND closed) is pierced by default via chrome.dom.openOrClosedShadowRoot \u2014 Reddit faceplate-* / r-post-form-submit-button / web-component-heavy SPAs no longer need manual deepFind recipes.
25569
25653
 
25570
25654
  ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit, X / Twitter, GitHub device-code, mcp.so) are silently rejected by isTrusted-aware form validators and CSRF/reCAPTCHA gates. Pass \`expect_submit: true\` to detect this case (returns success=false with "submit silently rejected" when no signal fires within 4s). For confirmed anti-bot sites, do NOT retry \u2014 pre-fill the form, then highlight the submit button and call wait_for_click so a real human gesture fires the submission.`,
25571
25655
  {
25572
- textHint: external_exports.string().describe(
25573
- "The visible label of the button or link (e.g. 'Save product', 'Continue', 'Add a product', 'Create')"
25656
+ textHint: external_exports.string().optional().describe(
25657
+ "The visible label of the button or link (e.g. 'Save product', 'Continue', 'Add a product', 'Create'). Exactly one of textHint or selector must be set."
25658
+ ),
25659
+ selector: external_exports.string().optional().describe(
25660
+ `CSS selector for the element to click (e.g. 'faceplate-textarea-input', '#open-composer', 'button[aria-label="More options"]'). Pierces open AND closed shadow roots via queryAllDeep. Use when the target has no usable text. Exactly one of textHint or selector must be set.`
25574
25661
  ),
25575
25662
  nth: external_exports.number().int().min(1).optional().describe("Which match to click when multiple elements share the same label (1 = first/topmost, default 1). Visible candidates are ranked above hidden, so a hidden flair-dropdown won't claim nth=1 over the visible submit button."),
25576
25663
  until_selector: external_exports.string().optional().describe('Wait until this CSS selector appears on the page after the click (e.g. ".success-toast"). Returns success=false if it does not appear within until_timeout_ms.'),
@@ -25579,16 +25666,25 @@ ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit
25579
25666
  until_url_changes: external_exports.boolean().optional().describe('Wait until the URL changes after the click \u2014 for navigating submits whose destination URL is unknown ahead of time. Succeeds on any change away from the pre-click URL. Combine with until_url_contains for "must change AND must contain X".'),
25580
25667
  until_timeout_ms: external_exports.number().int().min(500).optional().describe("How long to wait for the until-condition, in milliseconds (default 5000). Only used if one of until_* is set."),
25581
25668
  expect_submit: external_exports.boolean().optional().describe(`Broad anti-bot detector. After the click, watch up to 4s for ANY of: URL change, [role=alert] / [data-sonner-toast] / .toast / .notification / aria-live appearance, [role=dialog] / [aria-modal=true] appearance. Returns success=false with "submit silently rejected (likely anti-bot)" when no signal fires. Use on form submits when the until_* destination isn't known. Ignored when any until_* is set (those are more specific).`),
25582
- within_selector: external_exports.string().optional().describe(`Limit candidate matches to this CSS selector's subtree (mirrors find_text's scope_selector). Use to scope nth-counting to one section of a long form: click_element("Minor Issue(s)", nth=1, within_selector="#response-b-style"). Returns success=false with scope_missed=true if the selector does not match.`),
25669
+ within_selector: external_exports.string().optional().describe(`Limit candidate matches to this CSS selector's subtree (mirrors find_text's scope_selector). Use to scope nth-counting to one section of a long form: click_element("Approve", nth=1, within_selector="#section-b"). Returns success=false with scope_missed=true if the selector does not match.`),
25583
25670
  near_text: external_exports.string().optional().describe("Find the nearest container whose heading starts with this text, then scope candidates to that container's subtree. Ignored when within_selector is set. Useful when the target section has no stable CSS selector but the heading is unique."),
25584
- try_fiber: external_exports.boolean().optional().describe(`Opt-in last-resort fallback when silently_rejected fires. After the 1500ms activity probe reports zero activity, chromeflow walks the React fiber tree from the matched element (up to 12 levels), finds the nearest \`__reactProps$.onClick\` prop, and invokes it with a minimal synthetic event. Useful on React-heavy SPAs whose action buttons pass through isTrusted=true checks even on CDP events. Returns fiber_attempted=true in the response when the path was taken. Do NOT default to this \u2014 fiber-prop walking is undocumented and may misbehave on mangled production builds. Reserve for repeat silently_rejected on a known-safe React site.`)
25671
+ try_fiber: external_exports.boolean().optional().describe(`Opt-in last-resort fallback when silently_rejected fires. After the 1500ms activity probe reports zero activity, chromeflow walks the React fiber tree from the matched element (up to 12 levels), finds the nearest \`__reactProps$.onClick\` prop, and invokes it with a minimal synthetic event. Useful on React-heavy SPAs whose action buttons pass through isTrusted=true checks even on CDP events. Returns fiber_attempted=true in the response when the path was taken. Do NOT default to this \u2014 fiber-prop walking is undocumented and may misbehave on mangled production builds. Reserve for repeat silently_rejected on a known-safe React site.`),
25672
+ via: external_exports.enum(["auto", "cdp", "fiber"]).optional().describe(`Click dispatch mode. "auto" (default): CDP click, then fiber fallback when try_fiber=true and the activity probe failed. "cdp": CDP click only, no fiber fallback ever. "fiber": skip the CDP bezier + activity probe entirely and invoke __reactProps$.onClick directly. Use "fiber" on React-heavy SPAs (Outlier-style dashboards) where you already know the site is fiber-only \u2014 cuts ~3 seconds of ceremony off the round trip. The fiber path is undocumented React internal access, prefer "auto" until you've confirmed the site needs it.`),
25673
+ in_dialog: external_exports.boolean().optional().describe(`Scope candidate matches to the topmost open dialog (\`[role=dialog]\`, \`[role=alertdialog]\`, or \`<dialog open>\`), highest z-index wins. Use when Radix/Headless UI dialogs portal to document.body and a generic textHint like "Cancel" would otherwise match the wrong button. Returns scope_missed=true when no dialog is open.`),
25674
+ dialog_query: external_exports.string().optional().describe(`Scope candidate matches to a specific dialog by heading or aria-label substring. Use when multiple dialogs are open and in_dialog (topmost) would pick the wrong one \u2014 e.g. click_element("Confirm", dialog_query="Delete account"). Mutually exclusive with in_dialog; dialog_query wins when both are set.`)
25585
25675
  },
25586
- async ({ textHint, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber }) => {
25676
+ async ({ textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber, via, in_dialog, dialog_query }) => {
25677
+ if (!textHint && !selector || textHint && selector) {
25678
+ return {
25679
+ content: [{ type: "text", text: "click_element requires exactly one of textHint or selector" }]
25680
+ };
25681
+ }
25682
+ const targetLabel = textHint ?? `selector="${selector}"`;
25587
25683
  const wsTimeout = Math.max(3e4, (until_timeout_ms ?? 0) + 1e4);
25588
25684
  let response;
25589
25685
  try {
25590
25686
  response = await bridge.request(
25591
- { type: "click_element", textHint, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber },
25687
+ { type: "click_element", textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber, via, in_dialog, dialog_query },
25592
25688
  wsTimeout
25593
25689
  );
25594
25690
  } catch (err) {
@@ -25606,14 +25702,14 @@ Current URL: ${activeTab.url}`;
25606
25702
  content: [
25607
25703
  {
25608
25704
  type: "text",
25609
- text: `Could not confirm click on "${textHint}": ${errMsg}. The click MAY have already fired \u2014 the page just took longer than ${wsTimeout}ms to respond. Verify with get_page_text or wait_for_selector before retrying. Re-clicking can toggle the wrong way on React-controlled radios.${stateLine}`
25705
+ text: `Could not confirm click on "${targetLabel}": ${errMsg}. The click MAY have already fired \u2014 the page just took longer than ${wsTimeout}ms to respond. Verify with get_page_text or wait_for_selector before retrying. Re-clicking can toggle the wrong way on React-controlled radios.${stateLine}`
25610
25706
  }
25611
25707
  ]
25612
25708
  };
25613
25709
  }
25614
25710
  return {
25615
25711
  content: [
25616
- { type: "text", text: `Could not click "${textHint}": ${errMsg}` }
25712
+ { type: "text", text: `Could not click "${targetLabel}": ${errMsg}` }
25617
25713
  ]
25618
25714
  };
25619
25715
  }
@@ -25635,7 +25731,7 @@ Current URL: ${activeTab.url}`;
25635
25731
  content: [
25636
25732
  {
25637
25733
  type: "text",
25638
- text: `Could not click "${textHint}": ${r.message}${navLine}${focusLine}`
25734
+ text: `Could not click "${targetLabel}": ${r.message}${navLine}${focusLine}`
25639
25735
  }
25640
25736
  ]
25641
25737
  };
@@ -25681,26 +25777,57 @@ Clicked element: <${r.target.tag}>${r.target.text ? ` "${r.target.text}"` : ""}
25681
25777
  };
25682
25778
  }
25683
25779
  );
25780
// Registers the `click_at_coordinates` tool. The handler forwards a
// `click_at_coordinates` message (x, y, button, double) over the WebSocket
// bridge and reports the extension's `message`, plus a "Navigated" line when
// the response carries `navigated` and `after_url`.
server.tool(
  "click_at_coordinates",
  `Dispatch a real CDP mouse click at viewport (x, y). The only way to interact with cross-origin iframes \u2014 \`click_element\` refuses cross-origin frames because \`find_text\` can't enter them, but a CDP-level mouse event resolves at the renderer process and reaches the iframe's content the way an OS-level click does.

Coordinates are viewport CSS pixels, NOT screen coordinates. \`list_frames\` reports each iframe at \`(x, y, width, height)\` in this same space, so to click 50px in / 80px down inside an iframe: \`click_at_coordinates(frame.x + 50, frame.y + 80)\`.

Runs the same humanlike sequence as \`click_element\` (bezier approach path, settle-hover micro-tremor, press, release, post-click micro-move) so behavioural fingerprinters can't distinguish the call from any other chromeflow click. Skips the activity probe \u2014 cross-origin iframe activity isn't observable from the parent.

Refuses obviously-bad coordinates (negative, > 10000). Use this only when DOM matching has failed and you have a known target position from \`list_frames\` or a screenshot.`,
  {
    x: external_exports.number().describe("Viewport CSS X coordinate (left=0). Get from list_frames or a screenshot grid."),
    y: external_exports.number().describe("Viewport CSS Y coordinate (top=0). Get from list_frames or a screenshot grid."),
    button: external_exports.enum(["left", "right", "middle"]).optional().describe('Mouse button (default "left").'),
    double: external_exports.boolean().optional().describe("Fire a double-click instead of a single click. Default false.")
  },
  /**
   * @param {{ x: number, y: number, button?: "left"|"right"|"middle", double?: boolean }} args
   * @returns {Promise<{ content: Array<{ type: "text", text: string }>, isError?: boolean }>}
   */
  async ({ x, y, button, double }) => {
    // The tool description promises that negative and > 10000 coordinates are
    // refused. Enforce that here so a bad value never costs a bridge round
    // trip; non-finite values (NaN / Infinity) are rejected for the same reason.
    const MAX_COORDINATE = 1e4;
    const isBadCoordinate = (value) => !Number.isFinite(value) || value < 0 || value > MAX_COORDINATE;
    if (isBadCoordinate(x) || isBadCoordinate(y)) {
      return {
        content: [
          { type: "text", text: `click_at_coordinates refused (${x}, ${y}): x and y must be viewport CSS pixels between 0 and ${MAX_COORDINATE}.` }
        ],
        isError: true
      };
    }
    let r;
    try {
      r = await bridge.request({ type: "click_at_coordinates", x, y, button, double });
    } catch (err) {
      // Bridge failures (timeout, extension-side error) previously escaped as a
      // bare rejection. Report them with the target position, mirroring the
      // "Could not click ..." wording click_element uses, and keep the result
      // flagged as a failure.
      const errMsg = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Could not click at (${x}, ${y}): ${errMsg}` }],
        isError: true
      };
    }
    // Append the destination URL only when the response says the click navigated.
    const navLine = r.navigated && r.after_url ? `\n\u2192 Navigated: ${r.after_url}` : "";
    return { content: [{ type: "text", text: `${r.message}${navLine}` }] };
  }
);
25684
25803
  server.tool(
25685
25804
  "wait_for",
25686
- `Wait for one of: a CSS selector to appear, a text substring to appear, or an existing element's subtree to mutate. Pass exactly one of \`selector\`, \`text\`, or \`change_in\`. Pierces open AND closed shadow roots (text \`scope_selector\` pierces too). Pass \`shadow_root: true\` when waiting for the host's shadowRoot to attach (post-SPA-navigation hydration). \`scope_selector\` limits text-mode search; \`regex: true\` interprets text as a case-insensitive regex; \`frame: "iframe.selector"\` waits inside a same-origin iframe (text mode). Pass \`since: "now"\` in text mode to skip the initial check and only resolve on text appearing in a NEW DOM mutation \u2014 defeats the "stale instruction panels still in DOM" false-positive.`,
25805
+ `Wait for one of: a CSS selector to appear, a text substring (or any of an array of substrings) to appear, or an existing element's subtree to mutate. Pass exactly one of \`selector\`, \`text\`, or \`change_in\`. Pierces open AND closed shadow roots (text \`scope_selector\` pierces too). Pass \`shadow_root: true\` when waiting for the host's shadowRoot to attach (post-SPA-navigation hydration). \`scope_selector\` limits text-mode search; \`regex: true\` interprets text as a case-insensitive regex; \`frame: "iframe.selector"\` waits inside a same-origin iframe (text mode).
25806
+
25807
+ Text mode accepts an array \u2014 \`text: ["New session", "Error", "Stop"]\` resolves on the first match and the response carries \`matched_query\` so you know which entry fired. Useful for "wait for success OR failure" without a polling loop.
25808
+
25809
+ On timeout, the response carries \`last_text\` \u2014 the trailing 240 chars of the scope's content \u2014 so you can see the page state when the wait gave up. If the deploy panel shows "Starting up... 47%" and never reaches "Live", you'll see "Starting up... 47%" in last_text and know to extend the timeout instead of debugging a phantom failure.
25810
+
25811
+ Pass \`since: "now"\` in text mode to skip the initial check and only resolve on text appearing in a NEW DOM mutation \u2014 defeats the "stale instruction panels still in DOM" false-positive. When the wait DOES match on the initial check faster than 50ms, the response carries \`initial_match_warning\` suggesting since:"now" so you don't accidentally short-circuit on stale state.`,
25687
25812
  {
25688
25813
  selector: external_exports.string().optional().describe("CSS selector to wait for."),
25689
- text: external_exports.string().optional().describe("Text substring (or regex with regex=true) to wait for."),
25814
+ text: external_exports.union([external_exports.string(), external_exports.array(external_exports.string()).min(1)]).optional().describe('Text substring(s) to wait for. String for single match, array for "any of" mode (resolves on the first match; response includes matched_query and matched_index).'),
25690
25815
  change_in: external_exports.string().optional().describe("CSS selector of an existing element whose subtree should mutate (MutationObserver)."),
25691
25816
  timeout_ms: external_exports.number().int().optional().describe("Max ms to wait (default 30000)."),
25692
25817
  poll_interval_ms: external_exports.number().int().optional().describe("Selector-mode poll interval (default 500). Set to 15000 for slow server-side jobs."),
25693
25818
  shadow_root: external_exports.boolean().optional().describe("Selector mode: require the matched host to have an attached shadowRoot. Default false."),
25694
25819
  scope_selector: external_exports.string().optional().describe("Text mode: limit search to this CSS selector's subtree. Pierces shadow roots."),
25695
- regex: external_exports.boolean().optional().describe("Text mode: interpret query as a case-insensitive regex."),
25820
+ regex: external_exports.boolean().optional().describe("Text mode: interpret query (each entry, if an array) as a case-insensitive regex."),
25696
25821
  frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to wait inside (text mode)."),
25697
25822
  since: external_exports.enum(["now"]).optional().describe(`Text mode: gate on a NEW mutation. Skips the initial check so already-present matches don't short-circuit. Use when the page keeps stale text in the DOM after a route change (e.g. stacked instruction panels) and you need to wait for the next render.`),
25823
+ whole_word: external_exports.boolean().optional().describe(`Text mode: gate matches on word boundaries. Use for common English words ("Live", "New", "Done") that would otherwise substring-match unrelated content (e.g. "Live" matching "delivery"). Default false.`),
25698
25824
  settle_ms: external_exports.number().int().optional().describe("change_in mode: ms to wait after the first mutation for batching (default 150)."),
25699
25825
  max_chars: external_exports.number().int().min(50).optional().describe("change_in mode: cap the returned text content (default 1000). Chat-style mutations can dump huge text; agents that need more should opt in explicitly.")
25700
25826
  },
25701
25827
  async (args) => {
25702
- const { selector, text, change_in, timeout_ms, poll_interval_ms, shadow_root, scope_selector, regex, frame, since, settle_ms, max_chars } = args;
25703
- const set = [selector, text, change_in].filter((v) => v !== void 0 && v !== null && v !== "").length;
25828
+ const { selector, text, change_in, timeout_ms, poll_interval_ms, shadow_root, scope_selector, regex, frame, since, settle_ms, max_chars, whole_word } = args;
25829
+ const isTextSet = text !== void 0 && text !== null && !(Array.isArray(text) && text.length === 0) && text !== "";
25830
+ const set = [selector, isTextSet ? text : void 0, change_in].filter((v) => v !== void 0 && v !== null && v !== "").length;
25704
25831
  if (set !== 1) {
25705
25832
  return { content: [{ type: "text", text: "wait_for: pass exactly one of selector, text, or change_in." }] };
25706
25833
  }
@@ -25715,15 +25842,24 @@ Clicked element: <${r.target.tag}>${r.target.text ? ` "${r.target.text}"` : ""}
25715
25842
  }
25716
25843
  if (text !== void 0) {
25717
25844
  const response2 = await bridge.request(
25718
- { type: "wait_for_text", query: text, timeout_ms: timeoutMs, scope_selector, regex, frame, since },
25845
+ { type: "wait_for_text", query: text, timeout_ms: timeoutMs, scope_selector, regex, frame, since, whole_word },
25719
25846
  timeoutMs + 5e3
25720
25847
  );
25721
25848
  const r2 = response2;
25722
25849
  if (r2.frame_error) return { content: [{ type: "text", text: r2.frame_error }] };
25723
- if (!r2.found) return { content: [{ type: "text", text: `Text "${text}" did not appear within ${timeoutMs}ms.` }] };
25724
- return { content: [{ type: "text", text: `Found "${text}" after ${r2.elapsed_ms}ms.
25850
+ const display = Array.isArray(text) ? text.map((t) => `"${t}"`).join(" / ") : `"${text}"`;
25851
+ if (!r2.found) {
25852
+ const tail = r2.last_text ? `
25853
+ Last text seen in scope (trailing 240 chars): ${JSON.stringify(r2.last_text)}` : "";
25854
+ return { content: [{ type: "text", text: `Text ${display} did not appear within ${timeoutMs}ms.${tail}` }] };
25855
+ }
25856
+ const whichMatched = r2.matched_query ? `
25857
+ matched: "${r2.matched_query}" (index ${r2.matched_index})` : "";
25858
+ const warn = r2.initial_match_warning ? `
25859
+ \u26A0 ${r2.initial_match_warning}` : "";
25860
+ return { content: [{ type: "text", text: `Found ${display} after ${r2.elapsed_ms}ms.${whichMatched}
25725
25861
  selector: ${r2.selector}
25726
- context: ${r2.context}` }] };
25862
+ context: ${r2.context}${warn}` }] };
25727
25863
  }
25728
25864
  const response = await bridge.request(
25729
25865
  { type: "wait_for_change", selector: change_in, timeout: timeoutMs, settle: settle_ms ?? 150 },
@@ -25755,7 +25891,11 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25755
25891
  );
25756
25892
  server.tool(
25757
25893
  "find_text",
25758
- `Search the active page for text and return actionable matches (text, surrounding context, best-effort CSS selector, clickable flag). Use this instead of get_page_text when checking "is X on the page?" or locating a clickable target. Pierces open shadow roots. Pass \`frame: "iframe.selector"\` for same-origin iframe search.`,
25894
+ `Search the active page for text and return actionable matches (text, surrounding context, best-effort CSS selector, clickable flag). Use this instead of get_page_text when checking "is X on the page?" or locating a clickable target. Pierces open AND closed shadow roots. Pass \`frame: "iframe.selector"\` for same-origin iframe search.
25895
+
25896
+ When visible_only=true (the default) filters out all matches AND there were hidden matches, the response surfaces the hidden count so you can re-run with visible_only=false instead of guessing "is this on the page or not?"
25897
+
25898
+ Scope helpers: \`in_dialog: true\` restricts the search to the topmost open dialog; \`dialog_query: "Select"\` restricts it to a dialog whose heading or aria-label matches. Mirrors click_element's dialog scoping so the same flag works across discovery and action.`,
25759
25899
  {
25760
25900
  query: external_exports.string().describe("Text to search for. Substring by default; regex=true \u2192 case-insensitive regex."),
25761
25901
  max: external_exports.number().int().min(1).optional().describe("Maximum matches to return (default 5). total_matches is reported even when truncated."),
@@ -25763,9 +25903,12 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25763
25903
  regex: external_exports.boolean().optional().describe("Treat query as regex (case-insensitive). Default false."),
25764
25904
  visible_only: external_exports.boolean().optional().describe("Skip display:none / visibility:hidden / aria-hidden=true. Default true."),
25765
25905
  context_chars: external_exports.number().int().min(0).optional().describe("Surrounding context chars per match (default 40)."),
25766
- frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside.")
25906
+ frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside."),
25907
+ in_dialog: external_exports.boolean().optional().describe("Scope to the topmost open [role=dialog] / [role=alertdialog] / <dialog open>. Mirrors click_element."),
25908
+ dialog_query: external_exports.string().optional().describe("Scope to the dialog whose heading or aria-label contains this substring. Mirrors click_element."),
25909
+ whole_word: external_exports.boolean().optional().describe(`Gate matches on word boundaries. Use for common English words ("Live", "New", "Done", "Confirm") that would otherwise substring-match unrelated pre-rendered content (e.g. "Live" matching "delivery", "Done" matching "abandoned"). Default false to preserve the substring-by-default contract; flip on whenever your query is a single common word that may also appear inside larger words.`)
25767
25910
  },
25768
- async ({ query, max, scope_selector, regex, visible_only, context_chars, frame }) => {
25911
+ async ({ query, max, scope_selector, regex, visible_only, context_chars, frame, in_dialog, dialog_query, whole_word }) => {
25769
25912
  const response = await bridge.request({
25770
25913
  type: "find_text",
25771
25914
  query,
@@ -25774,7 +25917,10 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25774
25917
  regex,
25775
25918
  visible_only,
25776
25919
  context_chars: context_chars ?? 40,
25777
- frame
25920
+ frame,
25921
+ in_dialog,
25922
+ dialog_query,
25923
+ whole_word
25778
25924
  });
25779
25925
  const r = response;
25780
25926
  if (r.frame_error) {
@@ -25788,9 +25934,11 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25788
25934
  };
25789
25935
  }
25790
25936
  if (r.matches.length === 0) {
25937
+ const hidden = r.hidden_count ?? 0;
25938
+ const hint = visible_only !== false && hidden > 0 ? ` ${hidden} hidden match(es) skipped (display:none / visibility:hidden / aria-hidden / off-viewport). Set visible_only=false to include them.` : "";
25791
25939
  return {
25792
25940
  content: [
25793
- { type: "text", text: `No matches found for "${query}".` }
25941
+ { type: "text", text: `No visible matches found for "${query}".${hint}` }
25794
25942
  ]
25795
25943
  };
25796
25944
  }
@@ -25900,7 +26048,7 @@ ${lines.join("\n")}${shadowSection}` }] };
25900
26048
  }
25901
26049
 
25902
26050
  // packages/mcp-server/src/index.ts
25903
- var PACKAGE_VERSION = true ? "0.9.11" : "dev";
26051
+ var PACKAGE_VERSION = true ? "0.10.0" : "dev";
25904
26052
  main().catch((err) => {
25905
26053
  console.error("[chromeflow] Fatal error:", err);
25906
26054
  process.exit(1);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "chromeflow",
3
- "version": "0.9.11",
3
+ "version": "0.10.0",
4
4
  "description": "MCP server for chromeflow — lets Claude Code or Codex CLI drive your real Chrome browser with sessions intact. Plugin install recommended; npx chromeflow for manual MCP wiring.",
5
5
  "type": "module",
6
6
  "main": "./bin/chromeflow.mjs",