chromeflow 0.9.12 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bin/chromeflow.mjs +189 -49
  2. package/package.json +1 -1
@@ -24664,6 +24664,11 @@ var WsBridge = class {
24664
24664
  } catch {
24665
24665
  return;
24666
24666
  }
24667
+ if (msg.type === "progress") {
24668
+ const pending = this.pending.get(msg.requestId);
24669
+ if (pending) pending.refresh();
24670
+ return;
24671
+ }
24667
24672
  if (msg.type === "ready") {
24668
24673
  console.error("[chromeflow] Extension ready");
24669
24674
  const cwd = process.cwd();
@@ -24677,14 +24682,14 @@ var WsBridge = class {
24677
24682
  }));
24678
24683
  return;
24679
24684
  }
24680
- const pending = this.pending.get(msg.requestId);
24681
- if (pending) {
24682
- clearTimeout(pending.timer);
24685
+ const pending2 = this.pending.get(msg.requestId);
24686
+ if (pending2) {
24687
+ clearTimeout(pending2.timer);
24683
24688
  this.pending.delete(msg.requestId);
24684
24689
  if (msg.type === "error") {
24685
- pending.reject(new Error(msg.message));
24690
+ pending2.reject(new Error(msg.message));
24686
24691
  } else {
24687
- pending.resolve(msg);
24692
+ pending2.resolve(msg);
24688
24693
  }
24689
24694
  }
24690
24695
  });
@@ -24720,11 +24725,23 @@ var WsBridge = class {
24720
24725
  }
24721
24726
  const requestId = crypto.randomUUID();
24722
24727
  return new Promise((resolve2, reject) => {
24723
- const timer = setTimeout(() => {
24728
+ let lastProgressAt = Date.now();
24729
+ const fire = () => {
24724
24730
  this.pending.delete(requestId);
24725
- reject(new Error(`Request timed out after ${timeoutMs}ms`));
24726
- }, timeoutMs);
24727
- this.pending.set(requestId, { resolve: resolve2, reject, timer });
24731
+ reject(new Error(`Request timed out after ${timeoutMs}ms (last progress ${Date.now() - lastProgressAt}ms ago). The operation may have completed on the page; verify state before retrying.`));
24732
+ };
24733
+ let timer = setTimeout(fire, timeoutMs);
24734
+ const refresh = () => {
24735
+ clearTimeout(timer);
24736
+ lastProgressAt = Date.now();
24737
+ timer = setTimeout(fire, timeoutMs);
24738
+ };
24739
+ this.pending.set(requestId, {
24740
+ resolve: resolve2,
24741
+ reject,
24742
+ timer,
24743
+ refresh
24744
+ });
24728
24745
  this.client.send(JSON.stringify({ ...message, requestId }));
24729
24746
  });
24730
24747
  }
@@ -24808,6 +24825,11 @@ After tabs.onUpdated fires status=complete, chromeflow also runs a 6s settle che
24808
24825
  text += `
24809
24826
 
24810
24827
  \u26A0 expect_selector "${expect_selector}" never appeared within the 6s settle window. The page may be partially loaded or stuck.`;
24828
+ }
24829
+ if (r.anti_bot_detected) {
24830
+ text += `
24831
+
24832
+ \u26A0 anti_bot_detected: "${r.anti_bot_detected}" \u2014 the page returned a known block / challenge response. Page content is unlikely to be the intended target. Don't try to interact with it; navigate elsewhere or surface to the user.`;
24811
24833
  }
24812
24834
  return { content: [{ type: "text", text }] };
24813
24835
  }
@@ -24832,9 +24854,11 @@ Examples: switch_to_tab({tab: 1}) for the first tab, switch_to_tab({tab: "form"}
24832
24854
  };
24833
24855
  }
24834
24856
  const q = String(raw);
24835
- await bridge.request({ type: "switch_to_tab", query: q });
24857
+ const response = await bridge.request({ type: "switch_to_tab", query: q });
24858
+ const r = response;
24859
+ const echo = r.url ? ` \u2192 "${r.title ?? ""}" (${r.url})` : "";
24836
24860
  return {
24837
- content: [{ type: "text", text: `Switched to tab matching "${q}"` }]
24861
+ content: [{ type: "text", text: `Switched to tab matching "${q}"${echo}` }]
24838
24862
  };
24839
24863
  }
24840
24864
  );
@@ -24892,15 +24916,18 @@ ${keptList}` }]
24892
24916
  );
24893
24917
  server.tool(
24894
24918
  "take_screenshot",
24895
- `Capture a screenshot of the active tab. By default the image is returned to the agent inline UNLESS it exceeds ~500KB base64, in which case it's saved to a temp file and the path is returned instead (preserves the agent's context window). Set inline="always" to force inline regardless of size, or inline="never" to always write to a file. Set save_to or copy_to_clipboard to also share the image with the user. Reserved for cases where DOM lookup has already failed \u2014 use get_page_text and find_text for reading content.`,
24919
+ `Capture a screenshot of the active tab. By default the image is returned to the agent inline UNLESS it exceeds ~500KB base64, in which case it's saved to a temp file and the path is returned instead (preserves the agent's context window). Set inline="always" to force inline regardless of size, or inline="never" to always write to a file. Set save_to or copy_to_clipboard to also share the image with the user. Reserved for cases where DOM lookup has already failed \u2014 use get_page_text and find_text for reading content.
24920
+
24921
+ Refuses fast on pages that are in fullscreen mode (captureVisibleTab hangs there). Exit fullscreen first with execute_script("document.exitFullscreen()") or pass allow_fullscreen: true if you really must try anyway.`,
24896
24922
  {
24897
24923
  copy_to_clipboard: external_exports.boolean().optional().describe("Copy the PNG to the system clipboard (macOS only). Default false."),
24898
24924
  save_to: external_exports.enum(["downloads", "cwd", "none"]).optional().describe(`Save the PNG to disk: "downloads" (~/Downloads), "cwd" (the agent's working directory), or "none" (default \u2014 image returned only to the agent, no disk artifact).`),
24899
- inline: external_exports.enum(["auto", "always", "never"]).optional().describe('Whether to return the image base64 inline to the agent. "auto" (default): inline if under 500KB base64, otherwise write to a temp file and return the path. "always": inline regardless of size \u2014 large images may exceed the MCP token ceiling. "never": always return the path, never inline.')
24925
+ inline: external_exports.enum(["auto", "always", "never"]).optional().describe('Whether to return the image base64 inline to the agent. "auto" (default): inline if under 500KB base64, otherwise write to a temp file and return the path. "always": inline regardless of size \u2014 large images may exceed the MCP token ceiling. "never": always return the path, never inline.'),
24926
+ allow_fullscreen: external_exports.boolean().optional().describe("Bypass the fullscreen fast-fail. Default false. captureVisibleTab usually hangs in fullscreen mode and the request times out \u2014 set this only when you've confirmed the page can produce a screenshot in fullscreen.")
24900
24927
  },
24901
- async ({ copy_to_clipboard = false, save_to = "none", inline = "auto" }) => {
24928
+ async ({ copy_to_clipboard = false, save_to = "none", inline = "auto", allow_fullscreen }) => {
24902
24929
  const sharing = copy_to_clipboard || save_to !== "none";
24903
- const response = await bridge.request({ type: "screenshot", grid: !sharing });
24930
+ const response = await bridge.request({ type: "screenshot", grid: !sharing, allow_fullscreen });
24904
24931
  if (response.type !== "screenshot_response") {
24905
24932
  throw new Error("Unexpected response from extension");
24906
24933
  }
@@ -24928,8 +24955,10 @@ ${keptList}` }]
24928
24955
  } catch {
24929
24956
  }
24930
24957
  }
24958
+ const r = response;
24959
+ const meta = r.viewport && r.page && r.scroll ? ` viewport=${r.viewport.width}x${r.viewport.height}, page=${r.page.width}x${r.page.height}, scroll=(${r.scroll.x},${r.scroll.y}).` : "";
24931
24960
  if (shouldInline) {
24932
- const msg = notes.length ? notes.join(". ") + "." : `Screenshot captured (${response.width}x${response.height}, ${base64Len} base64 chars). Analyze the image to identify element positions for highlighting.`;
24961
+ const msg = notes.length ? notes.join(". ") + "." + meta : `Screenshot captured (${response.width}x${response.height}, ${base64Len} base64 chars).${meta} Analyze the image to identify element positions for highlighting.`;
24933
24962
  return {
24934
24963
  content: [
24935
24964
  { type: "image", data: response.image, mimeType: "image/png" },
@@ -24937,7 +24966,7 @@ ${keptList}` }]
24937
24966
  ]
24938
24967
  };
24939
24968
  }
24940
- notes.push(`Image saved to ${landedPath} (${response.width}x${response.height}, ~${Math.round(imageBuffer.byteLength / 1024)}KB) \u2014 Read the file or use OS image viewer. To force inline despite size, pass inline="always".`);
24969
+ notes.push(`Image saved to ${landedPath} (${response.width}x${response.height}, ~${Math.round(imageBuffer.byteLength / 1024)}KB).${meta} Read the file or use OS image viewer. To force inline despite size, pass inline="always".`);
24941
24970
  return {
24942
24971
  content: [{ type: "text", text: notes.join(". ") + "." }]
24943
24972
  };
@@ -25021,34 +25050,37 @@ The saved file path can be passed directly to set_file_input(hint, file_path) to
25021
25050
  "get_form_fields",
25022
25051
  `Inventory form fields on the active page (inputs, textareas, selects, CodeMirror editors). Sorted top-to-bottom by y-position; includes fields below the fold.
25023
25052
 
25024
- Pass \`query\` to filter+rank by label/placeholder/aria-label/name/id (the old find_input behavior \u2014 match strength reported as aria-eq / placeholder-eq / label-text-eq / name-eq / id-eq / *-includes / fuzzy-text-walk). Pass \`exact: true\` to refuse fuzzy text-walk matches.`,
25053
+ Pass \`query\` to filter+rank by label/placeholder/aria-label/name/id (the old find_input behavior \u2014 match strength reported as aria-eq / placeholder-eq / label-text-eq / name-eq / id-eq / *-includes / fuzzy-text-walk). Pass \`exact: true\` to refuse fuzzy text-walk matches.
25054
+
25055
+ Pass \`only_empty: true\` to filter the inventory to required-but-empty fields. This is the "why is Submit disabled" diagnostic: it returns just the required fields that haven't been filled yet (or radios/checkboxes still unchecked) and skips everything that's already populated. Required-ness is detected via the \`required\` attribute, \`aria-required\`, or a trailing \`*\` in the associated label text.`,
25025
25056
  {
25026
25057
  query: external_exports.string().optional().describe("If set, filter+rank fields by hint matching label/placeholder/aria-label/name/id."),
25027
25058
  max: external_exports.number().int().min(1).optional().describe("Maximum fields to return when query is set (default 5). Ignored without query (full inventory)."),
25028
25059
  type_filter: external_exports.string().optional().describe('Restrict to a specific input type (e.g. "email", "checkbox", "file"). Only with query.'),
25029
25060
  exact: external_exports.boolean().optional().describe("Refuse fuzzy text-walk and *-includes matches. Only with query."),
25030
- frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside. Cross-origin iframes are not supported.")
25061
+ frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside. Cross-origin iframes are not supported."),
25062
+ only_empty: external_exports.boolean().optional().describe("Filter inventory to required-but-empty fields only. Use as a Submit-disabled diagnostic. Ignored when query is set.")
25031
25063
  },
25032
- async ({ query, max, type_filter, exact, frame }) => {
25064
+ async ({ query, max, type_filter, exact, frame, only_empty }) => {
25033
25065
  if (query !== void 0) {
25034
25066
  const response2 = await bridge.request({ type: "find_input", query, type_filter, max, exact, frame });
25035
25067
  if (response2.type !== "find_input_response") throw new Error("Unexpected response");
25036
25068
  const r2 = response2;
25037
25069
  if (r2.frame_error) return { content: [{ type: "text", text: r2.frame_error }] };
25038
25070
  if (r2.fields.length === 0) return { content: [{ type: "text", text: `No form fields matched "${query}".` }] };
25039
- const header = `Found ${r2.fields.length}${r2.truncated ? ` of ${r2.total_matches}` : ""} input(s) for "${query}":`;
25071
+ const header2 = `Found ${r2.fields.length}${r2.truncated ? ` of ${r2.total_matches}` : ""} input(s) for "${query}":`;
25040
25072
  const lines2 = r2.fields.map((f, i) => {
25041
25073
  const ph = f.placeholder ? ` placeholder="${f.placeholder}"` : "";
25042
25074
  const val = f.value ? ` value="${f.value}"` : "";
25043
25075
  const under = f.under ? ` [under: "${f.under}"]` : "";
25044
25076
  return ` ${i + 1}. "${f.label}" type=${f.type}${ph}${val}${under} \u2014 match: ${f.match_kind}`;
25045
25077
  });
25046
- return { content: [{ type: "text", text: `${header}
25078
+ return { content: [{ type: "text", text: `${header2}
25047
25079
  ${lines2.join("\n")}
25048
25080
 
25049
25081
  To fill: fill_input("${r2.fields[0].label}", "<value>")` }] };
25050
25082
  }
25051
- const response = await bridge.request({ type: "get_form_fields" });
25083
+ const response = await bridge.request({ type: "get_form_fields", only_empty });
25052
25084
  if (response.type !== "form_fields_response") throw new Error("Unexpected response");
25053
25085
  const r = response;
25054
25086
  const fields = r.fields;
@@ -25059,20 +25091,25 @@ To fill: fill_input("${r2.fields[0].label}", "<value>")` }] };
25059
25091
 
25060
25092
  \u2139 OAuth providers detected on this form: ${r.oauthIndicators.join(", ")}. If the user wants to sign in via one of these, click it instead of filling email/password.` : "";
25061
25093
  if (fields.length === 0) {
25062
- return { content: [{ type: "text", text: "No form fields found on page." + (r.warning ?? "") + captchaLine + oauthLine }] };
25094
+ const empty = only_empty ? "No required-but-empty fields detected." : "No form fields found on page.";
25095
+ return { content: [{ type: "text", text: empty + (r.warning ?? "") + captchaLine + oauthLine }] };
25063
25096
  }
25064
25097
  const lines = fields.map((f) => {
25065
25098
  const val = f.value ? ` [currently: "${f.value}"]` : "";
25066
25099
  const ctx = f.context ? ` [under: "${f.context}"]` : "";
25067
- return `${f.index}. [${f.type}] "${f.label}"${val}${ctx} \u2014 y:${f.y}`;
25100
+ const req = f.required ? " *required" : "";
25101
+ return `${f.index}. [${f.type}] "${f.label}"${req}${val}${ctx} \u2014 y:${f.y}`;
25068
25102
  });
25069
- return { content: [{ type: "text", text: `Form fields (${fields.length} total, sorted top-to-bottom):
25103
+ const header = only_empty ? `Required-but-empty fields (${fields.length}):` : `Form fields (${fields.length} total, sorted top-to-bottom):`;
25104
+ return { content: [{ type: "text", text: `${header}
25070
25105
  ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
25071
25106
  }
25072
25107
  );
25073
25108
  server.tool(
25074
25109
  "type_text",
25075
- `Type text into the currently focused element via CDP keystrokes (produces isTrusted=true events). Use when fill_input fails because the page validates isTrusted (CodeMirror/Monaco/Ace editors, shadow DOM inputs, isTrusted-gated forms). Pass \`into_selector\` to focus the target before typing (shadow-piercing CSS) \u2014 combined with \`clear_first: true\`, this collapses the old "wait_for_click \u2192 execute_script selectAll \u2192 type_text" pattern into a single call. Pass \`frame: "iframe.selector"\` to type into a same-origin iframe's first editable element.`,
25110
+ `Type text into the currently focused element via CDP keystrokes (produces isTrusted=true events). Use when fill_input fails because the page validates isTrusted (CodeMirror/Monaco/Ace editors, shadow DOM inputs, isTrusted-gated forms). Pass \`into_selector\` to focus the target before typing (shadow-piercing CSS) \u2014 combined with \`clear_first: true\`, this collapses the old "wait_for_click \u2192 execute_script selectAll \u2192 type_text" pattern into a single call. Pass \`frame: "iframe.selector"\` to type into a same-origin iframe's first editable element.
25111
+
25112
+ **TipTap / ProseMirror auto-recovery**: when \`into_selector\` targets a contenteditable inside a \`.tiptap\` / \`.ProseMirror\` / \`[data-tiptap-editor]\` ancestor, type_text verifies post-type that the text actually landed. If tiptap's internal state machine silently dropped the CDP keystrokes (a known failure mode where the editor reverts to placeholder a few seconds later), type_text automatically re-fills via \`document.execCommand('insertText', ...)\` which tiptap accepts. The response message records "TipTap/ProseMirror silently dropped..., recovered via execCommand insertText" when this fired.`,
25076
25113
  {
25077
25114
  text: external_exports.string().describe("The text to type into the focused element"),
25078
25115
  into_selector: external_exports.string().optional().describe(
@@ -25086,7 +25123,7 @@ ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
25086
25123
  )
25087
25124
  },
25088
25125
  async ({ text, frame, into_selector, clear_first }) => {
25089
- const timeoutMs = Math.max(3e4, text.length * 90 + 15e3);
25126
+ const timeoutMs = Math.max(3e4, text.length * 110 + 15e3);
25090
25127
  const response = await bridge.request(
25091
25128
  { type: "type_text", text, frame, into_selector, clear_first },
25092
25129
  timeoutMs
@@ -25122,6 +25159,15 @@ ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
25122
25159
  "execute_script",
25123
25160
  `Execute JavaScript in a tab's MAIN world (the page's own context, not the extension's isolated world). Use for reading framework state or DOM properties not visible in text \u2014 prefer get_page_text for visible content. Top-level \`return\` and \`await\` are supported.
25124
25161
 
25162
+ **Object returns are auto-stringified** \u2014 return an object/array and the response carries its JSON. No need to wrap return values in JSON.stringify yourself.
25163
+
25164
+ **Shadow-piercing helpers are pre-injected** into every script:
25165
+ - \`$deep(selector, root?)\` \u2014 querySelector that walks open shadow roots
25166
+ - \`$deepAll(selector, root?)\` \u2014 querySelectorAll equivalent, returns an array
25167
+ - \`shadowDocument\` \u2014 the first attached open shadow root on the page, or \`document\` if none. Useful when an SPA mounts ALL of its UI inside a single root shadow host (Outlier-style annotation dashboards): replace every \`document.querySelector*\` call with \`shadowDocument.querySelector*\` and the same code now reaches the SPA's content.
25168
+
25169
+ The helpers pierce OPEN shadow roots only \u2014 MAIN world can't reach closed roots. For closed roots, use find_text / get_page_text / click_element / fill_input which pierce both kinds via chrome.dom.openOrClosedShadowRoot.
25170
+
25125
25171
  MAIN-world means the page's Content-Security-Policy applies: \`fetch()\` against authenticated APIs is often blocked by the page's connect-src directive. When that happens, switch to fetch_url \u2014 it runs in the extension's privileged context (full host_permissions, automatic cookie jar, no page CSP).
25126
25172
 
25127
25173
  CSP-strict pages that disallow eval (Stripe, GitHub) silently fall through to a CDP eval path. Page alerts (alert/confirm/prompt) fired since the last script appear as PAGE ALERT in the result.
@@ -25316,11 +25362,48 @@ Never use take_screenshot just to read page content \u2014 paginate with startIn
25316
25362
 
25317
25363
  ` + text;
25318
25364
  }
25365
+ if ((startIndex ?? 0) === 0 && r.viewport && r.page && r.scroll) {
25366
+ const footer = `
25367
+
25368
+ ---
25369
+ viewport: ${r.viewport.width}x${r.viewport.height}, page: ${r.page.width}x${r.page.height}, scroll: (${r.scroll.x}, ${r.scroll.y})`;
25370
+ text = text + footer;
25371
+ }
25319
25372
  return {
25320
25373
  content: [{ type: "text", text: text || "(no text found on page)" }]
25321
25374
  };
25322
25375
  }
25323
25376
  );
25377
+ server.tool(
25378
+ "get_page_html",
25379
+ `Get the raw HTML of the current page or a scoped element. Use when you need to parse structure (tables, attribute values, nested data) and \`get_page_text\` strips too much, or when you're extracting structured data from a page Claude can't easily reason about from text alone.
25380
+
25381
+ Pierces open AND closed shadow roots for the \`selector\` lookup (Radix portals, Stencil/Lit web components). \`<script>\`, \`<style>\`, \`<noscript>\` are stripped before returning.
25382
+
25383
+ Default \`max_chars\` is 50,000. If the page is bigger, the response carries \`truncated: true\` and \`total_chars\` so you can decide whether to scope further with \`selector\`.
25384
+
25385
+ When the goal is "is X on this page?" or "find clickable Y", use \`find_text\` instead \u2014 it returns a focused match list rather than a wall of HTML.`,
25386
+ {
25387
+ selector: external_exports.string().optional().describe(
25388
+ "CSS selector to scope the HTML to. Pierces closed shadow roots. Omit to return the main content area (or body)."
25389
+ ),
25390
+ max_chars: external_exports.number().int().min(1e3).optional().describe("Truncate after this many chars (default 50000). The response includes total_chars so you know if you missed anything.")
25391
+ },
25392
+ async ({ selector, max_chars }) => {
25393
+ const response = await bridge.request({ type: "get_page_html", selector, max_chars });
25394
+ if (response.type !== "page_html_response") throw new Error("Unexpected response");
25395
+ const r = response;
25396
+ const notes = [];
25397
+ if (r.selector_missed) notes.push(`selector "${selector}" not found, returning full body HTML`);
25398
+ if (r.selector_in_shadow) notes.push(`selector matched inside a closed shadow root`);
25399
+ if (r.truncated) notes.push(`truncated at ${max_chars ?? 5e4} of ${r.total_chars} chars; scope further with selector to see more`);
25400
+ const header = notes.length > 0 ? `[${notes.join("; ")}]
25401
+ ` : "";
25402
+ return {
25403
+ content: [{ type: "text", text: header + r.html }]
25404
+ };
25405
+ }
25406
+ );
25324
25407
  server.tool(
25325
25408
  "get_console_logs",
25326
25409
  `Read the browser console output (log, warn, error, info) captured since the page loaded.
@@ -25511,6 +25594,8 @@ Set binary=true for non-text responses (PDFs, images, zips) \u2014 the body is r
25511
25594
  }, wsTimeout);
25512
25595
  if (response.type !== "fetch_url_response") throw new Error(`Unexpected response: ${response.type}`);
25513
25596
  const r = response;
25597
+ const antiBotLine = r.anti_bot_detected ? `
25598
+ \u26A0 anti_bot_detected: "${r.anti_bot_detected}" \u2014 response body matches a known block / challenge page. Don't parse as the expected JSON/HTML; the user's IP may be challenged or the endpoint may require a real browser context.` : "";
25514
25599
  if (to_file) {
25515
25600
  const cwd = process.cwd();
25516
25601
  const resolved = isAbsolute(to_file) ? to_file : resolve(cwd, to_file);
@@ -25529,14 +25614,14 @@ Set binary=true for non-text responses (PDFs, images, zips) \u2014 the body is r
25529
25614
  type: "text",
25530
25615
  text: `HTTP ${r.status} ${r.status_text} \u2014 ${r.content_type || "no content-type"} \u2014 ${r.total_bytes} bytes
25531
25616
  Written to: ${resolved}
25532
- Size on disk: ${buf.byteLength}
25617
+ Size on disk: ${buf.byteLength}${antiBotLine}
25533
25618
 
25534
25619
  Headers:
25535
25620
  ${hdrLines}`
25536
25621
  }]
25537
25622
  };
25538
25623
  }
25539
- const header = `HTTP ${r.status} ${r.status_text} \u2014 ${r.content_type || "no content-type"} \u2014 ${r.total_bytes} bytes${r.truncated ? ` (truncated to ${max_bytes ?? 1e5}; set to_file=<path> to capture the full ${r.total_bytes} bytes)` : ""}`;
25624
+ const header = `HTTP ${r.status} ${r.status_text} \u2014 ${r.content_type || "no content-type"} \u2014 ${r.total_bytes} bytes${r.truncated ? ` (truncated to ${max_bytes ?? 1e5}; set to_file=<path> to capture the full ${r.total_bytes} bytes)` : ""}${antiBotLine}`;
25540
25625
  const bodyPart = r.body_base64 ? `
25541
25626
 
25542
25627
  [base64, ${r.body_base64.length} chars]
@@ -25573,7 +25658,7 @@ Scope matching with \`within_selector\` or \`near_text\` restricts where matches
25573
25658
 
25574
25659
  Shadow DOM (open AND closed) is pierced by default via chrome.dom.openOrClosedShadowRoot \u2014 Reddit faceplate-* / r-post-form-submit-button / web-component-heavy SPAs no longer need manual deepFind recipes.
25575
25660
 
25576
- ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit, X / Twitter, GitHub device-code, mcp.so) are silently rejected by isTrusted-aware form validators and CSRF/reCAPTCHA gates. Pass \`expect_submit: true\` to detect this case (returns success=false with "submit silently rejected" when no signal fires within 4s). For confirmed anti-bot sites, do NOT retry \u2014 pre-fill the form, then highlight the submit button and call wait_for_click so a real human gesture fires the submission.`,
25661
+ ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit, X / Twitter, mcp.so) are silently rejected by isTrusted-aware form validators and CSRF/reCAPTCHA gates. Pass \`expect_submit: true\` to detect this case (returns success=false with "submit silently rejected" when no signal fires within 4s). For confirmed anti-bot sites, do NOT retry \u2014 pre-fill the form, then highlight the submit button and call wait_for_click so a real human gesture fires the submission.`,
25577
25662
  {
25578
25663
  textHint: external_exports.string().optional().describe(
25579
25664
  "The visible label of the button or link (e.g. 'Save product', 'Continue', 'Add a product', 'Create'). Exactly one of textHint or selector must be set."
@@ -25590,9 +25675,12 @@ ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit
25590
25675
  expect_submit: external_exports.boolean().optional().describe(`Broad anti-bot detector. After the click, watch up to 4s for ANY of: URL change, [role=alert] / [data-sonner-toast] / .toast / .notification / aria-live appearance, [role=dialog] / [aria-modal=true] appearance. Returns success=false with "submit silently rejected (likely anti-bot)" when no signal fires. Use on form submits when the until_* destination isn't known. Ignored when any until_* is set (those are more specific).`),
25591
25676
  within_selector: external_exports.string().optional().describe(`Limit candidate matches to this CSS selector's subtree (mirrors find_text's scope_selector). Use to scope nth-counting to one section of a long form: click_element("Approve", nth=1, within_selector="#section-b"). Returns success=false with scope_missed=true if the selector does not match.`),
25592
25677
  near_text: external_exports.string().optional().describe("Find the nearest container whose heading starts with this text, then scope candidates to that container's subtree. Ignored when within_selector is set. Useful when the target section has no stable CSS selector but the heading is unique."),
25593
- try_fiber: external_exports.boolean().optional().describe(`Opt-in last-resort fallback when silently_rejected fires. After the 1500ms activity probe reports zero activity, chromeflow walks the React fiber tree from the matched element (up to 12 levels), finds the nearest \`__reactProps$.onClick\` prop, and invokes it with a minimal synthetic event. Useful on React-heavy SPAs whose action buttons pass through isTrusted=true checks even on CDP events. Returns fiber_attempted=true in the response when the path was taken. Do NOT default to this \u2014 fiber-prop walking is undocumented and may misbehave on mangled production builds. Reserve for repeat silently_rejected on a known-safe React site.`)
25678
+ try_fiber: external_exports.boolean().optional().describe(`Opt-in last-resort fallback when silently_rejected fires. After the 1500ms activity probe reports zero activity, chromeflow walks the React fiber tree from the matched element (up to 12 levels), finds the nearest \`__reactProps$.onClick\` prop, and invokes it with a minimal synthetic event. Useful on React-heavy SPAs whose action buttons pass through isTrusted=true checks even on CDP events. Returns fiber_attempted=true in the response when the path was taken. Do NOT default to this \u2014 fiber-prop walking is undocumented and may misbehave on mangled production builds. Reserve for repeat silently_rejected on a known-safe React site.`),
25679
+ via: external_exports.enum(["auto", "cdp", "fiber"]).optional().describe(`Click dispatch mode. "auto" (default): CDP click, then fiber fallback when try_fiber=true and the activity probe failed. "cdp": CDP click only, no fiber fallback ever. "fiber": skip the CDP bezier + activity probe entirely and invoke __reactProps$.onClick directly. Use "fiber" on React-heavy SPAs (Outlier-style dashboards) where you already know the site is fiber-only \u2014 cuts ~3 seconds of ceremony off the round trip. The fiber path is undocumented React internal access, prefer "auto" until you've confirmed the site needs it.`),
25680
+ in_dialog: external_exports.boolean().optional().describe(`Scope candidate matches to the topmost open dialog (\`[role=dialog]\`, \`[role=alertdialog]\`, or \`<dialog open>\`), highest z-index wins. Use when Radix/Headless UI dialogs portal to document.body and a generic textHint like "Cancel" would otherwise match the wrong button. Returns scope_missed=true when no dialog is open.`),
25681
+ dialog_query: external_exports.string().optional().describe(`Scope candidate matches to a specific dialog by heading or aria-label substring. Use when multiple dialogs are open and in_dialog (topmost) would pick the wrong one \u2014 e.g. click_element("Confirm", dialog_query="Delete account"). Mutually exclusive with in_dialog; dialog_query wins when both are set.`)
25594
25682
  },
25595
- async ({ textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber }) => {
25683
+ async ({ textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber, via, in_dialog, dialog_query }) => {
25596
25684
  if (!textHint && !selector || textHint && selector) {
25597
25685
  return {
25598
25686
  content: [{ type: "text", text: "click_element requires exactly one of textHint or selector" }]
@@ -25603,7 +25691,7 @@ ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit
25603
25691
  let response;
25604
25692
  try {
25605
25693
  response = await bridge.request(
25606
- { type: "click_element", textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber },
25694
+ { type: "click_element", textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber, via, in_dialog, dialog_query },
25607
25695
  wsTimeout
25608
25696
  );
25609
25697
  } catch (err) {
@@ -25696,26 +25784,57 @@ Clicked element: <${r.target.tag}>${r.target.text ? ` "${r.target.text}"` : ""}
25696
25784
  };
25697
25785
  }
25698
25786
  );
25787
// Registers the `click_at_coordinates` tool on `server`: name, human-readable
// description, input schema, and an async handler that forwards the request to
// the browser extension through `bridge`.
+ server.tool(
25788
+ "click_at_coordinates",
25789
+ `Dispatch a real CDP mouse click at viewport (x, y). The only way to interact with cross-origin iframes \u2014 \`click_element\` refuses cross-origin frames because \`find_text\` can't enter them, but a CDP-level mouse event resolves at the renderer process and reaches the iframe's content the way an OS-level click does.
25790
+
25791
+ Coordinates are viewport CSS pixels, NOT screen coordinates. \`list_frames\` reports each iframe at \`(x, y, width, height)\` in this same space, so to click 50px in / 80px down inside an iframe: \`click_at_coordinates(frame.x + 50, frame.y + 80)\`.
25792
+
25793
+ Runs the same humanlike sequence as \`click_element\` (bezier approach path, settle-hover micro-tremor, press, release, post-click micro-move) so behavioural fingerprinters can't distinguish the call from any other chromeflow click. Skips the activity probe \u2014 cross-origin iframe activity isn't observable from the parent.
25794
+
25795
+ Refuses obviously-bad coordinates (negative, > 10000). Use this only when DOM matching has failed and you have a known target position from \`list_frames\` or a screenshot.`,
25796
// Input schema: `x` and `y` are required numbers; `button` and `double` are optional.
// NOTE(review): the schema itself places no range limit on x / y, and this handler
// does no range check either — the "Refuses obviously-bad coordinates" behaviour in
// the description is presumably enforced on the extension side; confirm.
+ {
25797
+ x: external_exports.number().describe("Viewport CSS X coordinate (left=0). Get from list_frames or a screenshot grid."),
25798
+ y: external_exports.number().describe("Viewport CSS Y coordinate (top=0). Get from list_frames or a screenshot grid."),
25799
+ button: external_exports.enum(["left", "right", "middle"]).optional().describe('Mouse button (default "left").'),
25800
+ double: external_exports.boolean().optional().describe("Fire a double-click instead of a single click. Default false.")
25801
+ },
25802
// Handler: sends one `click_at_coordinates` message over the bridge and turns the
// reply into a single text content item.
+ async ({ x, y, button, double }) => {
25803
// `button` and `double` are forwarded as received (possibly undefined); the defaults
// named in the schema descriptions are not applied here.
// No timeout argument is passed to bridge.request (the click_element call passes
// `wsTimeout`), and there is no try/catch, so a rejected request propagates out of
// this handler to whatever invoked it.
+ const response = await bridge.request({ type: "click_at_coordinates", x, y, button, double });
25804
// Plain alias of `response` — presumably the residue of a compiled-away type cast.
+ const r = response;
25805
// The navigation suffix is appended only when the reply has both a truthy
// `navigated` flag and a truthy `after_url`; otherwise it is the empty string.
+ const navLine = r.navigated && r.after_url ? `
25806
+ \u2192 Navigated: ${r.after_url}` : "";
25807
// Reply text is the extension-provided `r.message` followed by the optional suffix.
+ return { content: [{ type: "text", text: `${r.message}${navLine}` }] };
25808
+ }
25809
+ );
25699
25810
  server.tool(
25700
25811
  "wait_for",
25701
- `Wait for one of: a CSS selector to appear, a text substring to appear, or an existing element's subtree to mutate. Pass exactly one of \`selector\`, \`text\`, or \`change_in\`. Pierces open AND closed shadow roots (text \`scope_selector\` pierces too). Pass \`shadow_root: true\` when waiting for the host's shadowRoot to attach (post-SPA-navigation hydration). \`scope_selector\` limits text-mode search; \`regex: true\` interprets text as a case-insensitive regex; \`frame: "iframe.selector"\` waits inside a same-origin iframe (text mode). Pass \`since: "now"\` in text mode to skip the initial check and only resolve on text appearing in a NEW DOM mutation \u2014 defeats the "stale instruction panels still in DOM" false-positive.`,
25812
+ `Wait for one of: a CSS selector to appear, a text substring (or any of an array of substrings) to appear, or an existing element's subtree to mutate. Pass exactly one of \`selector\`, \`text\`, or \`change_in\`. Pierces open AND closed shadow roots (text \`scope_selector\` pierces too). Pass \`shadow_root: true\` when waiting for the host's shadowRoot to attach (post-SPA-navigation hydration). \`scope_selector\` limits text-mode search; \`regex: true\` interprets text as a case-insensitive regex; \`frame: "iframe.selector"\` waits inside a same-origin iframe (text mode).
25813
+
25814
+ Text mode accepts an array \u2014 \`text: ["New session", "Error", "Stop"]\` resolves on the first match and the response carries \`matched_query\` so you know which entry fired. Useful for "wait for success OR failure" without a polling loop.
25815
+
25816
+ On timeout, the response carries \`last_text\` \u2014 the trailing 240 chars of the scope's content \u2014 so you can see the page state when the wait gave up. If the deploy panel shows "Starting up... 47%" and never reaches "Live", you'll see "Starting up... 47%" in last_text and know to extend the timeout instead of debugging a phantom failure.
25817
+
25818
+ Pass \`since: "now"\` in text mode to skip the initial check and only resolve on text appearing in a NEW DOM mutation \u2014 defeats the "stale instruction panels still in DOM" false-positive. When the wait DOES match on the initial check faster than 50ms, the response carries \`initial_match_warning\` suggesting since:"now" so you don't accidentally short-circuit on stale state.`,
25702
25819
  {
25703
25820
  selector: external_exports.string().optional().describe("CSS selector to wait for."),
25704
- text: external_exports.string().optional().describe("Text substring (or regex with regex=true) to wait for."),
25821
+ text: external_exports.union([external_exports.string(), external_exports.array(external_exports.string()).min(1)]).optional().describe('Text substring(s) to wait for. String for single match, array for "any of" mode (resolves on the first match; response includes matched_query and matched_index).'),
25705
25822
  change_in: external_exports.string().optional().describe("CSS selector of an existing element whose subtree should mutate (MutationObserver)."),
25706
25823
  timeout_ms: external_exports.number().int().optional().describe("Max ms to wait (default 30000)."),
25707
25824
  poll_interval_ms: external_exports.number().int().optional().describe("Selector-mode poll interval (default 500). Set to 15000 for slow server-side jobs."),
25708
25825
  shadow_root: external_exports.boolean().optional().describe("Selector mode: require the matched host to have an attached shadowRoot. Default false."),
25709
25826
  scope_selector: external_exports.string().optional().describe("Text mode: limit search to this CSS selector's subtree. Pierces shadow roots."),
25710
- regex: external_exports.boolean().optional().describe("Text mode: interpret query as a case-insensitive regex."),
25827
+ regex: external_exports.boolean().optional().describe("Text mode: interpret query (each entry, if an array) as a case-insensitive regex."),
25711
25828
  frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to wait inside (text mode)."),
25712
25829
  since: external_exports.enum(["now"]).optional().describe(`Text mode: gate on a NEW mutation. Skips the initial check so already-present matches don't short-circuit. Use when the page keeps stale text in the DOM after a route change (e.g. stacked instruction panels) and you need to wait for the next render.`),
25830
+ whole_word: external_exports.boolean().optional().describe(`Text mode: gate matches on word boundaries. Use for common English words ("Live", "New", "Done") that would otherwise substring-match unrelated content (e.g. "Live" matching "delivery"). Default false.`),
25713
25831
  settle_ms: external_exports.number().int().optional().describe("change_in mode: ms to wait after the first mutation for batching (default 150)."),
25714
25832
  max_chars: external_exports.number().int().min(50).optional().describe("change_in mode: cap the returned text content (default 1000). Chat-style mutations can dump huge text; agents that need more should opt in explicitly.")
25715
25833
  },
25716
25834
  async (args) => {
25717
- const { selector, text, change_in, timeout_ms, poll_interval_ms, shadow_root, scope_selector, regex, frame, since, settle_ms, max_chars } = args;
25718
- const set = [selector, text, change_in].filter((v) => v !== void 0 && v !== null && v !== "").length;
25835
+ const { selector, text, change_in, timeout_ms, poll_interval_ms, shadow_root, scope_selector, regex, frame, since, settle_ms, max_chars, whole_word } = args;
25836
+ const isTextSet = text !== void 0 && text !== null && !(Array.isArray(text) && text.length === 0) && text !== "";
25837
+ const set = [selector, isTextSet ? text : void 0, change_in].filter((v) => v !== void 0 && v !== null && v !== "").length;
25719
25838
  if (set !== 1) {
25720
25839
  return { content: [{ type: "text", text: "wait_for: pass exactly one of selector, text, or change_in." }] };
25721
25840
  }
@@ -25730,15 +25849,24 @@ Clicked element: <${r.target.tag}>${r.target.text ? ` "${r.target.text}"` : ""}
25730
25849
  }
25731
25850
  if (text !== void 0) {
25732
25851
  const response2 = await bridge.request(
25733
- { type: "wait_for_text", query: text, timeout_ms: timeoutMs, scope_selector, regex, frame, since },
25852
+ { type: "wait_for_text", query: text, timeout_ms: timeoutMs, scope_selector, regex, frame, since, whole_word },
25734
25853
  timeoutMs + 5e3
25735
25854
  );
25736
25855
  const r2 = response2;
25737
25856
  if (r2.frame_error) return { content: [{ type: "text", text: r2.frame_error }] };
25738
- if (!r2.found) return { content: [{ type: "text", text: `Text "${text}" did not appear within ${timeoutMs}ms.` }] };
25739
- return { content: [{ type: "text", text: `Found "${text}" after ${r2.elapsed_ms}ms.
25857
+ const display = Array.isArray(text) ? text.map((t) => `"${t}"`).join(" / ") : `"${text}"`;
25858
+ if (!r2.found) {
25859
+ const tail = r2.last_text ? `
25860
+ Last text seen in scope (trailing 240 chars): ${JSON.stringify(r2.last_text)}` : "";
25861
+ return { content: [{ type: "text", text: `Text ${display} did not appear within ${timeoutMs}ms.${tail}` }] };
25862
+ }
25863
+ const whichMatched = r2.matched_query ? `
25864
+ matched: "${r2.matched_query}" (index ${r2.matched_index})` : "";
25865
+ const warn = r2.initial_match_warning ? `
25866
+ \u26A0 ${r2.initial_match_warning}` : "";
25867
+ return { content: [{ type: "text", text: `Found ${display} after ${r2.elapsed_ms}ms.${whichMatched}
25740
25868
  selector: ${r2.selector}
25741
- context: ${r2.context}` }] };
25869
+ context: ${r2.context}${warn}` }] };
25742
25870
  }
25743
25871
  const response = await bridge.request(
25744
25872
  { type: "wait_for_change", selector: change_in, timeout: timeoutMs, settle: settle_ms ?? 150 },
@@ -25770,7 +25898,11 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25770
25898
  );
25771
25899
  server.tool(
25772
25900
  "find_text",
25773
- `Search the active page for text and return actionable matches (text, surrounding context, best-effort CSS selector, clickable flag). Use this instead of get_page_text when checking "is X on the page?" or locating a clickable target. Pierces open shadow roots. Pass \`frame: "iframe.selector"\` for same-origin iframe search.`,
25901
+ `Search the active page for text and return actionable matches (text, surrounding context, best-effort CSS selector, clickable flag). Use this instead of get_page_text when checking "is X on the page?" or locating a clickable target. Pierces open AND closed shadow roots. Pass \`frame: "iframe.selector"\` for same-origin iframe search.
25902
+
25903
+ When visible_only=true (the default) filters out all matches AND there were hidden matches, the response surfaces the hidden count so you can re-run with visible_only=false instead of guessing "is this on the page or not?"
25904
+
25905
+ Scope helpers: \`in_dialog: true\` restricts the search to the topmost open dialog; \`dialog_query: "Select"\` restricts it to a dialog whose heading or aria-label matches. Mirrors click_element's dialog scoping so the same flag works across discovery and action.`,
25774
25906
  {
25775
25907
  query: external_exports.string().describe("Text to search for. Substring by default; regex=true \u2192 case-insensitive regex."),
25776
25908
  max: external_exports.number().int().min(1).optional().describe("Maximum matches to return (default 5). total_matches is reported even when truncated."),
@@ -25778,9 +25910,12 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25778
25910
  regex: external_exports.boolean().optional().describe("Treat query as regex (case-insensitive). Default false."),
25779
25911
  visible_only: external_exports.boolean().optional().describe("Skip display:none / visibility:hidden / aria-hidden=true. Default true."),
25780
25912
  context_chars: external_exports.number().int().min(0).optional().describe("Surrounding context chars per match (default 40)."),
25781
- frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside.")
25913
+ frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside."),
25914
+ in_dialog: external_exports.boolean().optional().describe("Scope to the topmost open [role=dialog] / [role=alertdialog] / <dialog open>. Mirrors click_element."),
25915
+ dialog_query: external_exports.string().optional().describe("Scope to the dialog whose heading or aria-label contains this substring. Mirrors click_element."),
25916
+ whole_word: external_exports.boolean().optional().describe(`Gate matches on word boundaries. Use for common English words ("Live", "New", "Done", "Confirm") that would otherwise substring-match unrelated pre-rendered content (e.g. "Live" matching "delivery", "Done" matching "abandoned"). Default false to preserve the substring-by-default contract; flip on whenever your query is a single common word that may also appear inside larger words.`)
25782
25917
  },
25783
- async ({ query, max, scope_selector, regex, visible_only, context_chars, frame }) => {
25918
+ async ({ query, max, scope_selector, regex, visible_only, context_chars, frame, in_dialog, dialog_query, whole_word }) => {
25784
25919
  const response = await bridge.request({
25785
25920
  type: "find_text",
25786
25921
  query,
@@ -25789,7 +25924,10 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25789
25924
  regex,
25790
25925
  visible_only,
25791
25926
  context_chars: context_chars ?? 40,
25792
- frame
25927
+ frame,
25928
+ in_dialog,
25929
+ dialog_query,
25930
+ whole_word
25793
25931
  });
25794
25932
  const r = response;
25795
25933
  if (r.frame_error) {
@@ -25803,9 +25941,11 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
25803
25941
  };
25804
25942
  }
25805
25943
  if (r.matches.length === 0) {
25944
+ const hidden = r.hidden_count ?? 0;
25945
+ const hint = visible_only !== false && hidden > 0 ? ` ${hidden} hidden match(es) skipped (display:none / visibility:hidden / aria-hidden / off-viewport). Set visible_only=false to include them.` : "";
25806
25946
  return {
25807
25947
  content: [
25808
- { type: "text", text: `No matches found for "${query}".` }
25948
+ { type: "text", text: `No visible matches found for "${query}".${hint}` }
25809
25949
  ]
25810
25950
  };
25811
25951
  }
@@ -25915,7 +26055,7 @@ ${lines.join("\n")}${shadowSection}` }] };
25915
26055
  }
25916
26056
 
25917
26057
  // packages/mcp-server/src/index.ts
25918
- var PACKAGE_VERSION = true ? "0.9.12" : "dev";
26058
+ var PACKAGE_VERSION = true ? "0.10.1" : "dev";
25919
26059
  main().catch((err) => {
25920
26060
  console.error("[chromeflow] Fatal error:", err);
25921
26061
  process.exit(1);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "chromeflow",
3
- "version": "0.9.12",
3
+ "version": "0.10.1",
4
4
  "description": "MCP server for chromeflow — lets Claude Code or Codex CLI drive your real Chrome browser with sessions intact. Plugin install recommended; npx chromeflow for manual MCP wiring.",
5
5
  "type": "module",
6
6
  "main": "./bin/chromeflow.mjs",