npm - chromeflow - Versions diffs - 0.9.11 → 0.10.0 - Mend

chromeflow 0.9.11 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/bin/chromeflow.mjs +202 -54
package/package.json +1 -1

package/bin/chromeflow.mjs CHANGED Viewed

@@ -24664,6 +24664,11 @@ var WsBridge = class {
         } catch {
           return;
         }
+        if (msg.type === "progress") {
+          const pending = this.pending.get(msg.requestId);
+          if (pending) pending.refresh();
+          return;
+        }
         if (msg.type === "ready") {
           console.error("[chromeflow] Extension ready");
           const cwd = process.cwd();
@@ -24677,14 +24682,14 @@ var WsBridge = class {
           }));
           return;
         }
-        const pending = this.pending.get(msg.requestId);
-        if (pending) {
-          clearTimeout(pending.timer);
+        const pending2 = this.pending.get(msg.requestId);
+        if (pending2) {
+          clearTimeout(pending2.timer);
           this.pending.delete(msg.requestId);
           if (msg.type === "error") {
-            pending.reject(new Error(msg.message));
+            pending2.reject(new Error(msg.message));
           } else {
-            pending.resolve(msg);
+            pending2.resolve(msg);
           }
         }
       });
@@ -24720,11 +24725,23 @@ var WsBridge = class {
     }
     const requestId = crypto.randomUUID();
     return new Promise((resolve2, reject) => {
-      const timer = setTimeout(() => {
+      let lastProgressAt = Date.now();
+      const fire = () => {
         this.pending.delete(requestId);
-        reject(new Error(`Request timed out after ${timeoutMs}ms`));
-      }, timeoutMs);
-      this.pending.set(requestId, { resolve: resolve2, reject, timer });
+        reject(new Error(`Request timed out after ${timeoutMs}ms (last progress ${Date.now() - lastProgressAt}ms ago). The operation may have completed on the page; verify state before retrying.`));
+      };
+      let timer = setTimeout(fire, timeoutMs);
+      const refresh = () => {
+        clearTimeout(timer);
+        lastProgressAt = Date.now();
+        timer = setTimeout(fire, timeoutMs);
+      };
+      this.pending.set(requestId, {
+        resolve: resolve2,
+        reject,
+        timer,
+        refresh
+      });
       this.client.send(JSON.stringify({ ...message, requestId }));
     });
   }
@@ -24832,9 +24849,11 @@ Examples: switch_to_tab({tab: 1}) for the first tab, switch_to_tab({tab: "form"}
         };
       }
       const q = String(raw);
-      await bridge.request({ type: "switch_to_tab", query: q });
+      const response = await bridge.request({ type: "switch_to_tab", query: q });
+      const r = response;
+      const echo = r.url ? ` \u2192 "${r.title ?? ""}" (${r.url})` : "";
       return {
-        content: [{ type: "text", text: `Switched to tab matching "${q}"` }]
+        content: [{ type: "text", text: `Switched to tab matching "${q}"${echo}` }]
       };
     }
   );
@@ -24892,15 +24911,18 @@ ${keptList}` }]
   );
   server.tool(
     "take_screenshot",
-    `Capture a screenshot of the active tab. By default the image is returned to the agent inline UNLESS it exceeds ~500KB base64, in which case it's saved to a temp file and the path is returned instead (preserves the agent's context window). Set inline="always" to force inline regardless of size, or inline="never" to always write to a file. Set save_to or copy_to_clipboard to also share the image with the user. Reserved for cases where DOM lookup has already failed \u2014 use get_page_text and find_text for reading content.`,
+    `Capture a screenshot of the active tab. By default the image is returned to the agent inline UNLESS it exceeds ~500KB base64, in which case it's saved to a temp file and the path is returned instead (preserves the agent's context window). Set inline="always" to force inline regardless of size, or inline="never" to always write to a file. Set save_to or copy_to_clipboard to also share the image with the user. Reserved for cases where DOM lookup has already failed \u2014 use get_page_text and find_text for reading content.
+Refuses fast on pages that are in fullscreen mode (captureVisibleTab hangs there). Exit fullscreen first with execute_script("document.exitFullscreen()") or pass allow_fullscreen: true if you really must try anyway.`,
     {
       copy_to_clipboard: external_exports.boolean().optional().describe("Copy the PNG to the system clipboard (macOS only). Default false."),
       save_to: external_exports.enum(["downloads", "cwd", "none"]).optional().describe(`Save the PNG to disk: "downloads" (~/Downloads), "cwd" (the agent's working directory), or "none" (default \u2014 image returned only to the agent, no disk artifact).`),
-      inline: external_exports.enum(["auto", "always", "never"]).optional().describe('Whether to return the image base64 inline to the agent. "auto" (default): inline if under 500KB base64, otherwise write to a temp file and return the path. "always": inline regardless of size \u2014 large images may exceed the MCP token ceiling. "never": always return the path, never inline.')
+      inline: external_exports.enum(["auto", "always", "never"]).optional().describe('Whether to return the image base64 inline to the agent. "auto" (default): inline if under 500KB base64, otherwise write to a temp file and return the path. "always": inline regardless of size \u2014 large images may exceed the MCP token ceiling. "never": always return the path, never inline.'),
+      allow_fullscreen: external_exports.boolean().optional().describe("Bypass the fullscreen fast-fail. Default false. captureVisibleTab usually hangs in fullscreen mode and the request times out \u2014 set this only when you've confirmed the page can produce a screenshot in fullscreen.")
     },
-    async ({ copy_to_clipboard = false, save_to = "none", inline = "auto" }) => {
+    async ({ copy_to_clipboard = false, save_to = "none", inline = "auto", allow_fullscreen }) => {
       const sharing = copy_to_clipboard || save_to !== "none";
-      const response = await bridge.request({ type: "screenshot", grid: !sharing });
+      const response = await bridge.request({ type: "screenshot", grid: !sharing, allow_fullscreen });
       if (response.type !== "screenshot_response") {
         throw new Error("Unexpected response from extension");
       }
@@ -24928,8 +24950,10 @@ ${keptList}` }]
         } catch {
         }
       }
+      const r = response;
+      const meta = r.viewport && r.page && r.scroll ? ` viewport=${r.viewport.width}x${r.viewport.height}, page=${r.page.width}x${r.page.height}, scroll=(${r.scroll.x},${r.scroll.y}).` : "";
       if (shouldInline) {
-        const msg = notes.length ? notes.join(". ") + "." : `Screenshot captured (${response.width}x${response.height}, ${base64Len} base64 chars). Analyze the image to identify element positions for highlighting.`;
+        const msg = notes.length ? notes.join(". ") + "." + meta : `Screenshot captured (${response.width}x${response.height}, ${base64Len} base64 chars).${meta} Analyze the image to identify element positions for highlighting.`;
         return {
           content: [
             { type: "image", data: response.image, mimeType: "image/png" },
@@ -24937,7 +24961,7 @@ ${keptList}` }]
           ]
         };
       }
-      notes.push(`Image saved to ${landedPath} (${response.width}x${response.height}, ~${Math.round(imageBuffer.byteLength / 1024)}KB) \u2014 Read the file or use OS image viewer. To force inline despite size, pass inline="always".`);
+      notes.push(`Image saved to ${landedPath} (${response.width}x${response.height}, ~${Math.round(imageBuffer.byteLength / 1024)}KB).${meta} Read the file or use OS image viewer. To force inline despite size, pass inline="always".`);
       return {
         content: [{ type: "text", text: notes.join(". ") + "." }]
       };
@@ -25021,34 +25045,37 @@ The saved file path can be passed directly to set_file_input(hint, file_path) to
     "get_form_fields",
     `Inventory form fields on the active page (inputs, textareas, selects, CodeMirror editors). Sorted top-to-bottom by y-position; includes fields below the fold.
-Pass \`query\` to filter+rank by label/placeholder/aria-label/name/id (the old find_input behavior \u2014 match strength reported as aria-eq / placeholder-eq / label-text-eq / name-eq / id-eq / *-includes / fuzzy-text-walk). Pass \`exact: true\` to refuse fuzzy text-walk matches.`,
+Pass \`query\` to filter+rank by label/placeholder/aria-label/name/id (the old find_input behavior \u2014 match strength reported as aria-eq / placeholder-eq / label-text-eq / name-eq / id-eq / *-includes / fuzzy-text-walk). Pass \`exact: true\` to refuse fuzzy text-walk matches.
+Pass \`only_empty: true\` to filter the inventory to required-but-empty fields. This is the "why is Submit disabled" diagnostic: it returns just the required fields that haven't been filled yet (or radios/checkboxes still unchecked) and skips everything that's already populated. Required-ness is detected via the \`required\` attribute, \`aria-required\`, or a trailing \`*\` in the associated label text.`,
     {
       query: external_exports.string().optional().describe("If set, filter+rank fields by hint matching label/placeholder/aria-label/name/id."),
       max: external_exports.number().int().min(1).optional().describe("Maximum fields to return when query is set (default 5). Ignored without query (full inventory)."),
       type_filter: external_exports.string().optional().describe('Restrict to a specific input type (e.g. "email", "checkbox", "file"). Only with query.'),
       exact: external_exports.boolean().optional().describe("Refuse fuzzy text-walk and *-includes matches. Only with query."),
-      frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside. Cross-origin iframes are not supported.")
+      frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside. Cross-origin iframes are not supported."),
+      only_empty: external_exports.boolean().optional().describe("Filter inventory to required-but-empty fields only. Use as a Submit-disabled diagnostic. Ignored when query is set.")
     },
-    async ({ query, max, type_filter, exact, frame }) => {
+    async ({ query, max, type_filter, exact, frame, only_empty }) => {
       if (query !== void 0) {
         const response2 = await bridge.request({ type: "find_input", query, type_filter, max, exact, frame });
         if (response2.type !== "find_input_response") throw new Error("Unexpected response");
         const r2 = response2;
         if (r2.frame_error) return { content: [{ type: "text", text: r2.frame_error }] };
         if (r2.fields.length === 0) return { content: [{ type: "text", text: `No form fields matched "${query}".` }] };
-        const header = `Found ${r2.fields.length}${r2.truncated ? ` of ${r2.total_matches}` : ""} input(s) for "${query}":`;
+        const header2 = `Found ${r2.fields.length}${r2.truncated ? ` of ${r2.total_matches}` : ""} input(s) for "${query}":`;
         const lines2 = r2.fields.map((f, i) => {
           const ph = f.placeholder ? ` placeholder="${f.placeholder}"` : "";
           const val = f.value ? ` value="${f.value}"` : "";
           const under = f.under ? ` [under: "${f.under}"]` : "";
           return `  ${i + 1}. "${f.label}" type=${f.type}${ph}${val}${under} \u2014 match: ${f.match_kind}`;
         });
-        return { content: [{ type: "text", text: `${header}
+        return { content: [{ type: "text", text: `${header2}
 ${lines2.join("\n")}
 To fill: fill_input("${r2.fields[0].label}", "<value>")` }] };
       }
-      const response = await bridge.request({ type: "get_form_fields" });
+      const response = await bridge.request({ type: "get_form_fields", only_empty });
       if (response.type !== "form_fields_response") throw new Error("Unexpected response");
       const r = response;
       const fields = r.fields;
@@ -25059,20 +25086,25 @@ To fill: fill_input("${r2.fields[0].label}", "<value>")` }] };
 \u2139 OAuth providers detected on this form: ${r.oauthIndicators.join(", ")}. If the user wants to sign in via one of these, click it instead of filling email/password.` : "";
       if (fields.length === 0) {
-        return { content: [{ type: "text", text: "No form fields found on page." + (r.warning ?? "") + captchaLine + oauthLine }] };
+        const empty = only_empty ? "No required-but-empty fields detected." : "No form fields found on page.";
+        return { content: [{ type: "text", text: empty + (r.warning ?? "") + captchaLine + oauthLine }] };
       }
       const lines = fields.map((f) => {
         const val = f.value ? ` [currently: "${f.value}"]` : "";
         const ctx = f.context ? ` [under: "${f.context}"]` : "";
-        return `${f.index}. [${f.type}] "${f.label}"${val}${ctx} \u2014 y:${f.y}`;
+        const req = f.required ? " *required" : "";
+        return `${f.index}. [${f.type}] "${f.label}"${req}${val}${ctx} \u2014 y:${f.y}`;
       });
-      return { content: [{ type: "text", text: `Form fields (${fields.length} total, sorted top-to-bottom):
+      const header = only_empty ? `Required-but-empty fields (${fields.length}):` : `Form fields (${fields.length} total, sorted top-to-bottom):`;
+      return { content: [{ type: "text", text: `${header}
 ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
     }
   );
   server.tool(
     "type_text",
-    `Type text into the currently focused element via CDP keystrokes (produces isTrusted=true events). Use when fill_input fails because the page validates isTrusted (CodeMirror/Monaco/Ace editors, shadow DOM inputs, isTrusted-gated forms). Pass \`into_selector\` to focus the target before typing (shadow-piercing CSS) \u2014 combined with \`clear_first: true\`, this collapses the old "wait_for_click \u2192 execute_script selectAll \u2192 type_text" pattern into a single call. Pass \`frame: "iframe.selector"\` to type into a same-origin iframe's first editable element.`,
+    `Type text into the currently focused element via CDP keystrokes (produces isTrusted=true events). Use when fill_input fails because the page validates isTrusted (CodeMirror/Monaco/Ace editors, shadow DOM inputs, isTrusted-gated forms). Pass \`into_selector\` to focus the target before typing (shadow-piercing CSS) \u2014 combined with \`clear_first: true\`, this collapses the old "wait_for_click \u2192 execute_script selectAll \u2192 type_text" pattern into a single call. Pass \`frame: "iframe.selector"\` to type into a same-origin iframe's first editable element.
+**TipTap / ProseMirror auto-recovery**: when \`into_selector\` targets a contenteditable inside a \`.tiptap\` / \`.ProseMirror\` / \`[data-tiptap-editor]\` ancestor, type_text verifies post-type that the text actually landed. If tiptap's internal state machine silently dropped the CDP keystrokes (a known failure mode where the editor reverts to placeholder a few seconds later), type_text automatically re-fills via \`document.execCommand('insertText', ...)\` which tiptap accepts. The response message records "TipTap/ProseMirror silently dropped..., recovered via execCommand insertText" when this fired.`,
     {
       text: external_exports.string().describe("The text to type into the focused element"),
       into_selector: external_exports.string().optional().describe(
@@ -25086,7 +25118,7 @@ ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
       )
     },
     async ({ text, frame, into_selector, clear_first }) => {
-      const timeoutMs = Math.max(3e4, text.length * 90 + 15e3);
+      const timeoutMs = Math.max(3e4, text.length * 110 + 15e3);
       const response = await bridge.request(
         { type: "type_text", text, frame, into_selector, clear_first },
         timeoutMs
@@ -25122,6 +25154,15 @@ ${lines.join("\n")}${r.warning ?? ""}${captchaLine}${oauthLine}` }] };
     "execute_script",
     `Execute JavaScript in a tab's MAIN world (the page's own context, not the extension's isolated world). Use for reading framework state or DOM properties not visible in text \u2014 prefer get_page_text for visible content. Top-level \`return\` and \`await\` are supported.
+**Object returns are auto-stringified** \u2014 return an object/array and the response carries its JSON. No need to wrap return values in JSON.stringify yourself.
+**Shadow-piercing helpers are pre-injected** into every script:
+- \`$deep(selector, root?)\` \u2014 querySelector that walks open shadow roots
+- \`$deepAll(selector, root?)\` \u2014 querySelectorAll equivalent, returns an array
+- \`shadowDocument\` \u2014 the first attached open shadow root on the page, or \`document\` if none. Useful when an SPA mounts ALL of its UI inside a single root shadow host (Outlier-style annotation dashboards): replace every \`document.querySelector*\` call with \`shadowDocument.querySelector*\` and the same code now reaches the SPA's content.
+The helpers pierce OPEN shadow roots only \u2014 MAIN world can't reach closed roots. For closed roots, use find_text / get_page_text / click_element / fill_input which pierce both kinds via chrome.dom.openOrClosedShadowRoot.
 MAIN-world means the page's Content-Security-Policy applies: \`fetch()\` against authenticated APIs is often blocked by the page's connect-src directive. When that happens, switch to fetch_url \u2014 it runs in the extension's privileged context (full host_permissions, automatic cookie jar, no page CSP).
 CSP-strict pages that disallow eval (Stripe, GitHub) silently fall through to a CDP eval path. Page alerts (alert/confirm/prompt) fired since the last script appear as PAGE ALERT in the result.
@@ -25316,11 +25357,48 @@ Never use take_screenshot just to read page content \u2014 paginate with startIn
 ` + text;
       }
+      if ((startIndex ?? 0) === 0 && r.viewport && r.page && r.scroll) {
+        const footer = `
+---
+viewport: ${r.viewport.width}x${r.viewport.height}, page: ${r.page.width}x${r.page.height}, scroll: (${r.scroll.x}, ${r.scroll.y})`;
+        text = text + footer;
+      }
       return {
         content: [{ type: "text", text: text || "(no text found on page)" }]
       };
     }
   );
+  server.tool(
+    "get_page_html",
+    `Get the raw HTML of the current page or a scoped element. Use when you need to parse structure (tables, attribute values, nested data) and \`get_page_text\` strips too much, or when you're extracting structured data from a page Claude can't easily reason about from text alone.
+Pierces open AND closed shadow roots for the \`selector\` lookup (Radix portals, Stencil/Lit web components). \`<script>\`, \`<style>\`, \`<noscript>\` are stripped before returning.
+Default \`max_chars\` is 50,000. If the page is bigger, the response carries \`truncated: true\` and \`total_chars\` so you can decide whether to scope further with \`selector\`.
+When the goal is "is X on this page?" or "find clickable Y", use \`find_text\` instead \u2014 it returns a focused match list rather than a wall of HTML.`,
+    {
+      selector: external_exports.string().optional().describe(
+        "CSS selector to scope the HTML to. Pierces closed shadow roots. Omit to return the main content area (or body)."
+      ),
+      max_chars: external_exports.number().int().min(1e3).optional().describe("Truncate after this many chars (default 50000). The response includes total_chars so you know if you missed anything.")
+    },
+    async ({ selector, max_chars }) => {
+      const response = await bridge.request({ type: "get_page_html", selector, max_chars });
+      if (response.type !== "page_html_response") throw new Error("Unexpected response");
+      const r = response;
+      const notes = [];
+      if (r.selector_missed) notes.push(`selector "${selector}" not found, returning full body HTML`);
+      if (r.selector_in_shadow) notes.push(`selector matched inside a closed shadow root`);
+      if (r.truncated) notes.push(`truncated at ${max_chars ?? 5e4} of ${r.total_chars} chars; scope further with selector to see more`);
+      const header = notes.length > 0 ? `[${notes.join("; ")}]
+` : "";
+      return {
+        content: [{ type: "text", text: header + r.html }]
+      };
+    }
+  );
   server.tool(
     "get_console_logs",
     `Read the browser console output (log, warn, error, info) captured since the page loaded.
@@ -25554,7 +25632,13 @@ ${r.body_text}` : "";
 function registerFlowTools(server, bridge) {
   server.tool(
     "click_element",
-    `Click an interactive element by its visible text or aria-label. Optionally pass an until_* clause to verify the click took effect:
+    `Click an interactive element by its visible text/aria-label (textHint) OR by direct CSS selector (selector). Pass exactly one.
+\`textHint\` mode: fuzzy-rank against visible text, aria-label, button content. Ranks visible candidates ahead of hidden.
+\`selector\` mode: pierces open AND closed shadow roots via queryAllDeep. Use when the target has no visible text (icon buttons, custom-element placeholders like Reddit's collapsed comment composer, drop-zone overlays). Skips the textHint matcher entirely. \`nth\` still picks the Nth match.
+Optionally pass an until_* clause to verify the click took effect:
 - until_selector \u2014 CSS selector that should appear after the click
 - until_url_contains \u2014 substring that should appear in the URL (requires an actual URL change if the substring was already in the pre-click URL)
 - until_text_contains \u2014 substring that should appear in page text
@@ -25563,14 +25647,17 @@ function registerFlowTools(server, bridge) {
 Returns {success, message, before_url, after_url, navigated}. \`navigated\` is true when the post-click URL differs from the pre-click URL \u2014 surfaces silent redirects without a second list_tabs call. Refuses to click 0\xD70 elements and now ranks visible candidates above hidden when text/aria match; when forced to refuse a hidden element it surfaces the next visible candidate in the error message.
-Scope matching with \`within_selector\` or \`near_text\` restricts where matches are searched \u2014 useful for long forms with repeated labels per section (e.g. one "Minor Issue(s)" radio per evaluation axis). \`within_selector\` is a CSS selector; \`near_text\` finds the nearest container whose heading starts with the given text.
+Scope matching with \`within_selector\` or \`near_text\` restricts where matches are searched \u2014 useful for long forms with repeated labels per section (e.g. one "Approve" radio per row). \`within_selector\` is a CSS selector; \`near_text\` finds the nearest container whose heading starts with the given text.
 Shadow DOM (open AND closed) is pierced by default via chrome.dom.openOrClosedShadowRoot \u2014 Reddit faceplate-* / r-post-form-submit-button / web-component-heavy SPAs no longer need manual deepFind recipes.
 ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit, X / Twitter, GitHub device-code, mcp.so) are silently rejected by isTrusted-aware form validators and CSRF/reCAPTCHA gates. Pass \`expect_submit: true\` to detect this case (returns success=false with "submit silently rejected" when no signal fires within 4s). For confirmed anti-bot sites, do NOT retry \u2014 pre-fill the form, then highlight the submit button and call wait_for_click so a real human gesture fires the submission.`,
     {
-      textHint: external_exports.string().describe(
-        "The visible label of the button or link (e.g. 'Save product', 'Continue', 'Add a product', 'Create')"
+      textHint: external_exports.string().optional().describe(
+        "The visible label of the button or link (e.g. 'Save product', 'Continue', 'Add a product', 'Create'). Exactly one of textHint or selector must be set."
+      ),
+      selector: external_exports.string().optional().describe(
+        `CSS selector for the element to click (e.g. 'faceplate-textarea-input', '#open-composer', 'button[aria-label="More options"]'). Pierces open AND closed shadow roots via queryAllDeep. Use when the target has no usable text. Exactly one of textHint or selector must be set.`
       ),
       nth: external_exports.number().int().min(1).optional().describe("Which match to click when multiple elements share the same label (1 = first/topmost, default 1). Visible candidates are ranked above hidden, so a hidden flair-dropdown won't claim nth=1 over the visible submit button."),
       until_selector: external_exports.string().optional().describe('Wait until this CSS selector appears on the page after the click (e.g. ".success-toast"). Returns success=false if it does not appear within until_timeout_ms.'),
@@ -25579,16 +25666,25 @@ ANTI-BOT SUBMIT CEILING \u2014 synthetic clicks on social/auth platforms (Reddit
       until_url_changes: external_exports.boolean().optional().describe('Wait until the URL changes after the click \u2014 for navigating submits whose destination URL is unknown ahead of time. Succeeds on any change away from the pre-click URL. Combine with until_url_contains for "must change AND must contain X".'),
       until_timeout_ms: external_exports.number().int().min(500).optional().describe("How long to wait for the until-condition, in milliseconds (default 5000). Only used if one of until_* is set."),
       expect_submit: external_exports.boolean().optional().describe(`Broad anti-bot detector. After the click, watch up to 4s for ANY of: URL change, [role=alert] / [data-sonner-toast] / .toast / .notification / aria-live appearance, [role=dialog] / [aria-modal=true] appearance. Returns success=false with "submit silently rejected (likely anti-bot)" when no signal fires. Use on form submits when the until_* destination isn't known. Ignored when any until_* is set (those are more specific).`),
-      within_selector: external_exports.string().optional().describe(`Limit candidate matches to this CSS selector's subtree (mirrors find_text's scope_selector). Use to scope nth-counting to one section of a long form: click_element("Minor Issue(s)", nth=1, within_selector="#response-b-style"). Returns success=false with scope_missed=true if the selector does not match.`),
+      within_selector: external_exports.string().optional().describe(`Limit candidate matches to this CSS selector's subtree (mirrors find_text's scope_selector). Use to scope nth-counting to one section of a long form: click_element("Approve", nth=1, within_selector="#section-b"). Returns success=false with scope_missed=true if the selector does not match.`),
       near_text: external_exports.string().optional().describe("Find the nearest container whose heading starts with this text, then scope candidates to that container's subtree. Ignored when within_selector is set. Useful when the target section has no stable CSS selector but the heading is unique."),
-      try_fiber: external_exports.boolean().optional().describe(`Opt-in last-resort fallback when silently_rejected fires. After the 1500ms activity probe reports zero activity, chromeflow walks the React fiber tree from the matched element (up to 12 levels), finds the nearest \`__reactProps$.onClick\` prop, and invokes it with a minimal synthetic event. Useful on React-heavy SPAs whose action buttons pass through isTrusted=true checks even on CDP events. Returns fiber_attempted=true in the response when the path was taken. Do NOT default to this \u2014 fiber-prop walking is undocumented and may misbehave on mangled production builds. Reserve for repeat silently_rejected on a known-safe React site.`)
+      try_fiber: external_exports.boolean().optional().describe(`Opt-in last-resort fallback when silently_rejected fires. After the 1500ms activity probe reports zero activity, chromeflow walks the React fiber tree from the matched element (up to 12 levels), finds the nearest \`__reactProps$.onClick\` prop, and invokes it with a minimal synthetic event. Useful on React-heavy SPAs whose action buttons pass through isTrusted=true checks even on CDP events. Returns fiber_attempted=true in the response when the path was taken. Do NOT default to this \u2014 fiber-prop walking is undocumented and may misbehave on mangled production builds. Reserve for repeat silently_rejected on a known-safe React site.`),
+      via: external_exports.enum(["auto", "cdp", "fiber"]).optional().describe(`Click dispatch mode. "auto" (default): CDP click, then fiber fallback when try_fiber=true and the activity probe failed. "cdp": CDP click only, no fiber fallback ever. "fiber": skip the CDP bezier + activity probe entirely and invoke __reactProps$.onClick directly. Use "fiber" on React-heavy SPAs (Outlier-style dashboards) where you already know the site is fiber-only \u2014 cuts ~3 seconds of ceremony off the round trip. The fiber path is undocumented React internal access, prefer "auto" until you've confirmed the site needs it.`),
+      in_dialog: external_exports.boolean().optional().describe(`Scope candidate matches to the topmost open dialog (\`[role=dialog]\`, \`[role=alertdialog]\`, or \`<dialog open>\`), highest z-index wins. Use when Radix/Headless UI dialogs portal to document.body and a generic textHint like "Cancel" would otherwise match the wrong button. Returns scope_missed=true when no dialog is open.`),
+      dialog_query: external_exports.string().optional().describe(`Scope candidate matches to a specific dialog by heading or aria-label substring. Use when multiple dialogs are open and in_dialog (topmost) would pick the wrong one \u2014 e.g. click_element("Confirm", dialog_query="Delete account"). Mutually exclusive with in_dialog; dialog_query wins when both are set.`)
     },
-    async ({ textHint, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber }) => {
+    async ({ textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber, via, in_dialog, dialog_query }) => {
+      if (!textHint && !selector || textHint && selector) {
+        return {
+          content: [{ type: "text", text: "click_element requires exactly one of textHint or selector" }]
+        };
+      }
+      const targetLabel = textHint ?? `selector="${selector}"`;
       const wsTimeout = Math.max(3e4, (until_timeout_ms ?? 0) + 1e4);
       let response;
       try {
         response = await bridge.request(
-          { type: "click_element", textHint, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber },
+          { type: "click_element", textHint, selector, nth, until_selector, until_url_contains, until_text_contains, until_url_changes, until_timeout_ms, expect_submit, within_selector, near_text, try_fiber, via, in_dialog, dialog_query },
           wsTimeout
         );
       } catch (err) {
@@ -25606,14 +25702,14 @@ Current URL: ${activeTab.url}`;
             content: [
               {
                 type: "text",
-                text: `Could not confirm click on "${textHint}": ${errMsg}. The click MAY have already fired \u2014 the page just took longer than ${wsTimeout}ms to respond. Verify with get_page_text or wait_for_selector before retrying. Re-clicking can toggle the wrong way on React-controlled radios.${stateLine}`
+                text: `Could not confirm click on "${targetLabel}": ${errMsg}. The click MAY have already fired \u2014 the page just took longer than ${wsTimeout}ms to respond. Verify with get_page_text or wait_for_selector before retrying. Re-clicking can toggle the wrong way on React-controlled radios.${stateLine}`
               }
             ]
           };
         }
         return {
           content: [
-            { type: "text", text: `Could not click "${textHint}": ${errMsg}` }
+            { type: "text", text: `Could not click "${targetLabel}": ${errMsg}` }
           ]
         };
       }
@@ -25635,7 +25731,7 @@ Current URL: ${activeTab.url}`;
           content: [
             {
               type: "text",
-              text: `Could not click "${textHint}": ${r.message}${navLine}${focusLine}`
+              text: `Could not click "${targetLabel}": ${r.message}${navLine}${focusLine}`
             }
           ]
         };
@@ -25681,26 +25777,57 @@ Clicked element: <${r.target.tag}>${r.target.text ? ` "${r.target.text}"` : ""}
       };
     }
   );
+  server.tool( // Declares the "click_at_coordinates" tool on server: name, description, argument schema, async handler.
+    "click_at_coordinates",
+    `Dispatch a real CDP mouse click at viewport (x, y). The only way to interact with cross-origin iframes \u2014 \`click_element\` refuses cross-origin frames because \`find_text\` can't enter them, but a CDP-level mouse event resolves at the renderer process and reaches the iframe's content the way an OS-level click does.
+Coordinates are viewport CSS pixels, NOT screen coordinates. \`list_frames\` reports each iframe at \`(x, y, width, height)\` in this same space, so to click 50px in / 80px down inside an iframe: \`click_at_coordinates(frame.x + 50, frame.y + 80)\`.
+Runs the same humanlike sequence as \`click_element\` (bezier approach path, settle-hover micro-tremor, press, release, post-click micro-move) so behavioural fingerprinters can't distinguish the call from any other chromeflow click. Skips the activity probe \u2014 cross-origin iframe activity isn't observable from the parent.
+Refuses obviously-bad coordinates (negative, > 10000). Use this only when DOM matching has failed and you have a known target position from \`list_frames\` or a screenshot.`,
+    { // Argument schema: x and y are required numbers; button and double are optional.
+      x: external_exports.number().describe("Viewport CSS X coordinate (left=0). Get from list_frames or a screenshot grid."),
+      y: external_exports.number().describe("Viewport CSS Y coordinate (top=0). Get from list_frames or a screenshot grid."),
+      button: external_exports.enum(["left", "right", "middle"]).optional().describe('Mouse button (default "left").'),
+      double: external_exports.boolean().optional().describe("Fire a double-click instead of a single click. Default false.")
+    }, // NOTE(review): no numeric bounds are declared here; the "refuses bad coordinates" check promised above presumably runs on the receiving side - confirm.
+    async ({ x, y, button, double }) => { // Handler: forwards the arguments through bridge.request and renders the reply as a single text item.
+      const response = await bridge.request({ type: "click_at_coordinates", x, y, button, double }); // No timeout argument is passed (the click_element call in this file passes one) and there is no try/catch, so a rejection propagates out of the handler.
+      const r = response; // Plain alias of the reply; nothing is transformed.
+      const navLine = r.navigated && r.after_url ? `
+\u2192 Navigated: ${r.after_url}` : ""; // Empty unless the reply has both navigated and after_url truthy.
+      return { content: [{ type: "text", text: `${r.message}${navLine}` }] }; // Reply message followed by the optional navigation line.
+    }
+  );
   server.tool(
     "wait_for",
-    `Wait for one of: a CSS selector to appear, a text substring to appear, or an existing element's subtree to mutate. Pass exactly one of \`selector\`, \`text\`, or \`change_in\`. Pierces open AND closed shadow roots (text \`scope_selector\` pierces too). Pass \`shadow_root: true\` when waiting for the host's shadowRoot to attach (post-SPA-navigation hydration). \`scope_selector\` limits text-mode search; \`regex: true\` interprets text as a case-insensitive regex; \`frame: "iframe.selector"\` waits inside a same-origin iframe (text mode). Pass \`since: "now"\` in text mode to skip the initial check and only resolve on text appearing in a NEW DOM mutation \u2014 defeats the "stale instruction panels still in DOM" false-positive.`,
+    `Wait for one of: a CSS selector to appear, a text substring (or any of an array of substrings) to appear, or an existing element's subtree to mutate. Pass exactly one of \`selector\`, \`text\`, or \`change_in\`. Pierces open AND closed shadow roots (text \`scope_selector\` pierces too). Pass \`shadow_root: true\` when waiting for the host's shadowRoot to attach (post-SPA-navigation hydration). \`scope_selector\` limits text-mode search; \`regex: true\` interprets text as a case-insensitive regex; \`frame: "iframe.selector"\` waits inside a same-origin iframe (text mode).
+Text mode accepts an array \u2014 \`text: ["New session", "Error", "Stop"]\` resolves on the first match and the response carries \`matched_query\` so you know which entry fired. Useful for "wait for success OR failure" without a polling loop.
+On timeout, the response carries \`last_text\` \u2014 the trailing 240 chars of the scope's content \u2014 so you can see the page state when the wait gave up. If the deploy panel shows "Starting up... 47%" and never reaches "Live", you'll see "Starting up... 47%" in last_text and know to extend the timeout instead of debugging a phantom failure.
+Pass \`since: "now"\` in text mode to skip the initial check and only resolve on text appearing in a NEW DOM mutation \u2014 defeats the "stale instruction panels still in DOM" false-positive. When the wait DOES match on the initial check faster than 50ms, the response carries \`initial_match_warning\` suggesting since:"now" so you don't accidentally short-circuit on stale state.`,
     {
       selector: external_exports.string().optional().describe("CSS selector to wait for."),
-      text: external_exports.string().optional().describe("Text substring (or regex with regex=true) to wait for."),
+      text: external_exports.union([external_exports.string(), external_exports.array(external_exports.string()).min(1)]).optional().describe('Text substring(s) to wait for. String for single match, array for "any of" mode (resolves on the first match; response includes matched_query and matched_index).'),
       change_in: external_exports.string().optional().describe("CSS selector of an existing element whose subtree should mutate (MutationObserver)."),
       timeout_ms: external_exports.number().int().optional().describe("Max ms to wait (default 30000)."),
       poll_interval_ms: external_exports.number().int().optional().describe("Selector-mode poll interval (default 500). Set to 15000 for slow server-side jobs."),
       shadow_root: external_exports.boolean().optional().describe("Selector mode: require the matched host to have an attached shadowRoot. Default false."),
       scope_selector: external_exports.string().optional().describe("Text mode: limit search to this CSS selector's subtree. Pierces shadow roots."),
-      regex: external_exports.boolean().optional().describe("Text mode: interpret query as a case-insensitive regex."),
+      regex: external_exports.boolean().optional().describe("Text mode: interpret query (each entry, if an array) as a case-insensitive regex."),
       frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to wait inside (text mode)."),
       since: external_exports.enum(["now"]).optional().describe(`Text mode: gate on a NEW mutation. Skips the initial check so already-present matches don't short-circuit. Use when the page keeps stale text in the DOM after a route change (e.g. stacked instruction panels) and you need to wait for the next render.`),
+      whole_word: external_exports.boolean().optional().describe(`Text mode: gate matches on word boundaries. Use for common English words ("Live", "New", "Done") that would otherwise substring-match unrelated content (e.g. "Live" matching "delivery"). Default false.`),
       settle_ms: external_exports.number().int().optional().describe("change_in mode: ms to wait after the first mutation for batching (default 150)."),
       max_chars: external_exports.number().int().min(50).optional().describe("change_in mode: cap the returned text content (default 1000). Chat-style mutations can dump huge text; agents that need more should opt in explicitly.")
     },
     async (args) => {
-      const { selector, text, change_in, timeout_ms, poll_interval_ms, shadow_root, scope_selector, regex, frame, since, settle_ms, max_chars } = args;
-      const set = [selector, text, change_in].filter((v) => v !== void 0 && v !== null && v !== "").length;
+      const { selector, text, change_in, timeout_ms, poll_interval_ms, shadow_root, scope_selector, regex, frame, since, settle_ms, max_chars, whole_word } = args;
+      const isTextSet = text !== void 0 && text !== null && !(Array.isArray(text) && text.length === 0) && text !== "";
+      const set = [selector, isTextSet ? text : void 0, change_in].filter((v) => v !== void 0 && v !== null && v !== "").length;
       if (set !== 1) {
         return { content: [{ type: "text", text: "wait_for: pass exactly one of selector, text, or change_in." }] };
       }
@@ -25715,15 +25842,24 @@ Clicked element: <${r.target.tag}>${r.target.text ? ` "${r.target.text}"` : ""}
       }
       if (text !== void 0) {
         const response2 = await bridge.request(
-          { type: "wait_for_text", query: text, timeout_ms: timeoutMs, scope_selector, regex, frame, since },
+          { type: "wait_for_text", query: text, timeout_ms: timeoutMs, scope_selector, regex, frame, since, whole_word },
           timeoutMs + 5e3
         );
         const r2 = response2;
         if (r2.frame_error) return { content: [{ type: "text", text: r2.frame_error }] };
-        if (!r2.found) return { content: [{ type: "text", text: `Text "${text}" did not appear within ${timeoutMs}ms.` }] };
-        return { content: [{ type: "text", text: `Found "${text}" after ${r2.elapsed_ms}ms.
+        const display = Array.isArray(text) ? text.map((t) => `"${t}"`).join(" / ") : `"${text}"`;
+        if (!r2.found) {
+          const tail = r2.last_text ? `
+Last text seen in scope (trailing 240 chars): ${JSON.stringify(r2.last_text)}` : "";
+          return { content: [{ type: "text", text: `Text ${display} did not appear within ${timeoutMs}ms.${tail}` }] };
+        }
+        const whichMatched = r2.matched_query ? `
+matched: "${r2.matched_query}" (index ${r2.matched_index})` : "";
+        const warn = r2.initial_match_warning ? `
+\u26A0 ${r2.initial_match_warning}` : "";
+        return { content: [{ type: "text", text: `Found ${display} after ${r2.elapsed_ms}ms.${whichMatched}
 selector: ${r2.selector}
-context: ${r2.context}` }] };
+context: ${r2.context}${warn}` }] };
       }
       const response = await bridge.request(
         { type: "wait_for_change", selector: change_in, timeout: timeoutMs, settle: settle_ms ?? 150 },
@@ -25755,7 +25891,11 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
   );
   server.tool(
     "find_text",
-    `Search the active page for text and return actionable matches (text, surrounding context, best-effort CSS selector, clickable flag). Use this instead of get_page_text when checking "is X on the page?" or locating a clickable target. Pierces open shadow roots. Pass \`frame: "iframe.selector"\` for same-origin iframe search.`,
+    `Search the active page for text and return actionable matches (text, surrounding context, best-effort CSS selector, clickable flag). Use this instead of get_page_text when checking "is X on the page?" or locating a clickable target. Pierces open AND closed shadow roots. Pass \`frame: "iframe.selector"\` for same-origin iframe search.
+When visible_only=true (the default) filters out all matches AND there were hidden matches, the response surfaces the hidden count so you can re-run with visible_only=false instead of guessing "is this on the page or not?"
+Scope helpers: \`in_dialog: true\` restricts the search to the topmost open dialog; \`dialog_query: "Select"\` restricts it to a dialog whose heading or aria-label matches. Mirrors click_element's dialog scoping so the same flag works across discovery and action.`,
     {
       query: external_exports.string().describe("Text to search for. Substring by default; regex=true \u2192 case-insensitive regex."),
       max: external_exports.number().int().min(1).optional().describe("Maximum matches to return (default 5). total_matches is reported even when truncated."),
@@ -25763,9 +25903,12 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
       regex: external_exports.boolean().optional().describe("Treat query as regex (case-insensitive). Default false."),
       visible_only: external_exports.boolean().optional().describe("Skip display:none / visibility:hidden / aria-hidden=true. Default true."),
       context_chars: external_exports.number().int().min(0).optional().describe("Surrounding context chars per match (default 40)."),
-      frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside.")
+      frame: external_exports.string().optional().describe("Same-origin iframe CSS selector to search inside."),
+      in_dialog: external_exports.boolean().optional().describe("Scope to the topmost open [role=dialog] / [role=alertdialog] / <dialog open>. Mirrors click_element."),
+      dialog_query: external_exports.string().optional().describe("Scope to the dialog whose heading or aria-label contains this substring. Mirrors click_element."),
+      whole_word: external_exports.boolean().optional().describe(`Gate matches on word boundaries. Use for common English words ("Live", "New", "Done", "Confirm") that would otherwise substring-match unrelated pre-rendered content (e.g. "Live" matching "delivery", "Done" matching "abandoned"). Default false to preserve the substring-by-default contract; flip on whenever your query is a single common word that may also appear inside larger words.`)
     },
-    async ({ query, max, scope_selector, regex, visible_only, context_chars, frame }) => {
+    async ({ query, max, scope_selector, regex, visible_only, context_chars, frame, in_dialog, dialog_query, whole_word }) => {
       const response = await bridge.request({
         type: "find_text",
         query,
@@ -25774,7 +25917,10 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
         regex,
         visible_only,
         context_chars: context_chars ?? 40,
-        frame
+        frame,
+        in_dialog,
+        dialog_query,
+        whole_word
       });
       const r = response;
       if (r.frame_error) {
@@ -25788,9 +25934,11 @@ Examples: scroll_to_element("#submit-btn"), scroll_to_element("Billing address")
         };
       }
       if (r.matches.length === 0) {
+        const hidden = r.hidden_count ?? 0;
+        const hint = visible_only !== false && hidden > 0 ? ` ${hidden} hidden match(es) skipped (display:none / visibility:hidden / aria-hidden / off-viewport). Set visible_only=false to include them.` : "";
         return {
           content: [
-            { type: "text", text: `No matches found for "${query}".` }
+            { type: "text", text: `No visible matches found for "${query}".${hint}` }
           ]
         };
       }
@@ -25900,7 +26048,7 @@ ${lines.join("\n")}${shadowSection}` }] };
 }
 // packages/mcp-server/src/index.ts
-var PACKAGE_VERSION = true ? "0.9.11" : "dev";
+var PACKAGE_VERSION = true ? "0.10.0" : "dev"; // The constant-true ternary always yields the version literal, never "dev" (presumably a folded build-time flag - confirm); the value matches the package.json version in this same diff.
 main().catch((err) => {
   console.error("[chromeflow] Fatal error:", err);
   process.exit(1);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "chromeflow",
-  "version": "0.9.11",
+  "version": "0.10.0",
   "description": "MCP server for chromeflow — lets Claude Code or Codex CLI drive your real Chrome browser with sessions intact. Plugin install recommended; npx chromeflow for manual MCP wiring.",
   "type": "module",
   "main": "./bin/chromeflow.mjs",