hypha-debugger 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8903,6 +8903,196 @@
8903
8903
  }
8904
8904
  }
8905
8905
 
8906
+ /**
8907
+ * Animated AI cursor overlay.
8908
+ * Shows a smooth-moving cursor with click ripple animation.
8909
+ * Adapted from @page-agent/page-controller (MIT License).
8910
+ *
8911
+ * The cursor is injected as a fixed overlay and listens for
8912
+ * custom events dispatched by the page-controller actions.
8913
+ */
8914
// SVG cursor graphics (inlined to avoid external file dependencies).
// The outline and the fill trace the same arrow, so the path data lives in one place.
const CURSOR_ARROW_PATH = "M 15 42 L 15 36.99 Q 15 31.99 23.7 31.99 L 28.05 31.99 Q 32.41 31.99 32.41 21.99 L 32.41 17 Q 32.41 12 41.09 16.95 L 76.31 37.05 Q 85 42 76.31 46.95 L 41.09 67.05 Q 32.41 72 32.41 62.01 L 32.41 57.01 Q 32.41 52.01 23.7 52.01 L 19.35 52.01 Q 15 52.01 15 47.01 Z";
// Stroke-only outline; referenced from CURSOR_CSS as the mask of .hypha-cursor-border.
const CURSOR_BORDER_SVG = `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" fill="none"><g><path d="${CURSOR_ARROW_PATH}" fill="none" stroke="currentColor" stroke-width="6" stroke-miterlimit="10"/></g></svg>`;
// White arrow body with a drop shadow; referenced from CURSOR_CSS as the background of .hypha-cursor-fill.
const CURSOR_FILL_SVG = `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><g style="filter: drop-shadow(rgba(0, 0, 0, 0.3) 3px 4px 4px);"><path d="${CURSOR_ARROW_PATH}" fill="#ffffff" stroke="none"/></g></svg>`;
// Stylesheet for the overlay: the root fades in/out via the `visible` class and the
// ripple pseudo-element animates while the root carries the `clicking` class.
const CURSOR_CSS = `
.hypha-cursor {
  position: fixed;
  width: 50px;
  height: 50px;
  pointer-events: none;
  z-index: 2147483646;
  transition: opacity 0.2s;
  opacity: 0;
}
.hypha-cursor.visible {
  opacity: 1;
}
.hypha-cursor-border {
  position: absolute;
  width: 100%;
  height: 100%;
  background: linear-gradient(45deg, rgb(57, 182, 255), rgb(189, 69, 251));
  mask-image: var(--cursor-border);
  -webkit-mask-image: var(--cursor-border);
  mask-size: 100% 100%;
  -webkit-mask-size: 100% 100%;
  mask-repeat: no-repeat;
  -webkit-mask-repeat: no-repeat;
  transform-origin: center;
  transform: rotate(-135deg) scale(1.2);
  margin-left: -10px;
  margin-top: -14px;
}
.hypha-cursor-fill {
  position: absolute;
  width: 100%;
  height: 100%;
  background-image: var(--cursor-fill);
  background-size: 100% 100%;
  background-repeat: no-repeat;
  transform-origin: center;
  transform: rotate(-135deg) scale(1.2);
  margin-left: -10px;
  margin-top: -14px;
}
.hypha-cursor-ripple {
  position: absolute;
  width: 100%;
  height: 100%;
  pointer-events: none;
  margin-left: -50%;
  margin-top: -50%;
}
.hypha-cursor-ripple::after {
  content: '';
  opacity: 0;
  position: absolute;
  inset: 0;
  border: 3px solid rgba(57, 182, 255, 1);
  border-radius: 50%;
}
.hypha-cursor.clicking .hypha-cursor-ripple::after {
  animation: hypha-cursor-ripple 400ms ease-out forwards;
}
@keyframes hypha-cursor-ripple {
  0% { transform: scale(0); opacity: 1; }
  100% { transform: scale(2.5); opacity: 0; }
}
`;
8982
/**
 * Animated pointer overlay that shows where scripted actions take place.
 *
 * Constructing an instance appends a fixed-position container to
 * `document.body` and subscribes to two window events:
 *   - "HyphaDebugger::MovePointerTo" (reads `detail.x` / `detail.y`) -> moveTo()
 *   - "HyphaDebugger::ClickPointer"                                   -> triggerClickAnimation()
 *
 * Call destroy() when the overlay is no longer needed; it removes the DOM node,
 * unsubscribes both listeners and cancels any pending frame or hide timer.
 */
class AICursor {
    constructor() {
        this.currentX = 0;
        this.currentY = 0;
        this.targetX = 0;
        this.targetY = 0;
        this.animating = false;
        this.visible = false;
        this.hideTimeout = null;
        // Handle of the scheduled animation frame, kept so destroy() can cancel it.
        this.frameId = null;
        // Once set, every entry point becomes a no-op.
        this.destroyed = false;
        // Create container (not in Shadow DOM — needs to be on top of everything)
        this.container = document.createElement("div");
        this.container.id = "hypha-debugger-cursor";
        this.container.setAttribute("data-browser-use-ignore", "true");
        this.container.setAttribute("data-page-agent-ignore", "true");
        // Inject styles
        const style = document.createElement("style");
        style.textContent = CURSOR_CSS;
        this.container.appendChild(style);
        // Create cursor element
        this.cursor = document.createElement("div");
        this.cursor.className = "hypha-cursor";
        // Set SVG as CSS custom properties (data URIs for mask-image)
        const borderDataUri = "url(\"data:image/svg+xml," +
            encodeURIComponent(CURSOR_BORDER_SVG) +
            '")';
        const fillDataUri = "url(\"data:image/svg+xml," +
            encodeURIComponent(CURSOR_FILL_SVG) +
            '")';
        this.cursor.style.setProperty("--cursor-border", borderDataUri);
        this.cursor.style.setProperty("--cursor-fill", fillDataUri);
        // Ripple layer (behind cursor)
        const ripple = document.createElement("div");
        ripple.className = "hypha-cursor-ripple";
        this.cursor.appendChild(ripple);
        // Fill layer (white arrow with shadow)
        const fill = document.createElement("div");
        fill.className = "hypha-cursor-fill";
        this.cursor.appendChild(fill);
        // Border layer (gradient)
        const border = document.createElement("div");
        border.className = "hypha-cursor-border";
        this.cursor.appendChild(border);
        this.container.appendChild(this.cursor);
        document.body.appendChild(this.container);
        // Keep references to the handlers: removeEventListener needs the very
        // same function objects that were registered.
        this.onMovePointer = (event) => {
            const detail = event.detail;
            if (!detail) {
                // Event dispatched without coordinates — nothing to move to.
                return;
            }
            this.moveTo(detail.x, detail.y);
        };
        this.onClickPointer = () => {
            this.triggerClickAnimation();
        };
        // Listen for move/click events from actions
        window.addEventListener("HyphaDebugger::MovePointerTo", this.onMovePointer);
        window.addEventListener("HyphaDebugger::ClickPointer", this.onClickPointer);
    }
    /**
     * Sets a new target position and makes sure the cursor is visible and animating.
     * The first move after being hidden jumps straight to the target instead of
     * gliding in from the previous position.
     *
     * @param {number} x - Target `left` offset in CSS pixels.
     * @param {number} y - Target `top` offset in CSS pixels.
     */
    moveTo(x, y) {
        if (this.destroyed) {
            return;
        }
        this.targetX = x;
        this.targetY = y;
        // Show cursor
        if (!this.visible) {
            this.visible = true;
            this.currentX = x;
            this.currentY = y;
            this.cursor.style.left = `${x}px`;
            this.cursor.style.top = `${y}px`;
            this.cursor.classList.add("visible");
        }
        // Cancel any pending hide
        if (this.hideTimeout) {
            clearTimeout(this.hideTimeout);
            this.hideTimeout = null;
        }
        // Start animation loop if not running
        if (!this.animating) {
            this.animating = true;
            this.animateLoop();
        }
    }
    /**
     * One animation step: moves 18% of the remaining distance per frame until the
     * cursor is within 1px of the target, then snaps to it and arms the 2s auto-hide.
     */
    animateLoop() {
        this.frameId = null;
        if (this.destroyed) {
            // A frame scheduled before destroy() must not touch the removed node
            // or arm a new timer.
            return;
        }
        const ease = 0.18;
        const dx = this.targetX - this.currentX;
        const dy = this.targetY - this.currentY;
        if (Math.abs(dx) > 1 || Math.abs(dy) > 1) {
            this.currentX += dx * ease;
            this.currentY += dy * ease;
            this.cursor.style.left = `${this.currentX}px`;
            this.cursor.style.top = `${this.currentY}px`;
            this.frameId = requestAnimationFrame(() => this.animateLoop());
        }
        else {
            // Snap to target
            this.currentX = this.targetX;
            this.currentY = this.targetY;
            this.cursor.style.left = `${this.currentX}px`;
            this.cursor.style.top = `${this.currentY}px`;
            this.animating = false;
            // Never let two hide timers run at once.
            if (this.hideTimeout) {
                clearTimeout(this.hideTimeout);
            }
            // Auto-hide cursor after 2s of inactivity
            this.hideTimeout = setTimeout(() => {
                this.hideTimeout = null;
                this.visible = false;
                this.cursor.classList.remove("visible");
            }, 2000);
        }
    }
    /** Restarts the click ripple animation at the cursor's current position. */
    triggerClickAnimation() {
        if (this.destroyed) {
            return;
        }
        this.cursor.classList.remove("clicking");
        // Force reflow to restart CSS animation
        void this.cursor.offsetHeight;
        this.cursor.classList.add("clicking");
    }
    /**
     * Tears the overlay down: unsubscribes the window listeners, cancels the
     * pending animation frame and hide timer, and removes the container from the
     * document. Safe to call more than once.
     */
    destroy() {
        if (this.destroyed) {
            return;
        }
        this.destroyed = true;
        window.removeEventListener("HyphaDebugger::MovePointerTo", this.onMovePointer);
        window.removeEventListener("HyphaDebugger::ClickPointer", this.onClickPointer);
        if (this.frameId !== null) {
            cancelAnimationFrame(this.frameId);
            this.frameId = null;
        }
        if (this.hideTimeout) {
            clearTimeout(this.hideTimeout);
            this.hideTimeout = null;
        }
        this.animating = false;
        this.visible = false;
        this.container.remove();
    }
}
9095
+
8906
9096
  /**
8907
9097
  * Environment detection and page metadata collection.
8908
9098
  */
@@ -9099,7 +9289,7 @@
9099
9289
  required: ["selector"],
9100
9290
  },
9101
9291
  };
9102
- function clickElement(selector) {
9292
+ function clickElement$1(selector) {
9103
9293
  const el = document.querySelector(selector);
9104
9294
  if (!el) {
9105
9295
  return { success: false, message: `No element found for selector: ${selector}` };
@@ -9114,7 +9304,7 @@
9114
9304
  }));
9115
9305
  return { success: true, message: `Clicked element: ${selector}` };
9116
9306
  }
9117
- clickElement.__schema__ = {
9307
+ clickElement$1.__schema__ = {
9118
9308
  name: "clickElement",
9119
9309
  description: "Click a DOM element matching the CSS selector.",
9120
9310
  parameters: {
@@ -10500,29 +10690,79 @@
10500
10690
  "",
10501
10691
  "# Web Debugger Skill",
10502
10692
  "",
10503
- "This skill allows you to remotely debug and interact with a web page through a set of HTTP API endpoints.",
10693
+ "This skill allows you to remotely debug and interact with a web page through HTTP API endpoints.",
10504
10694
  "",
10505
- "## How to call functions",
10695
+ "## Recommended Workflow (Index-Based Interaction)",
10696
+ "",
10697
+ "The most reliable way to interact with a page is using the smart DOM analysis:",
10506
10698
  "",
10507
- "All functions are available as HTTP endpoints. Use the service URL provided in the instructions.",
10699
+ "### Step 1: Observe the page",
10700
+ "```bash",
10701
+ `curl '{SERVICE_URL}/get_browser_state'`,
10702
+ "```",
10703
+ "This returns all interactive elements indexed as `[0]`, `[1]`, `[2]`, etc.",
10704
+ "Elements are detected via smart heuristics: CSS cursor, ARIA roles, event listeners, tag names.",
10705
+ "Visual highlight labels are overlaid on the page for each detected element.",
10508
10706
  "",
10509
- "**GET request** (for functions with no required parameters):",
10707
+ "Example output:",
10510
10708
  "```",
10511
- `curl '{SERVICE_URL}/get_page_info?_mode=last' -H 'Authorization: Bearer {TOKEN}'`,
10709
+ "[0]<a aria-label=Home>Home />",
10710
+ "[1]<input placeholder=Search... />",
10711
+ "[2]<button>Sign In />",
10712
+ "[3]<select name=language>English />",
10713
+ "[4]<div data-scrollable=\"top=200, bottom=1500\">Content area />",
10512
10714
  "```",
10513
10715
  "",
10514
- "**POST request** (for functions with parameters):",
10716
+ "### Step 2: Act on elements by index",
10717
+ "```bash",
10718
+ "# Click a button (e.g. [2] Sign In):",
10719
+ `curl -X POST '{SERVICE_URL}/click_element_by_index' \\`,
10720
+ ` -H 'Content-Type: application/json' -d '{"index": 2}'`,
10721
+ "",
10722
+ "# Type into an input (e.g. [1] Search):",
10723
+ `curl -X POST '{SERVICE_URL}/input_text' \\`,
10724
+ ` -H 'Content-Type: application/json' -d '{"index": 1, "text": "hello world"}'`,
10725
+ "",
10726
+ "# Select a dropdown option (e.g. [3] Language):",
10727
+ `curl -X POST '{SERVICE_URL}/select_option' \\`,
10728
+ ` -H 'Content-Type: application/json' -d '{"index": 3, "option_text": "French"}'`,
10729
+ "",
10730
+ "# Scroll down:",
10731
+ `curl -X POST '{SERVICE_URL}/scroll' \\`,
10732
+ ` -H 'Content-Type: application/json' -d '{"direction": "down"}'`,
10733
+ "",
10734
+ "# Scroll a specific container (e.g. [4]):",
10735
+ `curl -X POST '{SERVICE_URL}/scroll' \\`,
10736
+ ` -H 'Content-Type: application/json' -d '{"direction": "down", "index": 4}'`,
10515
10737
  "```",
10516
- `curl -X POST '{SERVICE_URL}/query_dom?_mode=last' \\`,
10517
- ` -H 'Authorization: Bearer {TOKEN}' \\`,
10518
- ` -H 'Content-Type: application/json' \\`,
10519
- ` -d '{"selector": "button"}'`,
10738
+ "",
10739
+ "### Step 3: Verify",
10740
+ "```bash",
10741
+ `curl '{SERVICE_URL}/take_screenshot'`,
10742
+ "```",
10743
+ "",
10744
+ "### Remove visual highlights (optional, for clean screenshots)",
10745
+ "```bash",
10746
+ `curl '{SERVICE_URL}/remove_highlights'`,
10520
10747
  "```",
10521
10748
  "",
10522
- "Replace `{SERVICE_URL}` and `{TOKEN}` with the actual values from the instruction block.",
10749
+ "## CSS Selector-Based Functions (Alternative)",
10750
+ "",
10751
+ "You can also use CSS selectors directly for precise targeting:",
10752
+ "```bash",
10753
+ `curl -X POST '{SERVICE_URL}/click_element' \\`,
10754
+ ` -H 'Content-Type: application/json' -d '{"selector": "button.submit"}'`,
10755
+ "",
10756
+ `curl -X POST '{SERVICE_URL}/fill_input' \\`,
10757
+ ` -H 'Content-Type: application/json' -d '{"selector": "#email", "value": "user@example.com"}'`,
10758
+ "```",
10759
+ "",
10760
+ "## How to call functions",
10761
+ "",
10762
+ "All functions are available as HTTP endpoints. Replace `{SERVICE_URL}` with the actual service URL.",
10523
10763
  "",
10524
- "**Note:** The `_mode=last` query parameter ensures the latest debugger instance is used,",
10525
- "even if multiple sessions have connected to the same workspace.",
10764
+ "- **GET** for functions with no required parameters",
10765
+ "- **POST** with JSON body for functions with parameters",
10526
10766
  "",
10527
10767
  ].join("\n");
10528
10768
  // Build the function reference
@@ -10565,8 +10805,7 @@
10565
10805
  }
10566
10806
  functionDocs.push("**Example:**");
10567
10807
  functionDocs.push("```bash");
10568
- functionDocs.push(`curl -X POST '{SERVICE_URL}/${name}?_mode=last' \\`);
10569
- functionDocs.push(` -H 'Authorization: Bearer {TOKEN}' \\`);
10808
+ functionDocs.push(`curl -X POST '{SERVICE_URL}/${name}' \\`);
10570
10809
  functionDocs.push(` -H 'Content-Type: application/json' \\`);
10571
10810
  functionDocs.push(` -d '${JSON.stringify(exampleParams)}'`);
10572
10811
  functionDocs.push("```");
@@ -10574,7 +10813,7 @@
10574
10813
  else {
10575
10814
  functionDocs.push("**Example:**");
10576
10815
  functionDocs.push("```bash");
10577
- functionDocs.push(`curl '{SERVICE_URL}/${name}?_mode=last' -H 'Authorization: Bearer {TOKEN}'`);
10816
+ functionDocs.push(`curl '{SERVICE_URL}/${name}'`);
10578
10817
  functionDocs.push("```");
10579
10818
  }
10580
10819
  }
@@ -10583,7 +10822,7 @@
10583
10822
  functionDocs.push("");
10584
10823
  functionDocs.push("**Example:**");
10585
10824
  functionDocs.push("```bash");
10586
- functionDocs.push(`curl '{SERVICE_URL}/${name}?_mode=last' -H 'Authorization: Bearer {TOKEN}'`);
10825
+ functionDocs.push(`curl '{SERVICE_URL}/${name}'`);
10587
10826
  functionDocs.push("```");
10588
10827
  }
10589
10828
  functionDocs.push("");
@@ -10591,35 +10830,2780 @@
10591
10830
  const tips = [
10592
10831
  "## Tips",
10593
10832
  "",
10594
- "- **Start with `get_page_info`** to understand the page structure, URL, title, and viewport.",
10595
- "- **Use `query_dom`** with CSS selectors to find elements before clicking or filling them.",
10596
- "- **Use `take_screenshot`** to visually verify the page state.",
10833
+ "- **Start with `get_browser_state`** — it's the best way to understand what's on the page and what you can interact with.",
10834
+ "- **Prefer index-based interaction** (`click_element_by_index`, `input_text`, `select_option`) over CSS selectors indices are more reliable across dynamic pages.",
10835
+ "- **After each action, call `get_browser_state` again** element indices change when the DOM updates.",
10836
+ "- **Use `take_screenshot`** to visually verify the page state. Call `remove_highlights` first for a clean view.",
10597
10837
  "- **Use `execute_script`** for anything not covered by the built-in functions — it runs arbitrary JavaScript.",
10838
+ "- **Use `scroll`** with an element index to scroll inside a specific container (e.g. a chat window, sidebar).",
10598
10839
  "- **Use `get_page_info` with `include_logs=true`** to check for JavaScript errors or debug output.",
10599
10840
  "- **Use `get_react_tree`** if the page uses React — it gives you component names, props, and state.",
10600
10841
  "- All POST endpoints accept JSON body with the parameter names as keys.",
10601
- "- All endpoints require the `Authorization: Bearer {TOKEN}` header.",
10602
10842
  "",
10603
10843
  ].join("\n");
10604
10844
  return [frontmatter, intro, functionDocs.join("\n"), tips].join("\n");
10605
10845
  }
10606
10846
 
10847
+ /**
10848
+ * @file port from browser-use
10849
+ * @see https://github.com/browser-use/browser-use/commits/main/browser_use/dom/dom_tree/index.js
10850
+ * @match 0.5.9 d51b6e73daff7165fdd3e44debd667e7f5f7fdc5
10851
+ *
10852
+ * search @edit for all the changed lines.
10853
+ *
10854
+ * @edit export
10855
+ * @edit add interactiveBlacklist interactiveWhitelist
10856
+ * @edit adjustable opacity
10857
+ * @edit direct dom ref
10858
+ * @edit @workaround input.checked
10859
+ * @edit smaller zIndex for highlight
10860
+ * @edit no need for xpath
10861
+ * @edit add `extra` field for extra data
10862
+ * @edit scrollable element detection
10863
+ * @edit add `data-browser-use-ignore` attribute
10864
+ * @edit improve `sampleRect`, filter out rects with 0 area
10865
+ * @edit exclude aria-hidden elements
10866
+ * @edit make sure attributes exist for interactive candidates.
10867
+ */
10868
+
10869
+ var domTree = (
10870
+ args = {
10871
+ doHighlightElements: true,
10872
+ focusHighlightIndex: -1,
10873
+ viewportExpansion: 0,
10874
+ debugMode: false,
10875
+
10876
+ /**
10877
+ * @edit
10878
+ */
10879
+ /** @type {Element[]} */
10880
+ interactiveBlacklist: [],
10881
+ /** @type {Element[]} */
10882
+ interactiveWhitelist: [],
10883
+ highlightOpacity: 0.1,
10884
+ highlightLabelOpacity: 0.5,
10885
+ }
10886
+ ) => {
10887
+ /**
10888
+ * @edit
10889
+ */
10890
+ const { interactiveBlacklist, interactiveWhitelist, highlightOpacity, highlightLabelOpacity } =
10891
+ args;
10892
+
10893
+ const { doHighlightElements, focusHighlightIndex, viewportExpansion, debugMode } = args;
10894
+ let highlightIndex = 0; // Reset highlight index
10895
+
10896
/**
 * @edit add `extra` field for extra data
 *
 * Side table of additional data keyed by DOM element. Successive calls for the
 * same element are shallow-merged, with later keys overriding earlier ones.
 * Anything that is not an element node is ignored.
 */
const extraData = new WeakMap();
function addExtraData(element, data) {
  const isElementNode = Boolean(element) && element.nodeType === Node.ELEMENT_NODE;
  if (!isElementNode) {
    return;
  }
  const merged = Object.assign({}, extraData.get(element), data);
  extraData.set(element, merged);
}
10904
+
10905
// Memoised layout/style lookups, one WeakMap per kind of measurement, keyed by element.
// clearCache() swaps in fresh maps, discarding every cached entry.
const DOM_CACHE = {
  boundingRects: new WeakMap(),
  clientRects: new WeakMap(),
  computedStyles: new WeakMap(),
  clearCache: () => {
    for (const slot of ['boundingRects', 'clientRects', 'computedStyles']) {
      DOM_CACHE[slot] = new WeakMap();
    }
  },
};
10916
+
10917
/**
 * Returns the element's bounding rect, measuring it at most once per cache lifetime.
 *
 * @param {HTMLElement} element - The element to measure.
 * @returns {DOMRect | null} The (possibly cached) bounding rect, or null when no element is given.
 */
function getCachedBoundingRect(element) {
  if (!element) return null;

  const cache = DOM_CACHE.boundingRects;
  if (!cache.has(element)) {
    const measured = element.getBoundingClientRect();
    // Only truthy results are remembered; anything else is handed back as-is.
    if (!measured) return measured;
    cache.set(element, measured);
  }
  return cache.get(element);
}
10937
+
10938
/**
 * Returns the element's computed style, asking the window at most once per cache lifetime.
 *
 * @param {HTMLElement} element - The element whose style is wanted.
 * @returns {CSSStyleDeclaration | null} The (possibly cached) computed style, or null when no element is given.
 */
function getCachedComputedStyle(element) {
  if (!element) return null;

  const cache = DOM_CACHE.computedStyles;
  if (!cache.has(element)) {
    const resolved = window.getComputedStyle(element);
    // Only truthy results are remembered; anything else is handed back as-is.
    if (!resolved) return resolved;
    cache.set(element, resolved);
  }
  return cache.get(element);
}
10958
+
10959
/**
 * Returns the element's client rects, measuring them at most once per cache lifetime.
 *
 * @param {HTMLElement} element - The element to measure.
 * @returns {DOMRectList | null} The (possibly cached) client rects, or null when no element is given.
 */
function getCachedClientRects(element) {
  if (!element) return null;

  const cache = DOM_CACHE.clientRects;
  if (!cache.has(element)) {
    const measured = element.getClientRects();
    // Only truthy results are remembered; anything else is handed back as-is.
    if (!measured) return measured;
    cache.set(element, measured);
  }
  return cache.get(element);
}
10979
+
10980
+ /**
10981
+ * Hash map of DOM nodes indexed by their highlight index.
10982
+ *
10983
+ * @type {Object<string, any>}
10984
+ */
10985
+ const DOM_HASH_MAP = {};
10986
+
10987
+ const ID = { current: 0 };
10988
+
10989
+ const HIGHLIGHT_CONTAINER_ID = 'playwright-highlight-container';
10990
+
10991
+ // // Initialize once and reuse
10992
+ // const viewportObserver = new IntersectionObserver(
10993
+ // (entries) => {
10994
+ // entries.forEach(entry => {
10995
+ // elementVisibilityMap.set(entry.target, entry.isIntersecting);
10996
+ // });
10997
+ // },
10998
+ // { rootMargin: `${viewportExpansion}px` }
10999
+ // );
11000
+
11001
/**
 * Draws highlight overlays plus a numbered label for an element inside the shared
 * highlight container and keeps them aligned while the page scrolls or resizes.
 *
 * One overlay is created per client rect with a non-zero area. Each overlay
 * remembers which rect it was created for, so later position updates follow that
 * same rect even when earlier rects were skipped for being empty.
 *
 * A cleanup function (removes the listeners, the overlays and the label) is
 * appended to `window._highlightCleanupFunctions`.
 *
 * @param {HTMLElement} element - The element to highlight.
 * @param {number} index - The index shown in the label; it also selects the colour.
 * @param {HTMLElement | null} parentIframe - The parent iframe node, if any. Its
 *   bounding rect is added to every coordinate.
 * @returns {number} `index + 1` after highlighting, or `index` unchanged when the
 *   element is missing or has no client rects.
 */
function highlightElement(element, index, parentIframe = null) {
  if (!element) return index;

  // Each entry: { element: overlay node, initialRect, rectIndex }.
  const overlays = [];
  /**
   * @type {HTMLElement | null}
   */
  let label = null;
  let labelWidth = 20;
  let labelHeight = 16;
  let cleanupFn = null;

  // Offset of the hosting iframe, or the origin when the element is in the top document.
  const readIframeOffset = () => {
    if (!parentIframe) return { x: 0, y: 0 };
    const frameRect = parentIframe.getBoundingClientRect();
    return { x: frameRect.left, y: frameRect.top };
  };

  // Label placement rule shared by the initial layout and every later update:
  // inside the top-right corner of the anchor rect, or just above it when the
  // rect is too small, always clamped to the viewport.
  const computeLabelPosition = (anchorRect, offset) => {
    const anchorTop = anchorRect.top + offset.y;
    const anchorLeft = anchorRect.left + offset.x;

    let top = anchorTop + 2;
    let left = anchorLeft + anchorRect.width - labelWidth - 2;

    if (anchorRect.width < labelWidth + 4 || anchorRect.height < labelHeight + 4) {
      top = anchorTop - labelHeight - 2;
      left = anchorLeft + anchorRect.width - labelWidth; // Align with right edge
      if (left < offset.x) left = anchorLeft; // Prevent going off-left
    }

    return {
      top: Math.max(0, Math.min(top, window.innerHeight - labelHeight)),
      left: Math.max(0, Math.min(left, window.innerWidth - labelWidth)),
    };
  };

  try {
    // Create or get highlight container
    let container = document.getElementById(HIGHLIGHT_CONTAINER_ID);
    if (!container) {
      container = document.createElement('div');
      container.id = HIGHLIGHT_CONTAINER_ID;
      container.style.position = 'fixed';
      container.style.pointerEvents = 'none';
      container.style.top = '0';
      container.style.left = '0';
      container.style.width = '100%';
      container.style.height = '100%';

      /**
       * @edit smaller zIndex for highlight
       */
      // Stays just below the maximum z-index instead of using 2147483647 itself.
      container.style.zIndex = '2147483640';

      container.style.backgroundColor = 'transparent';
      document.body.appendChild(container);
    }

    // Get element client rects
    const rects = element.getClientRects();

    if (!rects || rects.length === 0) return index; // Exit if no rects

    // Generate a color based on the index
    const colors = [
      '#FF0000',
      '#00FF00',
      '#0000FF',
      '#FFA500',
      '#800080',
      '#008080',
      '#FF69B4',
      '#4B0082',
      '#FF4500',
      '#2E8B57',
      '#DC143C',
      '#4682B4',
    ];
    let baseColor = colors[index % colors.length];

    /**
     * @edit adjustable opacity
     */
    // Opacity (0..1) -> two-digit hex alpha channel appended to the #RRGGBB colour.
    const toAlphaHex = (opacity) =>
      Math.floor(opacity * 255)
        .toString(16)
        .padStart(2, '0');
    const backgroundColor = baseColor + toAlphaHex(highlightOpacity);
    baseColor = baseColor + toAlphaHex(highlightLabelOpacity);

    // Get iframe offset if necessary
    const iframeOffset = readIframeOffset();

    // Create fragment to hold overlay elements
    const fragment = document.createDocumentFragment();

    // Create highlight overlays for each client rect
    for (let rectIndex = 0; rectIndex < rects.length; rectIndex++) {
      const rect = rects[rectIndex];
      if (rect.width === 0 || rect.height === 0) continue; // Skip empty rects

      const overlay = document.createElement('div');
      overlay.style.position = 'fixed';
      overlay.style.border = `2px solid ${baseColor}`;
      overlay.style.backgroundColor = backgroundColor;
      overlay.style.pointerEvents = 'none';
      overlay.style.boxSizing = 'border-box';

      overlay.style.top = `${rect.top + iframeOffset.y}px`;
      overlay.style.left = `${rect.left + iframeOffset.x}px`;
      overlay.style.width = `${rect.width}px`;
      overlay.style.height = `${rect.height}px`;

      fragment.appendChild(overlay);
      // Remember the source rect's position in the list: skipped empty rects
      // mean the overlay index and the rect index can differ.
      overlays.push({ element: overlay, initialRect: rect, rectIndex });
    }

    // Create and position a single label relative to the first rect
    const firstRect = rects[0];
    label = document.createElement('div');
    label.className = 'playwright-highlight-label';
    label.style.position = 'fixed';
    label.style.background = baseColor;
    label.style.color = 'white';
    label.style.padding = '1px 4px';
    label.style.borderRadius = '4px';
    label.style.fontSize = `${Math.min(12, Math.max(8, firstRect.height / 2))}px`;
    label.textContent = index.toString();

    // NOTE(review): the label is not attached to the document yet, so these
    // presumably read 0 and the 20x16 defaults stay in effect — confirm before
    // relying on measured sizes.
    labelWidth = label.offsetWidth > 0 ? label.offsetWidth : labelWidth;
    labelHeight = label.offsetHeight > 0 ? label.offsetHeight : labelHeight;

    const initialLabelPosition = computeLabelPosition(firstRect, iframeOffset);
    label.style.top = `${initialLabelPosition.top}px`;
    label.style.left = `${initialLabelPosition.left}px`;

    fragment.appendChild(label);

    // Update positions on scroll/resize
    const updatePositions = () => {
      const newRects = element.getClientRects(); // Get fresh rects
      const newIframeOffset = readIframeOffset();

      // Update each overlay from the rect it was created for
      overlays.forEach((overlayData) => {
        const overlayStyle = overlayData.element.style;
        const newRect =
          overlayData.rectIndex < newRects.length ? newRects[overlayData.rectIndex] : null;

        if (!newRect || newRect.width === 0 || newRect.height === 0) {
          // Rect vanished or collapsed: hide the overlay until it comes back.
          overlayStyle.display = 'none';
          return;
        }

        overlayStyle.top = `${newRect.top + newIframeOffset.y}px`;
        overlayStyle.left = `${newRect.left + newIframeOffset.x}px`;
        overlayStyle.width = `${newRect.width}px`;
        overlayStyle.height = `${newRect.height}px`;
        overlayStyle.display = 'block';
      });

      if (!label) return;

      // Update label position based on the first new rect
      if (newRects.length > 0) {
        const position = computeLabelPosition(newRects[0], newIframeOffset);
        label.style.top = `${position.top}px`;
        label.style.left = `${position.left}px`;
        label.style.display = 'block';
      } else {
        // Hide label if element has no rects anymore
        label.style.display = 'none';
      }
    };

    const throttleFunction = (func, delay) => {
      // -Infinity guarantees the very first call always goes through, even if
      // it happens within `delay` ms of the performance time origin.
      let lastCall = -Infinity;
      return (...args) => {
        const now = performance.now();
        if (now - lastCall < delay) return;
        lastCall = now;
        return func(...args);
      };
    };

    const throttledUpdatePositions = throttleFunction(updatePositions, 16); // ~60fps
    window.addEventListener('scroll', throttledUpdatePositions, true);
    window.addEventListener('resize', throttledUpdatePositions);

    // Add cleanup function
    cleanupFn = () => {
      window.removeEventListener('scroll', throttledUpdatePositions, true);
      window.removeEventListener('resize', throttledUpdatePositions);
      overlays.forEach((overlay) => overlay.element.remove());
      if (label) label.remove();
    };

    // Then add fragment to container in one operation
    container.appendChild(fragment);

    return index + 1;
  } finally {
    // Store cleanup function for later use (also when a later step threw after
    // the listeners were registered).
    if (cleanupFn) {
      (window._highlightCleanupFunctions = window._highlightCleanupFunctions || []).push(
        cleanupFn
      );
    }
  }
}
11256
+
11257
/**
 * @edit scrollable element detection
 * Reports how far an element can still be scrolled in each direction.
 *
 * Returns null when the element is not an element node, is displayed `inline` or
 * `inline-block`, has neither `auto` nor `scroll` overflow on any axis, or when
 * the overflowing distance on the relevant axis is below 4px.
 * Otherwise the distances are also recorded through addExtraData().
 *
 * @returns {{top: number, right: number, bottom: number, left: number} | null}
 */
function isScrollableElement(element) {
  if (!element || element.nodeType !== Node.ELEMENT_NODE) return null;

  const style = getCachedComputedStyle(element);
  if (!style) return null;

  // Inline-level boxes are never treated as scroll containers here.
  if (['inline', 'inline-block'].includes(style.display)) return null;

  const allowsScroll = (overflow) => overflow === 'auto' || overflow === 'scroll';
  const scrollableX = allowsScroll(style.overflowX);
  const scrollableY = allowsScroll(style.overflowY);
  if (!scrollableX && !scrollableY) return null;

  // Overflow smaller than this many pixels is treated as not scrollable.
  const threshold = 4;
  const overflowWidth = element.scrollWidth - element.clientWidth;
  const overflowHeight = element.scrollHeight - element.clientHeight;
  const tooNarrow = overflowWidth < threshold;
  const tooShort = overflowHeight < threshold;

  const nothingToScroll =
    (tooNarrow && tooShort) || (!scrollableY && tooNarrow) || (!scrollableX && tooShort);
  if (nothingToScroll) return null;

  const scrollData = {
    top: element.scrollTop,
    right: element.scrollWidth - element.clientWidth - element.scrollLeft,
    bottom: element.scrollHeight - element.clientHeight - element.scrollTop,
    left: element.scrollLeft,
  };

  // Store extra data for the element
  addExtraData(element, { scrollable: true, scrollData });

  return scrollData;
}
11327
+
11328
/**
 * Decides whether a text node should be treated as visible.
 *
 * When `viewportExpansion` is -1 only the parent element's visibility is
 * consulted. Otherwise at least one of the node's client rects must have a
 * non-zero area and intersect the (expanded) viewport, and the parent element
 * must be visible. Any error is logged with `console.warn` and yields false.
 *
 * @param {Text} textNode - The text node to check.
 * @returns {boolean} Whether the text node is visible.
 */
function isTextNodeVisible(textNode) {
	// Visibility of the node's parent element; uses computed style when
	// calling `checkVisibility` throws (e.g. the method is not available).
	const isParentVisible = () => {
		const parent = textNode.parentElement;
		if (!parent) return false;

		try {
			return parent.checkVisibility({
				checkOpacity: true,
				checkVisibilityCSS: true,
			});
		} catch (e) {
			const parentStyle = window.getComputedStyle(parent);
			return (
				parentStyle.display !== 'none' &&
				parentStyle.visibility !== 'hidden' &&
				parentStyle.opacity !== '0'
			);
		}
	};

	try {
		// -1 disables every viewport restriction.
		if (viewportExpansion === -1) return isParentVisible();

		const range = document.createRange();
		range.selectNodeContents(textNode);
		const rects = range.getClientRects();
		if (!rects || rects.length === 0) return false;

		let hasRectInViewport = false;
		for (const rect of rects) {
			// Zero-area rects carry no rendered text.
			if (!(rect.width > 0 && rect.height > 0)) continue;

			const isOutside =
				rect.bottom < -viewportExpansion ||
				rect.top > window.innerHeight + viewportExpansion ||
				rect.right < -viewportExpansion ||
				rect.left > window.innerWidth + viewportExpansion;
			if (!isOutside) {
				hasRectInViewport = true;
				break;
			}
		}
		if (!hasRectInViewport) return false;

		return isParentVisible();
	} catch (e) {
		console.warn('Error checking text node visibility:', e);
		return false;
	}
}
11408
+
11409
/**
 * Tells whether an element is eligible for processing.
 *
 * Common container tags are accepted outright; every other tag is accepted
 * unless it appears on the deny list of non-content leaf elements.
 *
 * @param {HTMLElement} element - The element to check.
 * @returns {boolean} Whether the element is accepted.
 */
function isElementAccepted(element) {
	if (!element?.tagName) return false;

	const tag = element.tagName.toLowerCase();

	// Body and common containers are always accepted.
	const containerTags = ['body', 'div', 'main', 'article', 'section', 'nav', 'header', 'footer'];
	if (containerTags.includes(tag)) return true;

	// Leaf elements that never contribute content.
	const deniedTags = ['svg', 'script', 'style', 'link', 'meta', 'noscript', 'template'];
	return !deniedTags.includes(tag);
}
11445
+
11446
/**
 * Tells whether an element is rendered with a non-zero box.
 *
 * The element must have a positive `offsetWidth` and `offsetHeight`, and its
 * computed style (when available) must not be `visibility: hidden` or
 * `display: none`.
 *
 * @param {HTMLElement} element - The element to check.
 * @returns {boolean} Whether the element is visible.
 */
function isElementVisible(element) {
	const computed = getCachedComputedStyle(element);

	if (!(element.offsetWidth > 0)) return false;
	if (!(element.offsetHeight > 0)) return false;
	if (computed?.visibility === 'hidden') return false;

	return computed?.display !== 'none';
}
11461
+
11462
/**
 * Decides whether an element should be treated as interactive.
 *
 * Checks run in this order and the first decisive one wins:
 *  1. explicit blacklist / whitelist membership,
 *  2. an interactive computed cursor (never for <html>),
 *  3. natively interactive tags, unless disabled / read-only / inert or
 *     styled with a "blocked" cursor,
 *  4. contenteditable,
 *  5. dropdown-like classes and data/aria attributes,
 *  6. an interactive `role` / `aria-role`,
 *  7. registered or inline mouse/interaction listeners,
 *  8. scrollability (via `isScrollableElement`).
 *
 * @param {HTMLElement} element - The element to check.
 * @returns {boolean} Whether the element is considered interactive.
 */
function isInteractiveElement(element) {
	if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;

	/**
	 * @edit add interactiveBlacklist interactiveWhitelist
	 */
	if (interactiveBlacklist.includes(element)) return false; // blacklisted: never interactive
	if (interactiveWhitelist.includes(element)) return true; // whitelisted: always interactive

	const tagName = element.tagName.toLowerCase();
	const style = getCachedComputedStyle(element);

	// Cursor values that signal the element reacts to the pointer.
	const interactiveCursors = new Set([
		'pointer', 'move', 'text', 'grab', 'grabbing', 'cell', 'copy', 'alias',
		'all-scroll', 'col-resize', 'context-menu', 'crosshair', 'e-resize',
		'ew-resize', 'help', 'n-resize', 'ne-resize', 'nesw-resize', 'ns-resize',
		'nw-resize', 'nwse-resize', 'row-resize', 's-resize', 'se-resize',
		'sw-resize', 'vertical-text', 'w-resize', 'zoom-in', 'zoom-out',
	]);

	// Cursor values that mark a natively interactive tag as blocked.
	// 'none', 'default' and 'auto' are deliberately absent so that potentially
	// clickable elements are kept unless specifically blocked.
	const nonInteractiveCursors = new Set([
		'not-allowed', 'no-drop', 'wait', 'progress', 'initial', 'inherit',
	]);

	// An interactive cursor settles the question for everything except <html>.
	if (tagName !== 'html' && style?.cursor && interactiveCursors.has(style.cursor)) {
		return true;
	}

	// Tags that are interactive by nature.
	const interactiveElements = new Set([
		'a', 'button', 'input', 'select', 'textarea', 'details',
		'summary', 'label', 'option', 'optgroup', 'fieldset', 'legend',
	]);

	// Attributes that explicitly switch interaction off.
	const explicitDisableTags = new Set(['disabled', 'readonly']);

	if (interactiveElements.has(tagName)) {
		if (style?.cursor && nonInteractiveCursors.has(style.cursor)) return false;

		const hasDisableAttribute = [...explicitDisableTags].some(
			(disableTag) =>
				element.hasAttribute(disableTag) ||
				element.getAttribute(disableTag) === 'true' ||
				element.getAttribute(disableTag) === ''
		);
		if (hasDisableAttribute) return false;

		// Property-level disabled / read-only / inert state.
		return !(element.disabled || element.readOnly || element.inert);
	}

	if (element.getAttribute('contenteditable') === 'true' || element.isContentEditable) {
		return true;
	}

	// Dropdown-style widgets identified by class or data/aria attributes.
	if (
		element.classList &&
		(element.classList.contains('button') ||
			element.classList.contains('dropdown-toggle') ||
			element.getAttribute('data-index') ||
			element.getAttribute('data-toggle') === 'dropdown' ||
			element.getAttribute('aria-haspopup') === 'true')
	) {
		return true;
	}

	// Roles treated as interactive ('link' is intentionally not included).
	const interactiveRoles = new Set([
		'button', 'menu', 'menubar', 'menuitem', 'menuitemradio', 'menuitemcheckbox',
		'radio', 'checkbox', 'tab', 'switch', 'slider', 'spinbutton', 'combobox',
		'searchbox', 'textbox', 'listbox', 'option', 'scrollbar',
	]);

	// The tag itself cannot be interactive here: that case returned above.
	const role = element.getAttribute('role');
	const ariaRole = element.getAttribute('aria-role');
	if ((role && interactiveRoles.has(role)) || (ariaRole && interactiveRoles.has(ariaRole))) {
		return true;
	}

	// Listener inspection is best effort: any failure falls through to the
	// remaining checks.
	try {
		if (typeof getEventListeners === 'function') {
			const registered = getEventListeners(element);
			const mouseEvents = ['click', 'mousedown', 'mouseup', 'dblclick'];
			if (mouseEvents.some((type) => registered[type] && registered[type].length > 0)) {
				return true; // Found a mouse interaction listener
			}
		}

		const getEventListenersForNode =
			element?.ownerDocument?.defaultView?.getEventListenersForNode ||
			window.getEventListenersForNode;
		if (typeof getEventListenersForNode === 'function') {
			const listeners = getEventListenersForNode(element);
			const interactionEvents = [
				'click', 'mousedown', 'mouseup', 'keydown', 'keyup',
				'submit', 'change', 'input', 'focus', 'blur',
			];
			const hasInteractionListener = interactionEvents.some((eventType) => {
				for (const listener of listeners) {
					if (listener.type === eventType) return true;
				}
				return false;
			});
			if (hasInteractionListener) return true; // Found a common interaction listener
		}

		// Fallback when listener introspection is unavailable: inline handlers.
		const commonMouseAttrs = ['onclick', 'onmousedown', 'onmouseup', 'ondblclick'];
		const hasInlineHandler = commonMouseAttrs.some(
			(attr) => element.hasAttribute(attr) || typeof element[attr] === 'function'
		);
		if (hasInlineHandler) return true;
	} catch (e) {
		// If checking listeners fails, rely on the other checks.
	}

	/**
	 * @edit scrollable element detection
	 */
	if (isScrollableElement(element)) return true;

	return false;
}
11731
+
11732
/**
 * Tells whether an element is the topmost element at its on-screen position.
 *
 * With `viewportExpansion === -1` every element counts as top. Otherwise the
 * element needs a non-empty client rect inside the (expanded) viewport.
 * Elements from another document (iframes) are then accepted as top. For the
 * rest, hit-testing with `elementFromPoint` must land on the element or one of
 * its descendants: at the rect's centre for shadow DOM content, or at the
 * centre, top-left or bottom-right (inset by 5px) for regular content. A
 * hit-test that throws is treated as "top".
 *
 * @param {HTMLElement} element - The element to check.
 * @returns {boolean} Whether the element is the topmost element at its position.
 */
function isTopElement(element) {
	if (viewportExpansion === -1) return true;

	const rects = getCachedClientRects(element);
	if (!rects || rects.length === 0) return false; // No geometry, cannot be top

	const hasArea = (r) => r.width > 0 && r.height > 0;
	const isOutsideViewport = (r) =>
		r.bottom < -viewportExpansion ||
		r.top > window.innerHeight + viewportExpansion ||
		r.right < -viewportExpansion ||
		r.left > window.innerWidth + viewportExpansion;

	// Walks up from `start` until `boundary`, looking for `element`.
	const leadsToElement = (start, boundary) => {
		for (let current = start; current && current !== boundary; current = current.parentElement) {
			if (current === element) return true;
		}
		return false;
	};

	let anyRectInViewport = false;
	for (const r of rects) {
		if (hasArea(r) && !isOutsideViewport(r)) {
			anyRectInViewport = true;
			break;
		}
	}
	if (!anyRectInViewport) return false; // All rects are outside the viewport area

	// Elements living in another document (iframe content) are top by default.
	if (element.ownerDocument !== window.document) return true;

	/**
	 * @edit improve `sampleRect`, filter out rects with 0 area
	 */
	const sample = Array.from(rects).find((r) => hasArea(r));
	if (!sample) return false;

	const centerX = sample.left + sample.width / 2;
	const centerY = sample.top + sample.height / 2;

	// Shadow DOM content is hit-tested inside its own root.
	const root = element.getRootNode();
	if (root instanceof ShadowRoot) {
		try {
			return leadsToElement(root.elementFromPoint(centerX, centerY), root);
		} catch (e) {
			return true;
		}
	}

	// Probe the centre plus two opposite corners, inset by a small margin.
	const margin = 5;
	const probes = [
		[centerX, centerY],
		[sample.left + margin, sample.top + margin],
		[sample.right - margin, sample.bottom - margin],
	];

	return probes.some(([x, y]) => {
		try {
			return leadsToElement(document.elementFromPoint(x, y), document.documentElement);
		} catch (e) {
			return true;
		}
	});
}
11842
+
11843
/**
 * Tells whether an element intersects the viewport grown by a margin.
 *
 * A `viewportExpansion` of -1 disables the check (always true). Otherwise any
 * non-empty client rect that overlaps the expanded viewport is enough. When
 * the element reports no client rects, its cached bounding rect is used
 * instead.
 *
 * @param {HTMLElement} element - The element to check.
 * @param {number} viewportExpansion - Pixels added around the viewport, or -1.
 * @returns {boolean} Whether the element is within the expanded viewport.
 */
function isInExpandedViewport(element, viewportExpansion) {
	if (viewportExpansion === -1) return true;

	const liesOutside = (box) =>
		box.bottom < -viewportExpansion ||
		box.top > window.innerHeight + viewportExpansion ||
		box.right < -viewportExpansion ||
		box.left > window.innerWidth + viewportExpansion;

	const rects = element.getClientRects();

	if (!rects || rects.length === 0) {
		// Some elements (e.g. <svg>) may have a bounding box but no client rects.
		const box = getCachedBoundingRect(element);
		if (!box || box.width === 0 || box.height === 0) return false;
		return !liesOutside(box);
	}

	for (const rect of rects) {
		if (rect.width === 0 || rect.height === 0) continue; // empty rect
		if (!liesOutside(rect)) return true;
	}

	return false;
}
11890
+
11891
+ // /**
11892
+ // * Gets the effective scroll of an element.
11893
+ // *
11894
+ // * @param {HTMLElement} element - The element to get the effective scroll for.
11895
+ // * @returns {Object} The effective scroll of the element.
11896
+ // */
11897
+ // function getEffectiveScroll(element) {
11898
+ // let currentEl = element;
11899
+ // let scrollX = 0;
11900
+ // let scrollY = 0;
11901
+
11902
+ // while (currentEl && currentEl !== document.documentElement) {
11903
+ // if (currentEl.scrollLeft || currentEl.scrollTop) {
11904
+ // scrollX += currentEl.scrollLeft;
11905
+ // scrollY += currentEl.scrollTop;
11906
+ // }
11907
+ // currentEl = currentEl.parentElement;
11908
+ // }
11909
+
11910
+ // scrollX += window.scrollX;
11911
+ // scrollY += window.scrollY;
11912
+
11913
+ // return { scrollX, scrollY };
11914
+ // }
11915
+
11916
/**
 * Cheap pre-filter that tells whether an element is worth a full
 * interactivity check.
 *
 * An element is a candidate when it is one of the common interactive tags, or
 * when it carries `onclick`, `role`, `tabindex`, `data-action`, any `aria-*`
 * attribute, or `contenteditable="true"`.
 *
 * @param {HTMLElement} element - The element to check.
 * @returns {boolean} Whether the element is an interactive candidate.
 */
function isInteractiveCandidate(element) {
	if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;

	const tagName = element.tagName.toLowerCase();

	// Fast-path for common interactive elements
	const interactiveElements = new Set([
		'a',
		'button',
		'input',
		'select',
		'textarea',
		'details',
		'summary',
		'label',
	]);
	if (interactiveElements.has(tagName)) return true;

	// Exact-name checks first: they are the cheapest.
	const quickAttributes = ['onclick', 'role', 'tabindex', 'data-action'];
	if (quickAttributes.some((name) => element.hasAttribute(name))) return true;
	if (element.getAttribute('contenteditable') === 'true') return true;

	// `hasAttribute` only matches exact names, so `hasAttribute('aria-')` can
	// never detect `aria-label`, `aria-expanded`, etc. Scan the attribute names
	// for the `aria-` prefix instead.
	const attributeNames =
		typeof element.getAttributeNames === 'function'
			? element.getAttributeNames()
			: Array.from(element.attributes ?? [], (attribute) => attribute.name);

	return attributeNames.some((name) => name.startsWith('aria-'));
}
11952
+
11953
// --- Define constants for distinct interaction check ---

// Tag names that always count as their own interaction target.
const DISTINCT_INTERACTIVE_TAGS = new Set([
	'a', 'button', 'input', 'select', 'textarea',
	'summary', 'details', 'label', 'option',
]);

// ARIA roles that always count as their own interaction target.
const INTERACTIVE_ROLES = new Set([
	'button', 'link',
	'menuitem', 'menuitemradio', 'menuitemcheckbox',
	'radio', 'checkbox', 'tab', 'switch',
	'slider', 'spinbutton',
	'combobox', 'searchbox', 'textbox',
	'listbox', 'option', 'scrollbar',
]);
11984
+
11985
/**
 * Heuristic check for elements that look independently clickable even though
 * they sit inside another interactive container (e.g. menu items in a button).
 *
 * A visible element qualifies when ALL of the following hold:
 *  - it is interactive per `isInteractiveElement`, OR has `role` / `tabindex` /
 *    an onclick handler, OR has a class name containing one of the words
 *    btn, clickable, menu, item, entry, link;
 *  - at least one of its child elements is visible;
 *  - it has an ancestor-or-self matching a known container selector;
 *  - its parent is not `document.body`.
 *
 * @param {HTMLElement} element - The element to check.
 * @returns {boolean} Whether the element is heuristically interactive.
 */
function isHeuristicallyInteractive(element) {
	if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;

	// Invisible elements are rejected before any other work is done.
	if (!isElementVisible(element)) return false;

	// Attributes that commonly accompany interactive behaviour.
	const hintsByAttribute =
		['role', 'tabindex', 'onclick'].some((name) => element.hasAttribute(name)) ||
		typeof element.onclick === 'function';

	// Class names that commonly accompany interactive behaviour.
	const hintsByClass = /\b(btn|clickable|menu|item|entry|link)\b/i.test(element.className || '');

	// Is the element inside (or itself) a known interactive container?
	const withinContainer = Boolean(
		element.closest('button,a,[role="button"],.menu,.dropdown,.list,.toolbar')
	);

	// Empty wrappers are not worth marking.
	const rendersChild = Array.from(element.children).some((child) => isElementVisible(child));

	// Direct children of <body> are top-level wrappers and are excluded.
	const sitsUnderBody = element.parentElement?.isSameNode(document.body);

	const looksInteractive = isInteractiveElement(element) || hintsByAttribute || hintsByClass;

	return looksInteractive && rendersChild && withinContainer && !sitsUnderBody;
}
12031
+
12032
/**
 * Tells whether an element represents an interaction of its own, separate
 * from an interactive parent that is already highlighted.
 *
 * Positive signals, checked in order: iframe or distinct interactive tag,
 * interactive role, contenteditable, test/automation data attributes, an
 * onclick handler, registered or inline interaction listeners, and finally the
 * `isHeuristicallyInteractive` fallback.
 *
 * @param {HTMLElement} element - The element to check.
 * @returns {boolean} Whether the element is a distinct interaction.
 */
function isElementDistinctInteraction(element) {
	if (!element || element.nodeType !== Node.ELEMENT_NODE) return false;

	const tagName = element.tagName.toLowerCase();
	const role = element.getAttribute('role');

	// An iframe is always a distinct boundary; so is any distinct interactive tag.
	if (tagName === 'iframe' || DISTINCT_INTERACTIVE_TAGS.has(tagName)) return true;

	if (role && INTERACTIVE_ROLES.has(role)) return true;

	if (element.isContentEditable || element.getAttribute('contenteditable') === 'true') {
		return true;
	}

	// Common testing/automation hooks.
	const automationAttrs = ['data-testid', 'data-cy', 'data-test'];
	if (automationAttrs.some((attr) => element.hasAttribute(attr))) return true;

	// Explicit onclick handler (attribute or property).
	if (element.hasAttribute('onclick') || typeof element.onclick === 'function') return true;

	// Listener inspection is best effort: any failure falls through to the
	// heuristic check below.
	try {
		const getEventListenersForNode =
			element?.ownerDocument?.defaultView?.getEventListenersForNode ||
			window.getEventListenersForNode;

		if (typeof getEventListenersForNode === 'function') {
			const listeners = getEventListenersForNode(element);
			const interactionEvents = [
				'click', 'mousedown', 'mouseup', 'keydown', 'keyup',
				'submit', 'change', 'input', 'focus', 'blur',
			];
			const hasInteractionListener = interactionEvents.some((eventType) => {
				for (const listener of listeners) {
					if (listener.type === eventType) return true;
				}
				return false;
			});
			if (hasInteractionListener) return true; // Found a common interaction listener
		}

		// Fallback when listener introspection is unavailable: inline handlers.
		const inlineHandlerAttrs = [
			'onmousedown', 'onmouseup', 'onkeydown', 'onkeyup', 'onsubmit',
			'onchange', 'oninput', 'onfocus', 'onblur',
		];
		for (const attr of inlineHandlerAttrs) {
			if (element.hasAttribute(attr)) return true;
		}
	} catch (e) {
		// If checking listeners fails, rely on the other checks.
	}

	// Not strictly interactive, but possibly clickable by heuristic signals.
	// Anything else is assumed to trigger the same action as its parent.
	return Boolean(isHeuristicallyInteractive(element));
}
12135
+ // --- End distinct interaction check ---
12136
+
12137
/**
 * Decides whether a node receives a highlight index and, when highlighting is
 * enabled, draws its highlight.
 *
 * A node is eligible when it is interactive and either its parent was not
 * highlighted or it is a distinct interaction of its own. Eligible nodes get
 * `nodeData.isInViewport` set; those inside the expanded viewport (or all of
 * them when `viewportExpansion` is -1) are assigned the next `highlightIndex`.
 * The overlay is drawn only when `doHighlightElements` is on, and, if
 * `focusHighlightIndex` is non-negative, only for the node with that index.
 *
 * Note that true is returned only when `doHighlightElements` is enabled, even
 * though an index may have been assigned.
 *
 * @param {Object} nodeData - The node data object; `isInteractive` is read,
 *   `isInViewport` and `highlightIndex` may be written.
 * @param {HTMLElement} node - The node to highlight.
 * @param {HTMLElement | null} parentIframe - The parent iframe node.
 * @param {boolean} isParentHighlighted - Whether the parent node is highlighted.
 * @returns {boolean} Whether the element was highlighted.
 */
function handleHighlighting(nodeData, node, parentIframe, isParentHighlighted) {
	// Non-interactive nodes are never highlighted.
	if (!nodeData.isInteractive) return false;

	// Under a highlighted parent, only distinct interactions qualify.
	const isEligible = !isParentHighlighted || isElementDistinctInteraction(node);
	if (!isEligible) return false;

	// Viewport status is recorded before an index is handed out.
	nodeData.isInViewport = isInExpandedViewport(node, viewportExpansion);

	// With viewportExpansion === -1 every eligible node gets an index.
	if (!nodeData.isInViewport && viewportExpansion !== -1) return false;

	nodeData.highlightIndex = highlightIndex++;

	if (!doHighlightElements) return false; // indexed, but nothing drawn

	// A non-negative focus index restricts drawing to that single node.
	const isFocusRestricted = focusHighlightIndex >= 0;
	if (!isFocusRestricted || focusHighlightIndex === nodeData.highlightIndex) {
		highlightElement(node, nodeData.highlightIndex, parentIframe);
	}

	return true;
}
12198
+
12199
/**
 * Recursively records a DOM node and its descendants in `DOM_HASH_MAP`.
 *
 * Every accepted node is stored under a fresh id taken from `ID.current`;
 * element entries list the ids of their accepted children in `children`.
 *
 * @param {HTMLElement} node - The node to process.
 * @param {HTMLElement | null} parentIframe - The iframe element `node` was reached through, if any.
 * @param {boolean} isParentHighlighted - Whether an ancestor was already highlighted.
 * @returns {string | null} The id of the stored node data, or null if the node was skipped.
 */
function buildDomTree(node, parentIframe = null, isParentHighlighted = false) {
    // Fast rejection: nothing to process, the highlight overlay itself, or a
    // node that is neither an element nor a text node.
    if (
        !node ||
        node.id === HIGHLIGHT_CONTAINER_ID ||
        (node.nodeType !== Node.ELEMENT_NODE && node.nodeType !== Node.TEXT_NODE)
    ) {
        return null;
    }

    /**
     * @edit add `data-browser-use-ignore` attribute
     */
    if (node.dataset?.browserUseIgnore === 'true' || node.dataset?.pageAgentIgnore === 'true') {
        return null; // Skip this node and its children
    }

    /**
     * @edit exclude aria-hidden elements
     */
    if (node.getAttribute && node.getAttribute('aria-hidden') === 'true') {
        return null; // Skip this node and its children
    }

    // Special handling for root node (body)
    if (node === document.body) {
        const bodyData = {
            tagName: 'body',
            attributes: {},
            xpath: '/body',
            children: [],
        };

        // Body's children have no highlighted parent initially
        for (const child of node.childNodes) {
            const childId = buildDomTree(child, parentIframe, false);
            if (childId) bodyData.children.push(childId);
        }

        const bodyId = `${ID.current++}`;
        DOM_HASH_MAP[bodyId] = bodyData;
        return bodyId;
    }

    // Process text nodes
    if (node.nodeType === Node.TEXT_NODE) {
        const textContent = node.textContent?.trim();
        if (!textContent) {
            return null;
        }

        // Text without a parent element, or inside <script>, is never recorded
        const parentElement = node.parentElement;
        if (!parentElement || parentElement.tagName.toLowerCase() === 'script') {
            return null;
        }

        const textId = `${ID.current++}`;
        DOM_HASH_MAP[textId] = {
            type: 'TEXT_NODE',
            text: textContent,
            isVisible: isTextNodeVisible(node),
        };
        return textId;
    }

    // From here on `node` is an element: every other node type returned above.
    if (!isElementAccepted(node)) {
        return null;
    }

    // Early viewport check - only filter out elements clearly outside viewport
    // The getBoundingClientRect() of the Shadow DOM host element may return width/height = 0
    if (viewportExpansion !== -1 && !node.shadowRoot) {
        const rect = getCachedBoundingRect(node); // Keep for initial quick check
        const style = getCachedComputedStyle(node);

        // Skip viewport check for fixed/sticky elements as they may appear anywhere
        const isFixedOrSticky = style && (style.position === 'fixed' || style.position === 'sticky');

        // Check if element has actual dimensions using offsetWidth/Height (quick check)
        const hasSize = node.offsetWidth > 0 || node.offsetHeight > 0;

        // Use getBoundingClientRect for the quick OUTSIDE check.
        // isInExpandedViewport will do the more accurate check later if needed.
        if (
            !rect ||
            (!isFixedOrSticky &&
                !hasSize &&
                (rect.bottom < -viewportExpansion ||
                    rect.top > window.innerHeight + viewportExpansion ||
                    rect.right < -viewportExpansion ||
                    rect.left > window.innerWidth + viewportExpansion))
        ) {
            return null;
        }
    }

    const tagName = node.tagName.toLowerCase();

    /**
     * @type {
     *   {
     *     tagName: string;
     *     attributes: Record<string, string | null>;
     *     xpath: any;
     *     children: never[];
     *     isVisible?: boolean;
     *     isTopElement?: boolean;
     *     isInteractive?: boolean;
     *     isInViewport?: boolean;
     *     highlightIndex?: number;
     *     shadowRoot?: boolean;
     *   }
     * } nodeData - The node data object.
     */
    const nodeData = {
        tagName,
        attributes: {},

        /**
         * @edit no need for xpath
         */

        children: [],
    };

    // Copies every attribute currently present on the element into nodeData.
    const copyAttributes = () => {
        const attributeNames = node.getAttributeNames?.() || [];
        for (const name of attributeNames) {
            nodeData.attributes[name] = node.getAttribute(name);
        }
    };

    // Get attributes for interactive elements or potential text containers
    if (isInteractiveCandidate(node) || tagName === 'iframe' || tagName === 'body') {
        copyAttributes();

        /**
         * @edit @workaround input.checked
         */
        if (tagName === 'input' && (node.type === 'checkbox' || node.type === 'radio')) {
            nodeData.attributes.checked = node.checked ? 'true' : 'false'; // Store as string for consistency
        }
    }

    // Perform visibility, interactivity, and highlighting checks
    let nodeWasHighlighted = false;
    nodeData.isVisible = isElementVisible(node); // isElementVisible uses offsetWidth/Height, which is fine
    if (nodeData.isVisible) {
        nodeData.isTopElement = isTopElement(node);

        // Special handling for ARIA menu containers - check interactivity even if not top element
        const role = node.getAttribute('role');
        const isMenuContainer = role === 'menu' || role === 'menubar' || role === 'listbox';

        if (nodeData.isTopElement || isMenuContainer) {
            nodeData.isInteractive = isInteractiveElement(node);
            // Call the dedicated highlighting function
            nodeWasHighlighted = handleHighlighting(nodeData, node, parentIframe, isParentHighlighted);

            /**
             * @edit direct dom ref
             */
            nodeData.ref = node;

            /**
             * @edit make sure attributes exist for interactive candidates.
             * @note if the element failed the isInteractiveCandidate, attributes would be empty.
             */
            if (nodeData.isInteractive && Object.keys(nodeData.attributes).length === 0) {
                copyAttributes();
            }
        }
    }

    // Process children, with special handling for iframes and rich text editors
    if (node.tagName) {
        if (tagName === 'iframe') {
            // Handle iframes: descend into the framed document when it is accessible
            try {
                const iframeDoc = node.contentDocument || node.contentWindow?.document;
                if (iframeDoc) {
                    for (const child of iframeDoc.childNodes) {
                        const childId = buildDomTree(child, node, false);
                        if (childId) nodeData.children.push(childId);
                    }
                }
            } catch (e) {
                console.warn('Unable to access iframe:', e);
            }
        } else if (
            // Handle rich text editors and contenteditable elements
            node.isContentEditable ||
            node.getAttribute('contenteditable') === 'true' ||
            node.id === 'tinymce' ||
            node.classList.contains('mce-content-body') ||
            (tagName === 'body' && node.getAttribute('data-id')?.startsWith('mce_'))
        ) {
            // Process all child nodes to capture formatted text
            for (const child of node.childNodes) {
                const childId = buildDomTree(child, parentIframe, nodeWasHighlighted);
                if (childId) nodeData.children.push(childId);
            }
        } else {
            // Handle shadow DOM
            if (node.shadowRoot) {
                nodeData.shadowRoot = true;
                for (const child of node.shadowRoot.childNodes) {
                    const childId = buildDomTree(child, parentIframe, nodeWasHighlighted);
                    if (childId) nodeData.children.push(childId);
                }
            }
            // Handle regular elements: pass the highlighted status of the
            // *current* node (or of its ancestors) down to its children
            const passHighlightStatusToChild = nodeWasHighlighted || isParentHighlighted;
            for (const child of node.childNodes) {
                const childId = buildDomTree(child, parentIframe, passHighlightStatusToChild);
                if (childId) nodeData.children.push(childId);
            }
        }
    }

    // Skip empty anchor tags only if they have no dimensions and no children
    if (nodeData.tagName === 'a' && nodeData.children.length === 0 && !nodeData.attributes.href) {
        // Check if the anchor has actual dimensions
        const rect = getCachedBoundingRect(node);
        const hasSize =
            (rect && rect.width > 0 && rect.height > 0) || node.offsetWidth > 0 || node.offsetHeight > 0;

        if (!hasSize) {
            return null;
        }
    }

    /**
     * @edit add `extra` field for extra data
     */
    nodeData.extra = extraData.get(node) || null;

    const id = `${ID.current++}`;
    DOM_HASH_MAP[id] = nodeData;
    return id;
}
12473
+
12474
+ const rootId = buildDomTree(document.body);
12475
+
12476
+ // Clear the cache before starting
12477
+ DOM_CACHE.clearCache();
12478
+
12479
+ return { rootId, map: DOM_HASH_MAP }
12480
+ };
12481
+
12482
+ /**
12483
+ * DOM tree utilities: build flat tree, convert to string, manage highlights.
12484
+ * Adapted from @page-agent/page-controller (MIT License).
12485
+ */
12486
/** Viewport expansion used when the caller does not specify one. */
const DEFAULT_VIEWPORT_EXPANSION = -1;

/**
 * Returns the configured viewport expansion, falling back to the default
 * only when the value is null or undefined (0 is kept as-is).
 *
 * @param {number | null | undefined} viewportExpansion - Caller-supplied value.
 * @returns {number} The value to use.
 */
function resolveViewportExpansion(viewportExpansion) {
    if (viewportExpansion === null || viewportExpansion === undefined) {
        return DEFAULT_VIEWPORT_EXPANSION;
    }
    return viewportExpansion;
}
12490
/** Remembers element references already reported, mapped to the page URL at first sighting. */
const newElementsCache = new WeakMap();

/**
 * Runs `domTree` with the given configuration and marks interactive
 * elements that have not been seen before with `isNew = true`.
 *
 * @param {object} config - Options: `viewportExpansion`, `interactiveBlacklist`,
 *   `interactiveWhitelist`, `highlightOpacity`, `highlightLabelOpacity`.
 *   List entries may be values or zero-argument functions producing a value.
 * @returns {{ rootId: any, map: object }} The result of `domTree`, with `isNew` flags added.
 */
function getFlatTree(config) {
    // Function entries are invoked to obtain the actual entry.
    const resolveEntries = (entries) =>
        [...(entries || [])].map((entry) => (typeof entry === "function" ? entry() : entry));

    const viewportExpansion = resolveViewportExpansion(config.viewportExpansion);
    const interactiveBlacklist = resolveEntries(config.interactiveBlacklist);
    const interactiveWhitelist = resolveEntries(config.interactiveWhitelist);

    const elements = domTree({
        doHighlightElements: true,
        debugMode: true,
        focusHighlightIndex: -1,
        viewportExpansion,
        interactiveBlacklist,
        interactiveWhitelist,
        highlightOpacity: config.highlightOpacity ?? 0.0,
        highlightLabelOpacity: config.highlightLabelOpacity ?? 0.1,
    });

    for (const entry of Object.values(elements.map)) {
        if (!entry.isInteractive || !entry.ref) {
            continue;
        }
        if (newElementsCache.has(entry.ref)) {
            continue;
        }
        newElementsCache.set(entry.ref, window.location.href);
        entry.isNew = true;
    }
    return elements;
}
12533
+ // ---- flatTreeToString ----
12534
/** Compiled glob patterns, keyed by the original pattern string. */
const globRegexCache = new Map();

/**
 * Converts a glob pattern into an anchored regular expression.
 *
 * `*` matches any run of characters; every other character, including `?`
 * and the remaining regex metacharacters, is matched literally.
 * Compiled expressions are cached per pattern.
 *
 * @param {string} pattern - Glob pattern such as `data-*`.
 * @returns {RegExp} Expression matching the whole string.
 */
function globToRegex(pattern) {
    let regex = globRegexCache.get(pattern);
    if (!regex) {
        // `?` is escaped too: left alone it would act as a regex quantifier.
        const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
        regex = new RegExp(`^${escaped.replace(/\*/g, ".*")}$`);
        globRegexCache.set(pattern, regex);
    }
    return regex;
}
12544
/**
 * Picks the attributes whose names match any of the given patterns.
 *
 * Patterns containing `*` are treated as globs (via `globToRegex`); all
 * others are exact attribute names. Values are trimmed, and attributes whose
 * value is not a string or is blank after trimming are left out.
 *
 * @param {Record<string, string | null>} attrs - Attribute name to value.
 * @param {string[]} patterns - Exact names or glob patterns.
 * @returns {Record<string, string>} Matching attributes with trimmed values.
 */
function matchAttributes(attrs, patterns) {
    const result = {};
    // Shared guard for both branches: a null or otherwise non-string value
    // must be skipped rather than crash on `.trim()`.
    const keepIfMeaningful = (key) => {
        const raw = attrs[key];
        if (typeof raw !== "string") {
            return;
        }
        const trimmed = raw.trim();
        if (trimmed) {
            result[key] = trimmed;
        }
    };
    for (const pattern of patterns) {
        if (pattern.includes("*")) {
            const regex = globToRegex(pattern);
            for (const key of Object.keys(attrs)) {
                if (regex.test(key)) {
                    keepIfMeaningful(key);
                }
            }
        }
        else {
            keepIfMeaningful(pattern);
        }
    }
    return result;
}
12564
/**
 * Renders a flat DOM tree as indented text.
 *
 * Elements that carry a `highlightIndex` become one line each, of the form
 * `[index]<tag attr=value >text />` (prefixed with `*` when `isNew`), and
 * increase the indentation of what follows beneath them. Text nodes are
 * emitted on their own line only when no ancestor has a highlight index and
 * their parent element is visible and a top element.
 *
 * @param {{ rootId: string, map: object }} flatTree - Tree as returned by `getFlatTree`.
 * @param {string[]} [includeAttributes] - Extra attribute names or glob
 *   patterns to show, in addition to the built-in defaults.
 * @returns {string} The rendered text, or "" when the root is missing.
 */
function flatTreeToString(flatTree, includeAttributes) {
    const DEFAULT_INCLUDE_ATTRIBUTES = [
        "title",
        "type",
        "checked",
        "name",
        "role",
        "value",
        "placeholder",
        "data-date-format",
        "alt",
        "aria-label",
        "aria-expanded",
        "data-state",
        "aria-checked",
        "id",
        "for",
        "target",
        "aria-haspopup",
        "aria-controls",
        "aria-owns",
        "contenteditable",
    ];
    const includeAttrs = [
        ...(includeAttributes || []),
        ...DEFAULT_INCLUDE_ATTRIBUTES,
    ];
    const capTextLength = (text, maxLength) => {
        if (text.length > maxLength) {
            return text.substring(0, maxLength) + "...";
        }
        return text;
    };
    // Turns the id-based flat map into a nested tree of plain objects.
    const buildTreeNode = (nodeId) => {
        const node = flatTree.map[nodeId];
        if (!node)
            return null;
        if (node.type === "TEXT_NODE") {
            return {
                type: "text",
                text: node.text,
                isVisible: node.isVisible,
                parent: null,
                children: [],
            };
        }
        const children = [];
        if (node.children) {
            for (const childId of node.children) {
                const child = buildTreeNode(childId);
                if (child) {
                    children.push(child);
                }
            }
        }
        return {
            type: "element",
            tagName: node.tagName,
            attributes: node.attributes ?? {},
            isVisible: node.isVisible ?? false,
            isInteractive: node.isInteractive ?? false,
            isTopElement: node.isTopElement ?? false,
            isNew: node.isNew ?? false,
            highlightIndex: node.highlightIndex,
            parent: null,
            children,
            extra: node.extra ?? {},
        };
    };
    const setParentReferences = (node, parent = null) => {
        node.parent = parent;
        for (const child of node.children) {
            setParentReferences(child, node);
        }
    };
    const rootNode = buildTreeNode(flatTree.rootId);
    if (!rootNode)
        return "";
    setParentReferences(rootNode);
    const hasParentWithHighlightIndex = (node) => {
        let current = node.parent;
        while (current) {
            if (current.type === "element" &&
                current.highlightIndex !== undefined) {
                return true;
            }
            current = current.parent;
        }
        return false;
    };
    const processNode = (node, depth, result) => {
        let nextDepth = depth;
        const depthStr = "\t".repeat(depth);
        if (node.type === "element") {
            if (node.highlightIndex !== undefined) {
                nextDepth += 1;
                const text = getAllTextTillNextClickableElement(node);
                let attributesHtmlStr = "";
                if (includeAttrs.length > 0 && node.attributes) {
                    const attributesToInclude = matchAttributes(node.attributes, includeAttrs);
                    const keys = Object.keys(attributesToInclude);
                    if (keys.length > 1) {
                        // Drop attributes that repeat an earlier value longer
                        // than 5 characters. A Set is used because a plain
                        // object with `in` also matches inherited keys such as
                        // "constructor" or "toString".
                        const seenValues = new Set();
                        for (const key of keys) {
                            const value = attributesToInclude[key];
                            if (value.length <= 5) {
                                continue;
                            }
                            if (seenValues.has(value)) {
                                delete attributesToInclude[key];
                            }
                            else {
                                seenValues.add(value);
                            }
                        }
                    }
                    // A role equal to the tag name adds no information.
                    if (attributesToInclude.role === node.tagName) {
                        delete attributesToInclude.role;
                    }
                    // Labels that merely repeat the element's text are dropped.
                    const attrsToRemoveIfTextMatches = [
                        "aria-label",
                        "placeholder",
                        "title",
                    ];
                    for (const attr of attrsToRemoveIfTextMatches) {
                        if (attributesToInclude[attr] &&
                            attributesToInclude[attr].toLowerCase().trim() ===
                                text.toLowerCase().trim()) {
                            delete attributesToInclude[attr];
                        }
                    }
                    if (Object.keys(attributesToInclude).length > 0) {
                        attributesHtmlStr = Object.entries(attributesToInclude)
                            .map(([key, value]) => `${key}=${capTextLength(value, 20)}`)
                            .join(" ");
                    }
                }
                const highlightIndicator = node.isNew
                    ? `*[${node.highlightIndex}]`
                    : `[${node.highlightIndex}]`;
                let line = `${depthStr}${highlightIndicator}<${node.tagName ?? ""}`;
                if (attributesHtmlStr) {
                    line += ` ${attributesHtmlStr}`;
                }
                if (node.extra) {
                    if (node.extra.scrollable) {
                        let scrollDataText = "";
                        if (node.extra.scrollData?.left)
                            scrollDataText += `left=${node.extra.scrollData.left}, `;
                        if (node.extra.scrollData?.top)
                            scrollDataText += `top=${node.extra.scrollData.top}, `;
                        if (node.extra.scrollData?.right)
                            scrollDataText += `right=${node.extra.scrollData.right}, `;
                        if (node.extra.scrollData?.bottom)
                            scrollDataText += `bottom=${node.extra.scrollData.bottom}`;
                        line += ` data-scrollable="${scrollDataText}"`;
                    }
                }
                if (text) {
                    const trimmedText = text.trim();
                    if (!attributesHtmlStr) {
                        line += " ";
                    }
                    line += `>${trimmedText}`;
                }
                else if (!attributesHtmlStr) {
                    line += " ";
                }
                line += " />";
                result.push(line);
            }
            for (const child of node.children) {
                processNode(child, nextDepth, result);
            }
        }
        else if (node.type === "text") {
            // Text under a highlighted ancestor is already part of that
            // ancestor's line.
            if (hasParentWithHighlightIndex(node)) {
                return;
            }
            if (node.parent &&
                node.parent.type === "element" &&
                node.parent.isVisible &&
                node.parent.isTopElement) {
                result.push(`${depthStr}${node.text ?? ""}`);
            }
        }
    };
    const result = [];
    processNode(rootNode, 0, result);
    return result.join("\n");
}
12762
/**
 * Collects the text beneath `node`, stopping at any descendant element that
 * has its own `highlightIndex`.
 *
 * @param {object} node - Tree node to start from.
 * @param {number} [maxDepth=-1] - Maximum depth to descend; -1 means unlimited.
 * @returns {string} Text pieces joined with newlines, trimmed at both ends.
 */
const getAllTextTillNextClickableElement = (node, maxDepth = -1) => {
    const pieces = [];
    const depthLimited = maxDepth !== -1;
    const visit = (current, depth) => {
        if (depthLimited && depth > maxDepth) {
            return;
        }
        if (current.type === "element") {
            // Another highlighted element owns its own text; do not enter it.
            const startsOtherHighlight = current !== node && current.highlightIndex !== undefined;
            if (startsOtherHighlight) {
                return;
            }
            for (const child of current.children) {
                visit(child, depth + 1);
            }
            return;
        }
        if (current.type === "text" && current.text) {
            pieces.push(current.text);
        }
    };
    visit(node, 0);
    return pieces.join("\n").trim();
};
12785
/**
 * Indexes the interactive nodes of a flat tree by their highlight index.
 *
 * @param {{ map: object }} flatTree - Tree as returned by `getFlatTree`.
 * @returns {Map<number, object>} Highlight index to node entry.
 */
function getSelectorMap(flatTree) {
    const byHighlightIndex = new Map();
    for (const entry of Object.values(flatTree.map)) {
        if (!entry.isInteractive) {
            continue;
        }
        if (typeof entry.highlightIndex !== "number") {
            continue;
        }
        byHighlightIndex.set(entry.highlightIndex, entry);
    }
    return byHighlightIndex;
}
12796
/**
 * Maps each highlight index found in rendered tree text to its full line.
 *
 * Lines look like `[12]<tag ...>text />`; lines for new elements carry a
 * leading `*` (`*[12]<tag ...`) and are included as well. Lines that do not
 * describe a highlighted element are ignored.
 *
 * @param {string} simplifiedHTML - Text as produced by `flatTreeToString`.
 * @returns {Map<number, string>} Highlight index to trimmed source line.
 */
function getElementTextMap(simplifiedHTML) {
    // The optional `\*` accepts the new-element marker.
    const elementLine = /^\*?\[(\d+)\]<[^>]+>([^<]*)/;
    const lines = simplifiedHTML
        .split("\n")
        .map((line) => line.trim())
        .filter((line) => line.length > 0);
    const elementTextMap = new Map();
    for (const line of lines) {
        const match = elementLine.exec(line);
        if (match) {
            elementTextMap.set(Number.parseInt(match[1], 10), line);
        }
    }
    return elementTextMap;
}
12812
/**
 * Runs every cleanup callback registered on `window._highlightCleanupFunctions`
 * and then resets the list to an empty array.
 */
function cleanUpHighlights() {
    const pending = window._highlightCleanupFunctions || [];
    for (const entry of pending) {
        // Anything that is not callable is ignored.
        if (typeof entry !== "function") {
            continue;
        }
        entry();
    }
    window._highlightCleanupFunctions = [];
}
12821
+
12822
/**
 * Resolves after the given delay.
 *
 * @param {number} seconds - Delay in seconds (fractions allowed).
 * @returns {Promise<void>}
 */
async function waitFor(seconds) {
    const delayMs = seconds * 1000;
    await new Promise((done) => {
        setTimeout(done, delayMs);
    });
}
12825
/**
 * Looks up the DOM element stored for a highlight index.
 *
 * @param {Map<number, object>} selectorMap - Map as built by `getSelectorMap`.
 * @param {number} index - Highlight index to resolve.
 * @returns {HTMLElement} The element referenced by the entry.
 * @throws {Error} If there is no entry, the entry has no `ref`, or the
 *   reference is not an `HTMLElement`.
 */
function getElementByIndex(selectorMap, index) {
    const entry = selectorMap.get(index);
    if (!entry) {
        throw new Error(`No interactive element found at index ${index}`);
    }
    const { ref } = entry;
    if (!ref) {
        throw new Error(`Element at index ${index} does not have a reference`);
    }
    if (ref instanceof HTMLElement) {
        return ref;
    }
    throw new Error(`Element at index ${index} is not an HTMLElement`);
}
12839
/** Element most recently passed to `clickElement`, or null once it has been blurred. */
let lastClickedElement = null;

/**
 * Blurs the last clicked element, sends it a `mouseout` event and forgets it.
 * Does nothing when no element is remembered.
 */
function blurLastClickedElement() {
    if (!lastClickedElement) {
        return;
    }
    const previous = lastClickedElement;
    previous.blur();
    const leaveEvent = new MouseEvent("mouseout", { bubbles: true, cancelable: true });
    previous.dispatchEvent(leaveEvent);
    lastClickedElement = null;
}
12847
/**
 * Smooth-scrolls the element to the vertical center of the viewport unless
 * its bounding rectangle already lies completely inside the window.
 *
 * @param {Element} element - Element to bring into view.
 * @returns {Promise<void>}
 */
async function scrollIntoViewIfNeeded(element) {
    const { top, bottom, left, right } = element.getBoundingClientRect();
    const fullyVisible =
        top >= 0 &&
        bottom <= window.innerHeight &&
        left >= 0 &&
        right <= window.innerWidth;
    if (fullyVisible) {
        return;
    }
    element.scrollIntoView({ behavior: "smooth", block: "center", inline: "nearest" });
    // Wait for smooth scroll animation to settle
    await waitFor(0.4);
}
12860
/**
 * Moves the visual AI cursor to the center of an element by dispatching a
 * `HyphaDebugger::MovePointerTo` event on `window`, then pauses briefly.
 *
 * @param {Element} element - Element whose center is targeted.
 * @returns {Promise<void>}
 */
async function movePointerToElement(element) {
    const { left, top, width, height } = element.getBoundingClientRect();
    const x = left + width / 2;
    const y = top + height / 2;
    const moveEvent = new CustomEvent("HyphaDebugger::MovePointerTo", { detail: { x, y } });
    window.dispatchEvent(moveEvent);
    await waitFor(0.3); // wait for cursor animation
}
12868
/**
 * Simulates a user click on an element.
 *
 * Blurs the previously clicked element, scrolls the target into view, moves
 * the visual cursor onto it, plays the click ripple, and then dispatches the
 * hover and mouse events in order, focusing the element between `mousedown`
 * and `mouseup`.
 *
 * @param {HTMLElement} element - Element to click.
 * @returns {Promise<void>}
 */
async function clickElement(element) {
    blurLastClickedElement();
    lastClickedElement = element;
    await scrollIntoViewIfNeeded(element);
    await movePointerToElement(element);

    // Trigger click ripple animation
    window.dispatchEvent(new CustomEvent("HyphaDebugger::ClickPointer"));
    await waitFor(0.05);

    const fire = (type) => {
        element.dispatchEvent(new MouseEvent(type, { bubbles: true, cancelable: true }));
    };
    // hover
    fire("mouseenter");
    fire("mouseover");
    // mouse sequence
    fire("mousedown");
    element.focus();
    fire("mouseup");
    fire("click");
    await waitFor(0.2);
}
12886
// Both setters are looked up on first use rather than at load time, to avoid
// "window is not defined" in Node/SSR.
let _nativeInputValueSetter = null;
let _nativeTextAreaValueSetter = null;

/**
 * Returns the `value` setter defined on `HTMLInputElement.prototype`,
 * looking it up once and reusing it afterwards.
 *
 * @returns {Function} The prototype's `value` setter.
 */
function getNativeInputValueSetter() {
    if (_nativeInputValueSetter) {
        return _nativeInputValueSetter;
    }
    const descriptor = Object.getOwnPropertyDescriptor(window.HTMLInputElement.prototype, "value");
    _nativeInputValueSetter = descriptor.set;
    return _nativeInputValueSetter;
}

/**
 * Returns the `value` setter defined on `HTMLTextAreaElement.prototype`,
 * looking it up once and reusing it afterwards.
 *
 * @returns {Function} The prototype's `value` setter.
 */
function getNativeTextAreaValueSetter() {
    if (_nativeTextAreaValueSetter) {
        return _nativeTextAreaValueSetter;
    }
    const descriptor = Object.getOwnPropertyDescriptor(window.HTMLTextAreaElement.prototype, "value");
    _nativeTextAreaValueSetter = descriptor.set;
    return _nativeTextAreaValueSetter;
}
12901
/**
 * Replaces the content of an input, textarea or contenteditable element.
 *
 * The element is clicked first. Contenteditable elements are cleared and
 * refilled through cancelable `beforeinput` events, each followed by an
 * `input` event, then receive `change` and are blurred. Inputs and textareas
 * get their value written through the prototype's native setter, followed by
 * a bubbling `input` event.
 *
 * @param {HTMLElement} element - Target element.
 * @param {string} text - Text to write.
 * @returns {Promise<void>}
 * @throws {Error} If the element is not an input, textarea, or contenteditable.
 */
async function inputTextElement(element, text) {
    const editable = element.isContentEditable;
    const isInput = element instanceof HTMLInputElement;
    const isTextArea = element instanceof HTMLTextAreaElement;
    if (!isInput && !isTextArea && !editable) {
        throw new Error("Element is not an input, textarea, or contenteditable");
    }
    await clickElement(element);

    if (editable) {
        // Each step is applied only if its `beforeinput` event was not cancelled.
        const applyStep = (beforeInit, afterInit, content) => {
            if (element.dispatchEvent(new InputEvent("beforeinput", beforeInit))) {
                element.innerText = content;
                element.dispatchEvent(new InputEvent("input", afterInit));
            }
        };
        // Clear
        applyStep(
            { bubbles: true, cancelable: true, inputType: "deleteContent" },
            { bubbles: true, inputType: "deleteContent" },
            "",
        );
        // Insert
        applyStep(
            { bubbles: true, cancelable: true, inputType: "insertText", data: text },
            { bubbles: true, inputType: "insertText", data: text },
            text,
        );
        element.dispatchEvent(new Event("change", { bubbles: true }));
        element.blur();
    }
    else {
        const nativeSetter = isTextArea
            ? getNativeTextAreaValueSetter()
            : getNativeInputValueSetter();
        nativeSetter.call(element, text);
        element.dispatchEvent(new Event("input", { bubbles: true }));
    }

    await waitFor(0.1);
    blurLastClickedElement();
}
12951
/**
 * Selects the option of a `<select>` whose trimmed text equals `optionText`.
 *
 * Scrolls the element into view, moves the visual cursor to its center and
 * plays the click ripple before the option is looked up; then sets the
 * value and dispatches a bubbling `change` event.
 *
 * @param {HTMLSelectElement} selectElement - The select to operate on.
 * @param {string} optionText - Visible text of the option to choose.
 * @returns {Promise<void>}
 * @throws {Error} If the element is not a select or no option matches.
 */
async function selectOptionElement(selectElement, optionText) {
    const isSelect = selectElement instanceof HTMLSelectElement;
    if (!isSelect) {
        throw new Error("Element is not a select element");
    }
    await scrollIntoViewIfNeeded(selectElement);

    // Move cursor to element
    const box = selectElement.getBoundingClientRect();
    const center = { x: box.left + box.width / 2, y: box.top + box.height / 2 };
    window.dispatchEvent(new CustomEvent("HyphaDebugger::MovePointerTo", {
        detail: center,
    }));
    await waitFor(0.3);
    window.dispatchEvent(new CustomEvent("HyphaDebugger::ClickPointer"));

    const chosen = Array.from(selectElement.options).find(
        (candidate) => candidate.textContent?.trim() === optionText.trim(),
    );
    if (!chosen) {
        throw new Error(`Option with text "${optionText}" not found in select element`);
    }
    selectElement.value = chosen.value;
    selectElement.dispatchEvent(new Event("change", { bubbles: true }));
    await waitFor(0.1);
}
12972
/**
 * Scrolls vertically, either inside the nearest scrollable ancestor of
 * `element` or, without an element, on the page / main scroll container.
 *
 * NOTE(review): `down` is not read anywhere in this function; the direction
 * comes solely from the sign of `scroll_amount`. Presumably callers pass a
 * signed amount - confirm against the call sites.
 *
 * @param {boolean} down - Intended direction (currently unused, see note).
 * @param {number} scroll_amount - Signed distance in pixels; positive scrolls down.
 * @param {HTMLElement} [element] - Start the search for a scrollable container here.
 * @returns {Promise<string>} Human-readable description of what happened.
 */
async function scrollVertically(down, scroll_amount, element) {
    const dy = scroll_amount;
    const scrollableOverflow = /(auto|scroll|overlay)/;

    if (element) {
        // Walk up from the element (examining at most 10 nodes) until one
        // of them actually moves.
        let candidate = element;
        for (let hops = 0; candidate && hops < 10; hops++) {
            const { overflowY } = window.getComputedStyle(candidate);
            const overflowAllowsScroll = scrollableOverflow.test(overflowY);
            const hasOverflowingContent = candidate.scrollHeight > candidate.clientHeight;
            if (overflowAllowsScroll && hasOverflowingContent) {
                const start = candidate.scrollTop;
                const limit = candidate.scrollHeight - candidate.clientHeight;
                // Element-level scrolling moves a third of the requested
                // amount, clamped to the remaining scroll range.
                const requested = dy / 3;
                const step = requested > 0
                    ? Math.min(requested, limit - start)
                    : Math.max(requested, -start);
                candidate.scrollTop = start + step;
                const moved = candidate.scrollTop - start;
                if (Math.abs(moved) > 0.5) {
                    return `Scrolled container (${candidate.tagName}) by ${moved}px`;
                }
            }
            if (candidate === document.body || candidate === document.documentElement) {
                break;
            }
            candidate = candidate.parentElement;
        }
        return `No scrollable container found for element (${element.tagName})`;
    }

    // Page-level scrolling
    const tallEnough = (node) => node.clientHeight >= window.innerHeight * 0.5;
    const isScrollable = (node) => node &&
        scrollableOverflow.test(getComputedStyle(node).overflowY) &&
        node.scrollHeight > node.clientHeight &&
        tallEnough(node);

    // Prefer a scrollable ancestor of the focused element, then the first
    // scrollable element in the document, then the document scroller.
    let target = document.activeElement;
    while (target && !isScrollable(target) && target !== document.body) {
        target = target.parentElement;
    }
    if (!isScrollable(target)) {
        target = Array.from(document.querySelectorAll("*")).find(isScrollable) ||
            document.scrollingElement ||
            document.documentElement;
    }

    const isPageScroller = target === document.scrollingElement ||
        target === document.documentElement ||
        target === document.body;

    if (isPageScroller) {
        const before = window.scrollY;
        window.scrollBy(0, dy);
        const after = window.scrollY;
        const scrolled = after - before;
        if (Math.abs(scrolled) < 1) {
            return dy > 0
                ? "Already at the bottom of the page."
                : "Already at the top of the page.";
        }
        const limit = document.documentElement.scrollHeight - window.innerHeight;
        if (dy > 0 && after >= limit - 1) {
            return `Scrolled page by ${scrolled}px. Reached the bottom.`;
        }
        if (dy < 0 && after <= 1) {
            return `Scrolled page by ${scrolled}px. Reached the top.`;
        }
        return `Scrolled page by ${scrolled}px.`;
    }

    const before = target.scrollTop;
    const limit = target.scrollHeight - target.clientHeight;
    target.scrollBy({ top: dy, behavior: "smooth" });
    await waitFor(0.1);
    const after = target.scrollTop;
    const scrolled = after - before;
    if (Math.abs(scrolled) < 1) {
        return dy > 0
            ? `Already at the bottom of container (${target.tagName}).`
            : `Already at the top of container (${target.tagName}).`;
    }
    if (dy > 0 && after >= limit - 1) {
        return `Scrolled container (${target.tagName}) by ${scrolled}px. Reached the bottom.`;
    }
    if (dy < 0 && after <= 1) {
        return `Scrolled container (${target.tagName}) by ${scrolled}px. Reached the top.`;
    }
    return `Scrolled container (${target.tagName}) by ${scrolled}px.`;
}
13075
/**
 * Scroll horizontally, either inside a scrollable ancestor of `element`
 * or, when no element is given, at page level.
 *
 * Element mode: walks up from `element` (at most 10 ancestors, stopping at
 * <body>/<html>) looking for a node whose computed `overflow-x` allows
 * scrolling and whose content is wider than its box. Only one third of the
 * requested distance is applied in this mode, clamped to the scrollable range.
 *
 * Page mode: prefers a scrollable ancestor of the focused element, then the
 * first scrollable element that is at least half the viewport wide, and
 * finally the document itself.
 *
 * Scrolling is always performed instantly. A smooth (animated) scroll would
 * still be in flight when the position is read back, which made the reported
 * distance and the "reached the edge" detection unreliable.
 *
 * @param {boolean} right - true to scroll right, false to scroll left.
 * @param {number} scroll_amount - Distance in pixels (expected to be positive).
 * @param {Element|null} [element] - Optional element whose scrollable ancestor should be scrolled.
 * @returns {Promise<string>} Human-readable description of what happened.
 */
async function scrollHorizontally(right, scroll_amount, element) {
    const dx = right ? scroll_amount : -scroll_amount;
    const scrollableOverflow = /(auto|scroll|overlay)/;

    if (element) {
        let currentElement = element;
        let attempts = 0;
        while (currentElement && attempts < 10) {
            const computedStyle = window.getComputedStyle(currentElement);
            const hasScrollableX = scrollableOverflow.test(computedStyle.overflowX);
            const canScrollHorizontally = currentElement.scrollWidth > currentElement.clientWidth;
            if (hasScrollableX && canScrollHorizontally) {
                const beforeScroll = currentElement.scrollLeft;
                const maxScroll = currentElement.scrollWidth - currentElement.clientWidth;
                // Container scrolls are damped to a third of the requested distance
                // and clamped so we never ask for a position outside [0, maxScroll].
                let scrollAmount = dx / 3;
                if (scrollAmount > 0) {
                    scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll);
                }
                else {
                    scrollAmount = Math.max(scrollAmount, -beforeScroll);
                }
                currentElement.scrollLeft = beforeScroll + scrollAmount;
                const actualScrollDelta = currentElement.scrollLeft - beforeScroll;
                // Sub-pixel movement is treated as "did not scroll"; try the next ancestor.
                if (Math.abs(actualScrollDelta) > 0.5) {
                    return `Scrolled container (${currentElement.tagName}) horizontally by ${actualScrollDelta}px`;
                }
            }
            if (currentElement === document.body ||
                currentElement === document.documentElement) {
                break;
            }
            currentElement = currentElement.parentElement;
            attempts++;
        }
        return `No horizontally scrollable container found for element (${element.tagName})`;
    }

    // Page-level horizontal scroll
    const bigEnough = (el) => el.clientWidth >= window.innerWidth * 0.5;
    const canScroll = (el) => el &&
        scrollableOverflow.test(getComputedStyle(el).overflowX) &&
        el.scrollWidth > el.clientWidth &&
        bigEnough(el);
    let el = document.activeElement;
    while (el && !canScroll(el) && el !== document.body)
        el = el.parentElement;
    el = canScroll(el)
        ? el
        : Array.from(document.querySelectorAll("*")).find(canScroll) ||
            document.scrollingElement ||
            document.documentElement;

    if (el === document.scrollingElement ||
        el === document.documentElement ||
        el === document.body) {
        const scrollBefore = window.scrollX;
        const scrollMax = document.documentElement.scrollWidth - window.innerWidth;
        // "instant" overrides a page-level `scroll-behavior: smooth`, so the
        // position read on the next line is already the final one.
        window.scrollBy({ left: dx, top: 0, behavior: "instant" });
        const scrollAfter = window.scrollX;
        const scrolled = scrollAfter - scrollBefore;
        if (Math.abs(scrolled) < 1) {
            return dx > 0
                ? "Already at the right edge of the page."
                : "Already at the left edge of the page.";
        }
        const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1;
        const reachedLeft = dx < 0 && scrollAfter <= 1;
        if (reachedRight)
            return `Scrolled page by ${scrolled}px. Reached the right edge.`;
        if (reachedLeft)
            return `Scrolled page by ${scrolled}px. Reached the left edge.`;
        return `Scrolled page horizontally by ${scrolled}px.`;
    }

    const scrollBefore = el.scrollLeft;
    const scrollMax = el.scrollWidth - el.clientWidth;
    el.scrollBy({ left: dx, behavior: "instant" });
    // Short pause kept from the original implementation before reading the
    // position back (waitFor is defined elsewhere in this file; the argument
    // is presumably in seconds - confirm against its definition).
    await waitFor(0.1);
    const scrollAfter = el.scrollLeft;
    const scrolled = scrollAfter - scrollBefore;
    if (Math.abs(scrolled) < 1) {
        return dx > 0
            ? `Already at the right edge of container (${el.tagName}).`
            : `Already at the left edge of container (${el.tagName}).`;
    }
    const reachedRight = dx > 0 && scrollAfter >= scrollMax - 1;
    const reachedLeft = dx < 0 && scrollAfter <= 1;
    if (reachedRight)
        return `Scrolled container (${el.tagName}) by ${scrolled}px. Reached the right edge.`;
    if (reachedLeft)
        return `Scrolled container (${el.tagName}) by ${scrolled}px. Reached the left edge.`;
    return `Scrolled container (${el.tagName}) horizontally by ${scrolled}px.`;
}
13178
+
13179
/**
 * Page info utilities: viewport, scroll position, page dimensions.
 * Adapted from @page-agent/page-controller (MIT License).
 */
/**
 * Collect viewport size, page size and scroll offsets of the current document.
 *
 * Page dimensions are the larger of the root element's and the body's scroll
 * size. The body is optional: `document.body` is null while a script runs from
 * <head> before the body has been parsed, and in body-less documents, so it is
 * only consulted when present.
 *
 * @returns {{
 *   viewport_width: number, viewport_height: number,
 *   page_width: number, page_height: number,
 *   scroll_x: number, scroll_y: number,
 *   pixels_above: number, pixels_below: number,
 *   pages_above: number, pages_below: number, total_pages: number,
 *   current_page_position: number,
 *   pixels_left: number, pixels_right: number
 * }} Measurements in CSS pixels; the `pages_*` values are multiples of the
 *    viewport height and `current_page_position` is the scrolled fraction of
 *    the vertical scroll range.
 */
function getPageScrollInfo() {
    const root = document.documentElement;
    const body = document.body;
    const viewport_width = window.innerWidth;
    const viewport_height = window.innerHeight;
    const page_width = Math.max(root.scrollWidth, body?.scrollWidth || 0);
    const page_height = Math.max(root.scrollHeight, body?.scrollHeight || 0);
    // Fall back through the legacy offset properties when scrollX/scrollY are 0 or missing.
    const scroll_x = window.scrollX ||
        window.pageXOffset ||
        root.scrollLeft ||
        0;
    const scroll_y = window.scrollY ||
        window.pageYOffset ||
        root.scrollTop ||
        0;
    const pixels_below = Math.max(0, page_height - (viewport_height + scroll_y));
    const pixels_right = Math.max(0, page_width - (viewport_width + scroll_x));
    return {
        viewport_width,
        viewport_height,
        page_width,
        page_height,
        scroll_x,
        scroll_y,
        pixels_above: scroll_y,
        pixels_below,
        // Guard against a zero-height viewport (e.g. hidden iframe) to avoid Infinity/NaN.
        pages_above: viewport_height > 0 ? scroll_y / viewport_height : 0,
        pages_below: viewport_height > 0 ? pixels_below / viewport_height : 0,
        total_pages: viewport_height > 0 ? page_height / viewport_height : 0,
        // Denominator is clamped to 1 so a page that fits the viewport yields 0 instead of NaN.
        current_page_position: scroll_y / Math.max(1, page_height - viewport_height),
        pixels_left: scroll_x,
        pixels_right,
    };
}
13215
+
13216
/**
 * PageController: manages DOM state and element interactions.
 * Adapted from @page-agent/page-controller (MIT License).
 *
 * This wraps the smart DOM analysis (interactive element detection,
 * indexed element map) and provides an API for external agents.
 *
 * All action methods (clickElement, inputText, selectOption, scroll) never
 * throw: they resolve to `{ success, message }` and report failures in
 * `message`.
 */
class PageController {
    /**
     * @param {object} [config] - Options forwarded to the DOM analysis helpers.
     *   This class itself reads `viewportExpansion` and `includeAttributes`.
     */
    constructor(config = {}) {
        this.flatTree = null;
        this.selectorMap = new Map();
        this.elementTextMap = new Map();
        this.simplifiedHTML = "";
        this.isIndexed = false;
        this.config = config;
    }
    /**
     * Get structured browser state for LLM consumption.
     * Builds the DOM tree, highlights interactive elements, and returns
     * a simplified text representation with numeric indices.
     *
     * @returns {Promise<{url: string, title: string, header: string,
     *   content: string, footer: string, element_count: number}>}
     */
    async getBrowserState() {
        const url = window.location.href;
        const title = document.title;
        const pi = getPageScrollInfo();
        const viewportExpansion = resolveViewportExpansion(this.config.viewportExpansion);
        await this.updateTree();
        const content = this.simplifiedHTML;
        const titleLine = `Current Page: [${title}](${url})`;
        const pageInfoLine = `Page info: ${pi.viewport_width}x${pi.viewport_height}px viewport, ${pi.page_width}x${pi.page_height}px total, ${pi.pages_above.toFixed(1)} pages above, ${pi.pages_below.toFixed(1)} pages below, at ${(pi.current_page_position * 100).toFixed(0)}%`;
        // -1 means "whole page": scroll hints are then replaced by start/end markers.
        const isFullPage = viewportExpansion === -1;
        const elementsLabel = isFullPage
            ? "Interactive elements (full page):"
            : "Interactive elements (viewport):";
        // A few pixels of slack so tiny offsets are not reported as hidden content.
        const hasContentAbove = pi.pixels_above > 4;
        const scrollHintAbove = hasContentAbove && !isFullPage
            ? `... ${pi.pixels_above} pixels above - scroll to see more ...`
            : "[Start of page]";
        const header = `${titleLine}\n${pageInfoLine}\n\n${elementsLabel}\n\n${scrollHintAbove}`;
        const hasContentBelow = pi.pixels_below > 4;
        const footer = hasContentBelow && !isFullPage
            ? `... ${pi.pixels_below} pixels below - scroll to see more ...`
            : "[End of page]";
        return {
            url,
            title,
            header,
            content,
            footer,
            element_count: this.selectorMap.size,
        };
    }
    /**
     * Update DOM tree, returns simplified HTML for LLM.
     * Rebuilds the index -> element map and the index -> text map and marks
     * the controller as indexed.
     *
     * @returns {Promise<string>} The simplified HTML.
     */
    async updateTree() {
        cleanUpHighlights();
        this.flatTree = getFlatTree(this.config);
        this.simplifiedHTML = flatTreeToString(this.flatTree, this.config.includeAttributes);
        this.selectorMap.clear();
        this.selectorMap = getSelectorMap(this.flatTree);
        this.elementTextMap.clear();
        this.elementTextMap = getElementTextMap(this.simplifiedHTML);
        this.isIndexed = true;
        return this.simplifiedHTML;
    }
    /** Remove highlight overlays from the page. */
    async cleanUpHighlights() {
        cleanUpHighlights();
    }
    /**
     * Ensure updateTree() has run at least once.
     * @throws {Error} If the DOM has not been indexed yet.
     */
    assertIndexed() {
        if (!this.isIndexed) {
            throw new Error("DOM tree not indexed yet. Call get_browser_state first.");
        }
    }
    /** Clean up highlights after performing an action. */
    cleanUpAfterAction() {
        cleanUpHighlights();
    }
    /**
     * Click the element registered under `index`.
     * @param {number} index - Element index from the last indexing pass.
     * @returns {Promise<{success: boolean, message: string}>}
     */
    async clickElement(index) {
        try {
            this.assertIndexed();
            const element = getElementByIndex(this.selectorMap, index);
            const elemText = this.elementTextMap.get(index);
            this.cleanUpAfterAction();
            await clickElement(element);
            const opensNewTab = element instanceof HTMLAnchorElement &&
                element.target === "_blank";
            return {
                success: true,
                message: opensNewTab
                    ? `Clicked element (${elemText ?? index}). Link opened in a new tab.`
                    : `Clicked element (${elemText ?? index}).`,
            };
        }
        catch (error) {
            return {
                success: false,
                message: `Failed to click element: ${error}`,
            };
        }
    }
    /**
     * Type `text` into the element registered under `index`.
     * @param {number} index - Element index from the last indexing pass.
     * @param {string} text - Text to enter.
     * @returns {Promise<{success: boolean, message: string}>}
     */
    async inputText(index, text) {
        try {
            this.assertIndexed();
            const element = getElementByIndex(this.selectorMap, index);
            const elemText = this.elementTextMap.get(index);
            this.cleanUpAfterAction();
            await inputTextElement(element, text);
            return {
                success: true,
                message: `Input text "${text}" into element (${elemText ?? index}).`,
            };
        }
        catch (error) {
            return {
                success: false,
                message: `Failed to input text: ${error}`,
            };
        }
    }
    /**
     * Select the option labelled `optionText` in the element registered under `index`.
     * @param {number} index - Element index from the last indexing pass.
     * @param {string} optionText - Visible text of the option.
     * @returns {Promise<{success: boolean, message: string}>}
     */
    async selectOption(index, optionText) {
        try {
            this.assertIndexed();
            const element = getElementByIndex(this.selectorMap, index);
            const elemText = this.elementTextMap.get(index);
            this.cleanUpAfterAction();
            await selectOptionElement(element, optionText);
            return {
                success: true,
                message: `Selected option "${optionText}" in element (${elemText ?? index}).`,
            };
        }
        catch (error) {
            return {
                success: false,
                message: `Failed to select option: ${error}`,
            };
        }
    }
    /**
     * Scroll the page or the container of an indexed element.
     *
     * @param {{direction: "up"|"down"|"left"|"right", amount?: number|null, index?: number|null}} options
     *   `amount` defaults to 80% of the viewport height (vertical) or width
     *   (horizontal). `index` may be omitted or null for a page-level scroll.
     * @returns {Promise<{success: boolean, message: string}>}
     */
    async scroll(options) {
        try {
            this.assertIndexed();
            this.cleanUpAfterAction();
            const { direction, amount, index } = options;
            // Reject unknown directions instead of silently treating them as "up".
            if (!["up", "down", "left", "right"].includes(direction)) {
                throw new Error(`Invalid scroll direction "${direction}". Expected one of: up, down, left, right.`);
            }
            // `!= null` on purpose: a JSON `null` index means "no element", same as undefined.
            const element = index != null
                ? getElementByIndex(this.selectorMap, index)
                : null;
            let message;
            if (direction === "left" || direction === "right") {
                const pixels = amount ?? window.innerWidth * 0.8;
                message = await scrollHorizontally(direction === "right", pixels, element);
            }
            else {
                const pixels = amount ?? window.innerHeight * 0.8;
                // scrollVertically receives a signed distance in addition to the direction flag.
                const scrollAmount = direction === "down" ? pixels : -pixels;
                message = await scrollVertically(direction === "down", scrollAmount, element);
            }
            return { success: true, message };
        }
        catch (error) {
            return {
                success: false,
                message: `Failed to scroll: ${error}`,
            };
        }
    }
    /** Remove highlights and reset all indexed state. */
    dispose() {
        cleanUpHighlights();
        this.flatTree = null;
        this.selectorMap.clear();
        this.elementTextMap.clear();
        this.simplifiedHTML = "";
        this.isIndexed = false;
    }
}
13393
+
13394
+ /**
13395
+ * Hypha RPC service wrappers for the PageController.
13396
+ *
13397
+ * These functions are schema-annotated for AI agent / LLM tool calling.
13398
+ * They provide smart DOM analysis with indexed interactive elements,
13399
+ * enabling agents to interact with pages by element index instead of
13400
+ * fragile CSS selectors.
13401
+ */
13402
// Singleton — one PageController instance is shared by every service call.
let controller = null;
/**
 * Return the shared PageController, creating it on first use.
 * @returns {PageController} The lazily created singleton.
 */
function getController() {
    if (controller) {
        return controller;
    }
    const defaults = {
        viewportExpansion: -1, // full page by default
        highlightOpacity: 0.1, // 10% fill on element boxes
        highlightLabelOpacity: 0.5, // 50% opacity on number labels + borders
    };
    controller = new PageController(defaults);
    return controller;
}
13414
/**
 * Get the current browser state: page info, scroll position, and a
 * simplified HTML representation with all interactive elements indexed
 * as [0], [1], [2], etc. Use the indices to call click_element_by_index,
 * input_text, select_option, or scroll.
 *
 * The viewport mode is resolved on every call. Because the controller is a
 * shared singleton, leaving a previous `viewport_only: true` in its config
 * would otherwise turn every later default call into a viewport-only call,
 * contradicting the documented default (full page).
 *
 * @param {boolean} [viewport_only] - true to restrict the result to the
 *   current viewport; omitted, null or false means the full page.
 * @returns {Promise<object>} The state produced by PageController.getBrowserState().
 */
async function getBrowserState(viewport_only) {
    const ctrl = getController();
    // 0 = no expansion beyond the viewport, -1 = whole page.
    ctrl.config.viewportExpansion = viewport_only ? 0 : -1;
    return ctrl.getBrowserState();
}
getBrowserState.__schema__ = {
    name: "getBrowserState",
    description: "Get the current page state with all interactive elements indexed as [0], [1], [2], etc. " +
        "Returns a simplified HTML representation optimized for LLM consumption. " +
        "Interactive elements (buttons, links, inputs, scrollable areas) are detected via smart heuristics " +
        "(CSS cursor, ARIA roles, event listeners, tag names). " +
        "Use the returned indices with click_element_by_index, input_text, select_option, or scroll. " +
        "Call this first to understand the page before performing any actions.",
    parameters: {
        type: "object",
        properties: {
            viewport_only: {
                type: "boolean",
                description: "If true, only return elements visible in the current viewport. Default: false (full page).",
            },
        },
    },
};
13445
/**
 * Service entry point: click the interactive element that get_browser_state
 * listed under `index`.
 *
 * @param {number} index - Element index from get_browser_state.
 * @returns {Promise<{success: boolean, message: string}>} Outcome reported by the controller.
 */
async function clickElementByIndex(index) {
    const ctrl = getController();
    return ctrl.clickElement(index);
}
// Tool-calling schema consumed by the service registration.
clickElementByIndex.__schema__ = {
    name: "clickElementByIndex",
    description: [
        "Click an interactive element by its numeric index from get_browser_state output. ",
        "Simulates a full mouse event sequence (hover, mousedown, focus, mouseup, click) ",
        "to trigger all event listeners including React/Vue handlers.",
    ].join(""),
    parameters: {
        type: "object",
        properties: {
            index: {
                type: "number",
                description: "The element index from get_browser_state (e.g. 0 for [0], 5 for [5]).",
            },
        },
        required: ["index"],
    },
};
13467
/**
 * Service entry point: enter text into the input, textarea, or
 * contenteditable element listed under `index`.
 *
 * @param {number} index - Element index from get_browser_state.
 * @param {string} text - Text to type.
 * @returns {Promise<{success: boolean, message: string}>} Outcome reported by the controller.
 */
async function inputText(index, text) {
    const ctrl = getController();
    return ctrl.inputText(index, text);
}
// Tool-calling schema consumed by the service registration.
inputText.__schema__ = {
    name: "inputText",
    description: [
        "Type text into an input, textarea, or contenteditable element by its index. ",
        "Replaces existing content. Works with React controlled components, ",
        "contenteditable editors (LinkedIn, Quill), and native inputs.",
    ].join(""),
    parameters: {
        type: "object",
        properties: {
            index: {
                type: "number",
                description: "The element index from get_browser_state.",
            },
            text: {
                type: "string",
                description: "The text to type into the element.",
            },
        },
        required: ["index", "text"],
    },
};
13493
/**
 * Service entry point: choose a dropdown option, identified by its visible
 * text, in the element listed under `index`.
 *
 * @param {number} index - Element index from get_browser_state.
 * @param {string} option_text - Visible text of the option to select.
 * @returns {Promise<{success: boolean, message: string}>} Outcome reported by the controller.
 */
async function selectOption(index, option_text) {
    const ctrl = getController();
    return ctrl.selectOption(index, option_text);
}
// Tool-calling schema consumed by the service registration.
selectOption.__schema__ = {
    name: "selectOption",
    description: "Select a dropdown option in a <select> element by its index and the visible option text.",
    parameters: {
        type: "object",
        required: ["index", "option_text"],
        properties: {
            index: {
                type: "number",
                description: "The <select> element index from get_browser_state.",
            },
            option_text: {
                type: "string",
                description: "The visible text of the option to select (case-sensitive, trimmed).",
            },
        },
    },
};
13517
/**
 * Service entry point: scroll the page, or the scrollable container of the
 * element listed under `index`.
 *
 * @param {"up"|"down"|"left"|"right"} direction - Scroll direction.
 * @param {number} [amount] - Distance in pixels; the controller picks a default when omitted.
 * @param {number} [index] - Optional element index from get_browser_state.
 * @returns {Promise<{success: boolean, message: string}>} Outcome reported by the controller.
 */
async function scroll(direction, amount, index) {
    const request = { direction, amount, index };
    return getController().scroll(request);
}
// Tool-calling schema consumed by the service registration.
scroll.__schema__ = {
    name: "scroll",
    description: [
        "Scroll the page or a specific scrollable container. ",
        "If index is provided, scrolls the nearest scrollable ancestor of that element. ",
        "Otherwise scrolls the page or the largest scrollable container.",
    ].join(""),
    parameters: {
        type: "object",
        properties: {
            direction: {
                type: "string",
                enum: ["up", "down", "left", "right"],
                description: "Scroll direction.",
            },
            amount: {
                type: "number",
                description: "Scroll amount in pixels. Default: ~80% of viewport height (vertical) or width (horizontal).",
            },
            index: {
                type: "number",
                description: "Optional element index. If provided, scrolls the nearest scrollable ancestor of this element.",
            },
        },
        required: ["direction"],
    },
};
13548
/**
 * Remove all visual element highlights/labels from the page.
 *
 * The controller's cleanUpHighlights() is async; it is awaited so that a
 * failure rejects this call instead of becoming an unhandled rejection while
 * the caller is told the operation succeeded.
 *
 * @returns {Promise<{success: boolean, message: string}>} Confirmation object.
 */
async function removeHighlights() {
    await getController().cleanUpHighlights();
    return { success: true, message: "Highlights removed." };
}
removeHighlights.__schema__ = {
    name: "removeHighlights",
    description: "Remove all visual element index labels/highlights from the page. " +
        "Useful after taking a screenshot if you want a clean view.",
    parameters: {
        type: "object",
        properties: {},
    },
};
13564
/**
 * Tear down the shared page controller, if one was ever created, and drop
 * the reference so the next getController() call builds a fresh instance.
 */
function disposeController() {
    if (!controller) {
        return;
    }
    controller.dispose();
    controller = null;
}
13573
+
10607
13574
  /**
10608
13575
  * Core debugger class: connects to Hypha and registers the debug service.
10609
13576
  */
13577
/**
 * Produce a lowercase hex string backed by `bytes` cryptographically random
 * bytes (two hex digits per byte).
 *
 * @param {number} [bytes=8] - Number of random bytes to draw.
 * @returns {string} Hex string of length `bytes * 2`.
 */
function randomHex(bytes = 8) {
    const buffer = crypto.getRandomValues(new Uint8Array(bytes));
    let hex = "";
    for (const byte of buffer) {
        // Zero-pad so every byte contributes exactly two characters.
        hex += byte.toString(16).padStart(2, "0");
    }
    return hex;
}
10610
13583
  class HyphaDebugger {
10611
13584
  constructor(config) {
10612
13585
  this.overlay = null;
13586
+ this.cursor = null;
10613
13587
  this.server = null;
10614
13588
  this.serviceInfo = null;
13589
+ const requireToken = config.require_token ?? false;
13590
+ // Always append random suffix unless user provided a custom id.
13591
+ let serviceId = config.service_id ?? "web-debugger";
13592
+ if (!config.service_id) {
13593
+ serviceId = `web-debugger-${randomHex(16)}`;
13594
+ }
13595
+ // Derive visibility: require_token mode → protected, no-token → unlisted.
13596
+ // An explicit config.visibility always takes precedence.
13597
+ const visibility = config.visibility ?? (requireToken ? "protected" : "unlisted");
10615
13598
  this.config = {
10616
13599
  server_url: config.server_url,
10617
13600
  workspace: config.workspace ?? "",
10618
13601
  token: config.token ?? "",
10619
- service_id: config.service_id ?? "web-debugger",
13602
+ service_id: serviceId,
10620
13603
  service_name: config.service_name ?? "Web Debugger",
10621
13604
  show_ui: config.show_ui ?? true,
10622
- visibility: config.visibility ?? "public",
13605
+ visibility,
13606
+ require_token: requireToken,
10623
13607
  };
10624
13608
  }
10625
13609
  async start() {
@@ -10636,6 +13620,8 @@
10636
13620
  this.overlay = new DebugOverlay();
10637
13621
  this.overlay.setStatus("disconnected");
10638
13622
  this.overlay.setInfo({ Status: "Connecting..." });
13623
+ // Initialize animated AI cursor
13624
+ this.cursor = new AICursor();
10639
13625
  }
10640
13626
  try {
10641
13627
  // Get the connectToServer function
@@ -10682,6 +13668,9 @@
10682
13668
  catch {
10683
13669
  // Ignore unregister errors on cleanup
10684
13670
  }
13671
+ disposeController();
13672
+ this.cursor?.destroy();
13673
+ this.cursor = null;
10685
13674
  this.overlay?.destroy();
10686
13675
  this.overlay = null;
10687
13676
  const w = window;
@@ -10696,9 +13685,12 @@
10696
13685
  */
10697
13686
  async updateSession(extra) {
10698
13687
  const fullServiceId = this.serviceInfo?.id ?? this.config.service_id;
10699
- const sessionToken = await this.server.generateToken();
10700
13688
  const serviceUrl = this.buildServiceUrl(fullServiceId);
10701
13689
  const workspace = this.server.config?.workspace ?? "";
13690
+ // In no-token mode the URL itself is the secret — skip token generation.
13691
+ const sessionToken = this.config.require_token
13692
+ ? await this.server.generateToken({ expires_in: 86400 })
13693
+ : "";
10702
13694
  if (this.overlay) {
10703
13695
  this.overlay.setStatus("connected");
10704
13696
  this.overlay.setInfo({
@@ -10709,8 +13701,13 @@
10709
13701
  this.overlay.setInstructions(this.buildInstructionBlock(serviceUrl, sessionToken));
10710
13702
  }
10711
13703
  console.log(`[hypha-debugger] Service URL: ${serviceUrl}`);
10712
- console.log(`[hypha-debugger] Token: ${sessionToken}`);
10713
- console.log(`[hypha-debugger] Test:\n curl '${serviceUrl}/get_page_info?_mode=last' -H 'Authorization: Bearer ${sessionToken}'`);
13704
+ if (sessionToken) {
13705
+ console.log(`[hypha-debugger] Token: ${sessionToken}`);
13706
+ console.log(`[hypha-debugger] Test:\n curl '${serviceUrl}/get_page_info' -H 'Authorization: Bearer ${sessionToken}'`);
13707
+ }
13708
+ else {
13709
+ console.log(`[hypha-debugger] Test:\n curl '${serviceUrl}/get_page_info'`);
13710
+ }
10714
13711
  const session = {
10715
13712
  service_id: fullServiceId,
10716
13713
  workspace,
@@ -10772,13 +13769,20 @@
10772
13769
  get_page_info: this.wrapFn(getPageInfo, "get_page_info"),
10773
13770
  get_html: this.wrapFn(getHtml, "get_html"),
10774
13771
  query_dom: this.wrapFn(queryDom, "query_dom"),
10775
- click_element: this.wrapFn(clickElement, "click_element"),
13772
+ click_element: this.wrapFn(clickElement$1, "click_element"),
10776
13773
  fill_input: this.wrapFn(fillInput, "fill_input"),
10777
13774
  scroll_to: this.wrapFn(scrollTo, "scroll_to"),
10778
13775
  take_screenshot: this.wrapFn(takeScreenshot, "take_screenshot"),
10779
13776
  execute_script: this.wrapFn(executeScript, "execute_script"),
10780
13777
  navigate: this.wrapFn(navigate, "navigate"),
10781
13778
  get_react_tree: this.wrapFn(getReactTree, "get_react_tree"),
13779
+ // Smart DOM analysis + index-based interaction (from page-controller)
13780
+ get_browser_state: this.wrapFn(getBrowserState, "get_browser_state"),
13781
+ click_element_by_index: this.wrapFn(clickElementByIndex, "click_element_by_index"),
13782
+ input_text: this.wrapFn(inputText, "input_text"),
13783
+ select_option: this.wrapFn(selectOption, "select_option"),
13784
+ scroll: this.wrapFn(scroll, "scroll"),
13785
+ remove_highlights: this.wrapFn(removeHighlights, "remove_highlights"),
10782
13786
  get_skill_md: this.wrapFn(this.createGetSkillMd(), "get_skill_md"),
10783
13787
  };
10784
13788
  }
@@ -10788,10 +13792,15 @@
10788
13792
  const schemaFns = {};
10789
13793
  const fns = {
10790
13794
  get_page_info: getPageInfo, get_html: getHtml,
10791
- query_dom: queryDom, click_element: clickElement, fill_input: fillInput,
13795
+ query_dom: queryDom, click_element: clickElement$1, fill_input: fillInput,
10792
13796
  scroll_to: scrollTo, take_screenshot: takeScreenshot,
10793
13797
  execute_script: executeScript, navigate: navigate,
10794
13798
  get_react_tree: getReactTree,
13799
+ // Smart DOM analysis + index-based interaction
13800
+ get_browser_state: getBrowserState,
13801
+ click_element_by_index: clickElementByIndex,
13802
+ input_text: inputText, select_option: selectOption,
13803
+ scroll: scroll, remove_highlights: removeHighlights,
10795
13804
  };
10796
13805
  for (const [name, f] of Object.entries(fns)) {
10797
13806
  if (f.__schema__)
@@ -10814,61 +13823,76 @@
10814
13823
  }
10815
13824
  /** Build the instruction block for the overlay panel. */
10816
13825
  buildInstructionBlock(serviceUrl, token) {
10817
- return [
10818
- `SERVICE_URL="${serviceUrl}"`,
10819
- `TOKEN="${token}"`,
13826
+ const auth = token ? ` -H "Authorization: Bearer $TOKEN"` : "";
13827
+ const lines = [
13828
+ `# Hypha Remote Debugger — Web Page`,
13829
+ `# A debugger is attached to a live web page.`,
13830
+ `# You can remotely inspect, interact with, and control this page via the HTTP API below.`,
13831
+ `#`,
13832
+ `# RECOMMENDED WORKFLOW (index-based, most reliable):`,
13833
+ `# 1. get_browser_state → see all interactive elements as [0], [1], [2], ...`,
13834
+ `# 2. click_element_by_index / input_text / select_option / scroll → act by index`,
13835
+ `# 3. take_screenshot → verify the result visually`,
13836
+ `#`,
13837
+ `# Also available: get_page_info, get_html, query_dom, click_element, fill_input,`,
13838
+ `# scroll_to, execute_script, navigate, get_react_tree, remove_highlights`,
13839
+ `#`,
13840
+ `# POST endpoints accept JSON body with parameter names as keys.`,
10820
13841
  ``,
10821
- `# Quick test:`,
10822
- `curl "$SERVICE_URL/get_page_info?_mode=last" -H "Authorization: Bearer $TOKEN"`,
10823
- ``,
10824
- `# Full API docs:`,
10825
- `curl "$SERVICE_URL/get_skill_md?_mode=last" -H "Authorization: Bearer $TOKEN"`,
10826
- ].join("\n");
13842
+ `SERVICE_URL="${serviceUrl}"`,
13843
+ ];
13844
+ if (token) {
13845
+ lines.push(`TOKEN="${token}"`);
13846
+ }
13847
+ lines.push(``, `# 1. Get interactive elements (smart DOM analysis with indexed elements):`, `curl "$SERVICE_URL/get_browser_state"${auth}`, ``, `# 2. Click element by index (e.g. click [3]):`, `curl -X POST "$SERVICE_URL/click_element_by_index"${auth} -H "Content-Type: application/json" -d '{"index": 3}'`, ``, `# 3. Type into an input by index:`, `curl -X POST "$SERVICE_URL/input_text"${auth} -H "Content-Type: application/json" -d '{"index": 5, "text": "hello"}'`, ``, `# Take a screenshot:`, `curl "$SERVICE_URL/take_screenshot"${auth}`, ``, `# Execute JavaScript remotely:`, `curl -X POST "$SERVICE_URL/execute_script"${auth} -H "Content-Type: application/json" -d '{"code": "document.title"}'`, ``, `# Full API docs:`, `curl "$SERVICE_URL/get_skill_md"${auth}`);
13848
+ return lines.join("\n");
10827
13849
  }
10828
- /** Wrap a service function with logging and kwargs-to-positional-args support. */
13850
+ /**
13851
+ * Wrap a service function with logging and correct parameter names.
13852
+ *
13853
+ * Uses new Function() to create a wrapper whose parameter names match
13854
+ * the __schema__ property names. This is critical for production builds
13855
+ * where Babel/Terser minifies parameter names — hypha-rpc's
13856
+ * getParamNames() parses Function.toString() to map kwargs to positional
13857
+ * args, so the wrapper must have the real (unminified) parameter names.
13858
+ */
10829
13859
  wrapFn(fn, name) {
10830
- const wrapped = async (...args) => {
10831
- // Hypha's HTTP API calls with keyword arguments (**kwargs),
10832
- // which arrive on the JS side as a single object argument.
10833
- // Destructure into positional args based on schema properties.
10834
- if (args.length === 1 &&
10835
- args[0] &&
10836
- typeof args[0] === "object" &&
10837
- !Array.isArray(args[0]) &&
10838
- fn.__schema__?.parameters?.properties) {
10839
- const kwargs = args[0];
10840
- const props = fn.__schema__.parameters.properties;
10841
- const paramNames = Object.keys(props);
10842
- // Check if any kwargs key matches a schema property name
10843
- const hasMatchingKey = paramNames.some((p) => p in kwargs);
10844
- if (hasMatchingKey) {
10845
- args = paramNames.map((p) => kwargs[p]);
10846
- while (args.length > 0 && args[args.length - 1] === undefined) {
10847
- args.pop();
10848
- }
10849
- }
10850
- }
10851
- this.overlay?.addLog(`${name}(${this.summarizeArgs(args)})`, "call");
13860
+ const schema = fn.__schema__;
13861
+ const paramNames = schema?.parameters?.properties
13862
+ ? Object.keys(schema.parameters.properties)
13863
+ : [];
13864
+ const self = this;
13865
+ const callAndLog = async (args) => {
13866
+ self.overlay?.addLog(`${name}(${self.summarizeArgs(args)})`, "call");
10852
13867
  try {
10853
13868
  const result = await fn(...args);
10854
13869
  const hasError = result && typeof result === "object" && "error" in result;
10855
13870
  if (hasError) {
10856
- this.overlay?.addLog(`${name}: ${result.error}`, "error");
13871
+ self.overlay?.addLog(`${name}: ${result.error}`, "error");
10857
13872
  }
10858
13873
  else {
10859
- this.overlay?.addLog(`${name} -> OK`, "result");
13874
+ self.overlay?.addLog(`${name} -> OK`, "result");
10860
13875
  }
10861
13876
  return result;
10862
13877
  }
10863
13878
  catch (err) {
10864
- this.overlay?.addLog(`${name}: ${err.message}`, "error");
13879
+ self.overlay?.addLog(`${name}: ${err.message}`, "error");
10865
13880
  throw err;
10866
13881
  }
10867
13882
  };
10868
- if (fn.__schema__) {
10869
- wrapped.__schema__ = fn.__schema__;
13883
+ let wrapper;
13884
+ if (paramNames.length === 0) {
13885
+ wrapper = async (...args) => callAndLog(args);
13886
+ }
13887
+ else {
13888
+ // Create a function with explicit, unminified parameter names so
13889
+ // hypha-rpc can parse them from Function.toString().
13890
+ const paramList = paramNames.join(", ");
13891
+ wrapper = new Function("callAndLog", `return async function(${paramList}) { return callAndLog([${paramList}]); }`)(callAndLog);
10870
13892
  }
10871
- return wrapped;
13893
+ if (schema)
13894
+ wrapper.__schema__ = schema;
13895
+ return wrapper;
10872
13896
  }
10873
13897
  summarizeArgs(args) {
10874
13898
  if (args.length === 0)
@@ -10911,7 +13935,8 @@
10911
13935
  /**
10912
13936
  * Auto-start: when loaded via <script> tag, automatically start the debugger.
10913
13937
  * Configuration can be provided via data-* attributes on the script tag:
10914
- * data-server-url, data-workspace, data-token, data-service-id, data-no-ui
13938
+ * data-server-url, data-workspace, data-token, data-service-id, data-no-ui,
13939
+ * data-require-token
10915
13940
  *
10916
13941
  * Set data-manual to disable auto-start.
10917
13942
  */
@@ -10949,6 +13974,9 @@
10949
13974
  if (scriptEl?.hasAttribute("data-no-ui")) {
10950
13975
  config.show_ui = false;
10951
13976
  }
13977
+ if (scriptEl?.hasAttribute("data-require-token")) {
13978
+ config.require_token = true;
13979
+ }
10952
13980
  startDebugger(config).catch((err) => {
10953
13981
  console.error("[hypha-debugger] Auto-start failed:", err);
10954
13982
  });