mcp-web-inspector 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +5 -5
  2. package/dist/index.js +63 -9
  3. package/dist/toolHandler.d.ts +2 -0
  4. package/dist/toolHandler.js +31 -4
  5. package/dist/tools/browser/base.d.ts +41 -2
  6. package/dist/tools/browser/base.js +221 -16
  7. package/dist/tools/browser/common/postAction.d.ts +12 -0
  8. package/dist/tools/browser/common/postAction.js +158 -0
  9. package/dist/tools/browser/content/get_html.js +19 -6
  10. package/dist/tools/browser/content/get_text.js +24 -8
  11. package/dist/tools/browser/inspection/check_visibility.js +2 -3
  12. package/dist/tools/browser/inspection/compare_element_alignment.js +6 -8
  13. package/dist/tools/browser/inspection/element_exists.js +1 -2
  14. package/dist/tools/browser/inspection/get_computed_styles.js +2 -3
  15. package/dist/tools/browser/inspection/inspect_ancestors.js +4 -5
  16. package/dist/tools/browser/inspection/inspect_dom.js +20 -4
  17. package/dist/tools/browser/inspection/measure_element.js +2 -3
  18. package/dist/tools/browser/inspection/query_selector.js +3 -3
  19. package/dist/tools/browser/interaction/click.js +178 -6
  20. package/dist/tools/browser/interaction/drag.js +2 -4
  21. package/dist/tools/browser/interaction/fill.js +2 -3
  22. package/dist/tools/browser/interaction/hover.js +1 -2
  23. package/dist/tools/browser/interaction/press_key.js +1 -2
  24. package/dist/tools/browser/interaction/select.js +1 -2
  25. package/dist/tools/browser/interaction/upload_file.js +1 -2
  26. package/dist/tools/browser/navigation/scroll_by.js +1 -1
  27. package/dist/tools/browser/navigation/scroll_to_element.js +1 -2
  28. package/dist/tools/browser/waiting/wait_for_element.js +1 -2
  29. package/dist/tools/common/types.d.ts +2 -0
  30. package/package.json +1 -1
package/README.md CHANGED
@@ -723,7 +723,7 @@ Issues:
723
723
  Test a selector and return detailed information about all matched elements. Essential for selector debugging and finding the right element to interact with. Returns compact text format with element tag, position, text content, visibility status, and interaction capability. Shows why elements are hidden (display:none, opacity:0, zero size). Supports testid shortcuts (e.g., 'testid:submit-button'). Use limit parameter to control how many matches to show (default: 10). NEW: Use onlyVisible parameter to filter results (true=visible only, false=hidden only, undefined=all).
724
724
 
725
725
  - Parameters:
726
- - selector (string, required): CSS selector, text selector, or testid shorthand to test (e.g., 'button.submit', 'testid:login-form', 'text=Sign In')
726
+ - selector (string, required): CSS selector, text selector, or testid shorthand to test (e.g., 'button.submit', 'testid:login-form', 'text=Sign In', 'dialog::button' to scope the lookup to the topmost open dialog/sheet)
727
727
  - limit (number, optional): Maximum number of elements to return detailed info for (default: 10, recommended max: 50)
728
728
  - onlyVisible (boolean, optional): Filter results by visibility: true = show only visible elements, false = show only hidden elements, undefined/not specified = show all elements (default: undefined)
729
729
  - showAttributes (string, optional): Comma-separated list of HTML attributes to display for each element (e.g., 'id,name,aria-label,href,type'). If not specified, attributes are not shown.
@@ -952,7 +952,7 @@ Scroll an element into view. Automatically handles scrolling within the nearest
952
952
  Click an element on the page
953
953
 
954
954
  - Parameters:
955
- - selector (string, required): CSS selector for the element to click
955
+ - selector (string, required): CSS selector for the element to click. Supports 'testid:NAME' and 'dialog::SELECTOR' (scopes the lookup to the topmost open dialog/sheet, e.g. 'dialog::testid:confirm').
956
956
 
957
957
  #### `drag`
958
958
  Drag an element to a target location
@@ -965,7 +965,7 @@ Drag an element to a target location
965
965
  Fill an input/textarea/contenteditable; if the selector matches a wrapper, descends up to 4 levels to a unique fillable descendant (errors if zero or multiple)
966
966
 
967
967
  - Parameters:
968
- - selector (string, required): CSS selector for input field or its wrapper
968
+ - selector (string, required): CSS selector for input field or its wrapper. Supports 'testid:NAME' and 'dialog::SELECTOR' (scopes to the topmost open dialog/sheet).
969
969
  - value (string, required): Value to fill
970
970
 
971
971
  #### `hover`
@@ -1006,10 +1006,10 @@ Upload a file to an input[type='file'] element on the page
1006
1006
  - maxLength (number, optional): Maximum number of characters to return (default: 20000)
1007
1007
 
1008
1008
  #### `get_text`
1009
- [may return preview+token] ⚠️ RARELY NEEDED: Get ALL visible text content from the entire page (no structure, just raw text). Most tasks need structured inspection instead. ONLY use get_text for: (1) extracting text for content analysis (word count, language detection), (2) searching for text when location is completely unknown, (3) text-only snapshots for comparison. For structured tasks, use: inspect_dom() to understand page structure, find_by_text() to locate specific text with context, query_selector() to find elements. Auto-returns text if <2000 chars (small elements); if larger, returns a preview and a one-time token to fetch the full output via confirm_output. Supports testid shortcuts.
1009
+ [may return preview+token] ⚠️ RARELY NEEDED: Get ALL visible text content from the entire page (no structure, just raw text). Most tasks need structured inspection instead. ONLY use get_text for: (1) extracting text for content analysis (word count, language detection), (2) searching for text when location is completely unknown, (3) text-only snapshots for comparison. For structured tasks, use: inspect_dom() to understand page structure, find_by_text() to locate specific text with context, query_selector() to find elements. Auto-returns text if <2000 chars (small elements); if larger, returns a preview and a one-time token to fetch the full output via confirm_output. Supports testid shortcuts and the `dialog::SELECTOR` scope to read inside the topmost open dialog/sheet.
1010
1010
 
1011
1011
  - Parameters:
1012
- - selector (string, optional): CSS selector, text selector, or testid shorthand to limit text extraction to a specific container. Omit to get text from entire page. Example: 'testid:article-body' or '#main-content'
1012
+ - selector (string, optional): CSS selector, text selector, or testid shorthand to limit text extraction to a specific container. Omit to get text from entire page. Examples: 'testid:article-body', '#main-content', 'dialog::section' (scopes lookup to the topmost open dialog/sheet — useful when a sheet covers ambiguous page chrome). Use bare 'dialog::' for the whole topmost dialog.
1013
1013
  - maxLength (number, optional): Maximum number of characters to return (default: 20000)
1014
1014
 
1015
1015
  #### `visual_screenshot_for_humans`
package/dist/index.js CHANGED
@@ -4,10 +4,11 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
4
4
  import { createToolDefinitions } from "./tools/common/registry.js";
5
5
  import { setupRequestHandlers } from "./requestHandler.js";
6
6
  import { parseArgs } from "node:util";
7
- import { setSessionConfig } from "./toolHandler.js";
7
+ import { setSessionConfig, ensureBrowser } from "./toolHandler.js";
8
8
  import { readFileSync } from "node:fs";
9
9
  import { fileURLToPath } from "node:url";
10
10
  import { dirname, join } from "node:path";
11
+ import { createServer } from "node:net";
11
12
  // Get package.json version
12
13
  const __dirname = dirname(fileURLToPath(import.meta.url));
13
14
  const PACKAGE_ROOT = join(__dirname, "..");
@@ -37,6 +38,13 @@ const { values } = parseArgs({
37
38
  type: 'boolean',
38
39
  default: false,
39
40
  },
41
+ 'cdp-port': {
42
+ type: 'string',
43
+ },
44
+ 'warmup-browser': {
45
+ type: 'boolean',
46
+ default: false,
47
+ },
40
48
  'print-tools-json': {
41
49
  type: 'boolean',
42
50
  default: false,
@@ -48,17 +56,50 @@ const { values } = parseArgs({
48
56
  },
49
57
  strict: false,
50
58
  });
59
+ // Probe localhost:port; resolve true if free, false if in use.
60
+ function isPortFree(port) {
61
+ return new Promise(resolve => {
62
+ const srv = createServer();
63
+ srv.once('error', () => resolve(false));
64
+ srv.once('listening', () => srv.close(() => resolve(true)));
65
+ srv.listen(port, '127.0.0.1');
66
+ });
67
+ }
68
+ // First free port in [start, start+span). Throws if none.
69
+ async function findFreePort(start, span) {
70
+ for (let p = start; p < start + span; p++) {
71
+ if (await isPortFree(p))
72
+ return p;
73
+ }
74
+ throw new Error(`No free CDP port in ${start}..${start + span - 1}`);
75
+ }
76
+ // Resolve --cdp-port: 0 disables; explicit value used as-is; unset auto-picks from 9222 upward.
77
+ async function resolveCdpPort(raw) {
78
+ if (raw === undefined)
79
+ return findFreePort(9222, 100);
80
+ const n = Number.parseInt(raw, 10);
81
+ if (!Number.isInteger(n) || n < 0 || n > 65535) {
82
+ console.error(`Invalid --cdp-port value: ${raw}. Must be an integer in 0..65535 (0 disables).`);
83
+ process.exit(1);
84
+ }
85
+ return n;
86
+ }
51
87
  // Configure session settings (session saving is enabled by default)
52
88
  const baseDir = String(values['user-data-dir'] || './.mcp-web-inspector');
53
- const sessionConfig = {
54
- saveSession: !Boolean(values['no-save-session']),
55
- userDataDir: `${baseDir}/user-data`,
56
- screenshotsDir: `${baseDir}/screenshots`,
57
- headlessDefault: Boolean(values['headless']) || (process.platform === 'linux' && !process.env.DISPLAY && !process.env.WAYLAND_DISPLAY),
58
- exposeSensitiveNetworkData: Boolean(values['expose-sensitive-network-data']),
59
- };
60
- setSessionConfig(sessionConfig);
61
89
  async function runServer() {
90
+ // Skip port resolution when only printing metadata — no browser will launch.
91
+ const printOnly = Boolean(values['print-tools-json'] || values['print-tools-md']);
92
+ const cdpPort = printOnly ? 0 : await resolveCdpPort(values['cdp-port']);
93
+ const sessionConfig = {
94
+ saveSession: !Boolean(values['no-save-session']),
95
+ userDataDir: `${baseDir}/user-data`,
96
+ screenshotsDir: `${baseDir}/screenshots`,
97
+ headlessDefault: Boolean(values['headless']) || (process.platform === 'linux' && !process.env.DISPLAY && !process.env.WAYLAND_DISPLAY),
98
+ exposeSensitiveNetworkData: Boolean(values['expose-sensitive-network-data']),
99
+ cdpPort,
100
+ warmupBrowser: Boolean(values['warmup-browser']),
101
+ };
102
+ setSessionConfig(sessionConfig);
62
103
  // Create tool definitions with session config
63
104
  const TOOLS = createToolDefinitions(sessionConfig);
64
105
  // CLI utilities: print tools metadata (JSON/Markdown) and exit
@@ -72,6 +113,9 @@ async function runServer() {
72
113
  return;
73
114
  }
74
115
  console.error(`Starting mcp-web-inspector v${VERSION}`);
116
+ const cdpInstruction = sessionConfig.cdpPort > 0
117
+ ? `External Playwright clients can attach to this browser via Chrome DevTools Protocol at http://localhost:${sessionConfig.cdpPort} — pass that URL to chromium.connectOverCDP() to share cookies, localStorage, and the open page set with this server.`
118
+ : undefined;
75
119
  const server = new Server({
76
120
  name: "mcp-web-inspector",
77
121
  version: VERSION,
@@ -80,6 +124,7 @@ async function runServer() {
80
124
  resources: {},
81
125
  tools: {},
82
126
  },
127
+ ...(cdpInstruction ? { instructions: cdpInstruction } : {}),
83
128
  });
84
129
  // Setup request handlers
85
130
  setupRequestHandlers(server, TOOLS);
@@ -97,6 +142,15 @@ async function runServer() {
97
142
  // Create transport and connect
98
143
  const transport = new StdioServerTransport();
99
144
  await server.connect(transport);
145
+ // Optional eager browser launch. Off by default — sessions that never invoke
146
+ // an MCP tool shouldn't pay for Chromium startup. Useful when external
147
+ // clients (e.g. CDP seed/login scripts) need the browser up before any tool
148
+ // call. Non-blocking — failures surface on the first tool call.
149
+ if (sessionConfig.warmupBrowser) {
150
+ ensureBrowser({ headless: sessionConfig.headlessDefault }).catch(err => {
151
+ console.error("Eager browser warmup failed (will retry on first tool call):", err);
152
+ });
153
+ }
100
154
  }
101
155
  runServer().catch((error) => {
102
156
  console.error("Fatal error in main():", error);
@@ -19,6 +19,8 @@ export interface NetworkRequest {
19
19
  };
20
20
  }
21
21
  type ColorSchemeOverride = 'light' | 'dark' | 'no-preference';
22
+ export declare function hasShownNthHint(): boolean;
23
+ export declare function markNthHintShown(): void;
22
24
  /**
23
25
  * Sets the session configuration
24
26
  */
@@ -13,8 +13,20 @@ let sessionConfig = {
13
13
  screenshotsDir: './.mcp-web-inspector/screenshots',
14
14
  headlessDefault: false,
15
15
  exposeSensitiveNetworkData: false,
16
+ cdpPort: 0,
17
+ warmupBrowser: false,
16
18
  };
17
19
  let colorSchemeOverride = null;
20
+ // Session-scoped flag: the verbose "matched multiple elements" nth-selector
21
+ // guidance is only emitted once per browser session. After the first emit,
22
+ // tools surface only the short ⚠ warning to keep agent context lean.
23
+ let nthHintShown = false;
24
+ export function hasShownNthHint() {
25
+ return nthHintShown;
26
+ }
27
+ export function markNthHintShown() {
28
+ nthHintShown = true;
29
+ }
18
30
  /**
19
31
  * Sets the session configuration
20
32
  */
@@ -49,6 +61,7 @@ export function resetBrowserState() {
49
61
  currentBrowserType = 'chromium';
50
62
  currentDevice = undefined;
51
63
  networkLog = [];
64
+ nthHintShown = false;
52
65
  clearConsoleLogs();
53
66
  }
54
67
  /**
@@ -400,10 +413,14 @@ export async function ensureBrowser(browserSettings) {
400
413
  // IPs (e.g. Tailscale 100.64.0.0/10). This breaks environments where the API is on an
401
414
  // internal network but the app is served from a public CDN.
402
415
  // Prepare context options
416
+ const launchArgs = ['--disable-features=LocalNetworkAccessChecks'];
417
+ if (sessionConfig.cdpPort && sessionConfig.cdpPort > 0) {
418
+ launchArgs.push(`--remote-debugging-port=${sessionConfig.cdpPort}`);
419
+ }
403
420
  const contextOptions = {
404
421
  headless,
405
422
  executablePath: executablePath,
406
- args: ['--disable-features=LocalNetworkAccessChecks'],
423
+ args: launchArgs,
407
424
  };
408
425
  // If device config exists, use it; otherwise use manual viewport/userAgent
409
426
  if (deviceConfig) {
@@ -439,7 +456,10 @@ export async function ensureBrowser(browserSettings) {
439
456
  else {
440
457
  browser = await browserInstance.launch({
441
458
  headless,
442
- executablePath: executablePath
459
+ executablePath: executablePath,
460
+ args: sessionConfig.cdpPort && sessionConfig.cdpPort > 0
461
+ ? [`--remote-debugging-port=${sessionConfig.cdpPort}`]
462
+ : [],
443
463
  });
444
464
  currentBrowserType = browserType;
445
465
  // Add cleanup logic when browser is disconnected
@@ -608,10 +628,14 @@ export async function ensureBrowser(browserSettings) {
608
628
  retryViewportHeight = screenSize?.height ?? 720;
609
629
  }
610
630
  // Prepare context options
631
+ const retryLaunchArgs = ['--disable-features=LocalNetworkAccessChecks'];
632
+ if (sessionConfig.cdpPort && sessionConfig.cdpPort > 0) {
633
+ retryLaunchArgs.push(`--remote-debugging-port=${sessionConfig.cdpPort}`);
634
+ }
611
635
  const retryContextOptions = {
612
636
  headless,
613
637
  executablePath: executablePath,
614
- args: ['--disable-features=LocalNetworkAccessChecks'],
638
+ args: retryLaunchArgs,
615
639
  };
616
640
  // If device config exists, use it; otherwise use manual viewport/userAgent
617
641
  if (deviceConfig) {
@@ -644,7 +668,10 @@ export async function ensureBrowser(browserSettings) {
644
668
  else {
645
669
  browser = await browserInstance.launch({
646
670
  headless,
647
- executablePath: executablePath
671
+ executablePath: executablePath,
672
+ args: sessionConfig.cdpPort && sessionConfig.cdpPort > 0
673
+ ? [`--remote-debugging-port=${sessionConfig.cdpPort}`]
674
+ : [],
648
675
  });
649
676
  currentBrowserType = browserType;
650
677
  browser.on('disconnected', () => {
@@ -1,4 +1,4 @@
1
- import type { Page } from 'playwright';
1
+ import type { Locator, Page } from 'playwright';
2
2
  import { ToolHandler, ToolContext, ToolResponse } from '../common/types.js';
3
3
  /**
4
4
  * Base class for all browser-based tools
@@ -20,10 +20,49 @@ export declare abstract class BrowserToolBase implements ToolHandler {
20
20
  * "#radix-\:rc\:-content-123" → "id=radix-:rc:-content-123"
21
21
  * - Remove unnecessary escapes for bracket characters only (\\[ and \\])
22
22
  * DO NOT unescape colons globally — colons in class/ID names must stay escaped in CSS.
23
+ *
24
+ * Note: the `dialog::SELECTOR` scope shortcut (e.g., `dialog::section`,
25
+ * `dialog::testid:close`) is NOT handled here — it is a runtime scope
26
+ * resolved by `createScopedLocator()`, not a syntactic rewrite.
27
+ *
23
28
  * @param selector The selector string
24
29
  * @returns Normalized selector
25
30
  */
26
31
  protected normalizeSelector(selector: string): string;
32
+ /**
33
+ * Build a Playwright `Locator` honoring the `dialog::SELECTOR` scope shortcut.
34
+ *
35
+ * - `dialog::section` → topmost open dialog/sheet, then `section` inside it
36
+ * - `dialog::testid:close` → topmost open dialog, then `[data-testid="close"]` inside it
37
+ * - `dialog::` → the topmost open dialog itself
38
+ * - anything else → `page.locator(normalizeSelector(rawSelector))`
39
+ *
40
+ * "Topmost" is determined by the highest effective z-index — for each
41
+ * candidate dialog, we walk up through every positioned ancestor (this usually
42
+ * includes the backdrop/glass-screen wrapper, which is what stacking actually
43
+ * follows) and take the largest z-index found. DOM order is the tiebreaker. This is more
44
+ * robust than picking `.last()` because portal frameworks don't always
45
+ * append in z-order, and modal stacking is driven by the backdrop's
46
+ * z-index, not the dialog content's.
47
+ */
48
+ protected createScopedLocator(page: Page, rawSelector: string): Promise<Locator>;
49
+ /**
50
+ * Detect a "user-dominating" open modal — i.e. one that a human would
51
+ * visually focus on and interact with to the exclusion of the rest of the
52
+ * page. Used by inspect_dom / get_text / get_html to auto-scope when no
53
+ * selector is provided, so the LLM's view matches the human's view.
54
+ *
55
+ * Strict criterion: requires `aria-modal="true"` (or native `dialog[open]`)
56
+ * because non-modal `[role="dialog"]` includes things like side panels and
57
+ * tooltips that don't dominate the page.
58
+ *
59
+ * Returns null if no active modal is open. Otherwise returns the topmost
60
+ * one, ranked by the same z-index walk used by `createScopedLocator()`.
61
+ */
62
+ protected detectActiveModal(page: Page): Promise<{
63
+ descriptor: string;
64
+ suggestion: string;
65
+ } | null>;
27
66
  /**
28
67
  * Sanitize verbose Playwright selector engine messages by removing stack traces and
29
68
  * keeping only the essential syntax error information.
@@ -105,7 +144,7 @@ export declare abstract class BrowserToolBase implements ToolHandler {
105
144
  * @param preferredVisible Whether visibility preference was used
106
145
  * @returns Formatted string or empty if only one element
107
146
  */
108
- protected formatElementSelectionInfo(selector: string, elementIndex: number, totalCount: number, preferredVisible?: boolean): string;
147
+ protected formatElementSelectionInfo(selector: string, elementIndex: number, totalCount: number, preferredVisible?: boolean): Promise<string>;
109
148
  /**
110
149
  * Generate a warning message if the selector is a testid and there are duplicates
111
150
  *
@@ -16,6 +16,11 @@ export class BrowserToolBase {
16
16
  * "#radix-\:rc\:-content-123" → "id=radix-:rc:-content-123"
17
17
  * - Remove unnecessary escapes for bracket characters only (\\[ and \\])
18
18
  * DO NOT unescape colons globally — colons in class/ID names must stay escaped in CSS.
19
+ *
20
+ * Note: the `dialog::SELECTOR` scope shortcut (e.g., `dialog::section`,
21
+ * `dialog::testid:close`) is NOT handled here — it is a runtime scope
22
+ * resolved by `createScopedLocator()`, not a syntactic rewrite.
23
+ *
19
24
  * @param selector The selector string
20
25
  * @returns Normalized selector
21
26
  */
@@ -76,6 +81,188 @@ export class BrowserToolBase {
76
81
  cleaned = cleaned.replace(/\\{2,}(?=:)/g, '\\');
77
82
  return cleaned;
78
83
  }
84
+ /**
85
+ * Build a Playwright `Locator` honoring the `dialog::SELECTOR` scope shortcut.
86
+ *
87
+ * - `dialog::section` → topmost open dialog/sheet, then `section` inside it
88
+ * - `dialog::testid:close` → topmost open dialog, then `[data-testid="close"]` inside it
89
+ * - `dialog::` → the topmost open dialog itself
90
+ * - anything else → `page.locator(normalizeSelector(rawSelector))`
91
+ *
92
+ * "Topmost" is determined by the highest effective z-index — for each
93
+ * candidate dialog, we walk up through every positioned ancestor (this usually
94
+ * includes the backdrop/glass-screen wrapper, which is what stacking actually
95
+ * follows) and take the largest z-index found. DOM order is the tiebreaker. This is more
96
+ * robust than picking `.last()` because portal frameworks don't always
97
+ * append in z-order, and modal stacking is driven by the backdrop's
98
+ * z-index, not the dialog content's.
99
+ */
100
+ async createScopedLocator(page, rawSelector) {
101
+ const trimmed = (rawSelector ?? '').trim();
102
+ const DIALOG_PREFIX = 'dialog::';
103
+ if (!trimmed.startsWith(DIALOG_PREFIX)) {
104
+ return page.locator(this.normalizeSelector(trimmed));
105
+ }
106
+ const dialogRoots = '[role="dialog"]:not([aria-hidden="true"]),' +
107
+ '[role="alertdialog"]:not([aria-hidden="true"]),' +
108
+ 'dialog[open]';
109
+ // Match detectActiveModal: include only user-visible candidates and
110
+ // rank by effective z-index. Without the visibility filter, a hidden
111
+ // dialog left in the DOM (display:none) could be picked over an
112
+ // actually-open one.
113
+ const result = await page.evaluate((rootsSelector) => {
114
+ const isUserVisible = (el) => {
115
+ const cs = getComputedStyle(el);
116
+ if (cs.display === 'none' || cs.visibility === 'hidden' || parseFloat(cs.opacity) === 0) {
117
+ return false;
118
+ }
119
+ const rect = el.getBoundingClientRect();
120
+ return rect.width > 0 && rect.height > 0;
121
+ };
122
+ const allCandidates = Array.from(document.querySelectorAll(rootsSelector));
123
+ const visibleIndices = [];
124
+ allCandidates.forEach((el, i) => {
125
+ if (isUserVisible(el))
126
+ visibleIndices.push(i);
127
+ });
128
+ if (visibleIndices.length === 0)
129
+ return { topIndex: -1, hasVisible: false };
130
+ if (visibleIndices.length === 1)
131
+ return { topIndex: visibleIndices[0], hasVisible: true };
132
+ const effectiveZ = (start) => {
133
+ let z = 0;
134
+ let node = start;
135
+ while (node && node !== document.body) {
136
+ const cs = getComputedStyle(node);
137
+ if (cs.position !== 'static') {
138
+ const parsed = parseInt(cs.zIndex, 10);
139
+ if (!isNaN(parsed)) {
140
+ z = Math.max(z, parsed);
141
+ }
142
+ }
143
+ node = node.parentElement;
144
+ }
145
+ return z;
146
+ };
147
+ let bestIdx = visibleIndices[0];
148
+ let bestScore = -Infinity;
149
+ visibleIndices.forEach((i) => {
150
+ // Tiebreaker: DOM order — later element is on top.
151
+ const score = effectiveZ(allCandidates[i]) * 1000000 + i;
152
+ if (score > bestScore) {
153
+ bestScore = score;
154
+ bestIdx = i;
155
+ }
156
+ });
157
+ return { topIndex: bestIdx, hasVisible: true };
158
+ }, dialogRoots).catch(() => ({ topIndex: -1, hasVisible: false }));
159
+ // No visible dialog → return a never-matching locator so downstream
160
+ // callers see a clean "No elements found" instead of silently scoping
161
+ // to a hidden dialog left in the DOM.
162
+ if (!result.hasVisible) {
163
+ return page.locator('dialog-no-such-element-sentinel');
164
+ }
165
+ const topmostDialog = page.locator(dialogRoots).nth(result.topIndex);
166
+ const inner = trimmed.slice(DIALOG_PREFIX.length).trim();
167
+ if (!inner) {
168
+ return topmostDialog;
169
+ }
170
+ return topmostDialog.locator(this.normalizeSelector(inner));
171
+ }
172
+ /**
173
+ * Detect a "user-dominating" open modal — i.e. one that a human would
174
+ * visually focus on and interact with to the exclusion of the rest of the
175
+ * page. Used by inspect_dom / get_text / get_html to auto-scope when no
176
+ * selector is provided, so the LLM's view matches the human's view.
177
+ *
178
+ * Strict criterion: requires `aria-modal="true"` (or native `dialog[open]`)
179
+ * because non-modal `[role="dialog"]` includes things like side panels and
180
+ * tooltips that don't dominate the page.
181
+ *
182
+ * Returns null if no active modal is open. Otherwise returns the topmost
183
+ * one, ranked by the same z-index walk used by `createScopedLocator()`.
184
+ */
185
+ async detectActiveModal(page) {
186
+ const ACTIVE_MODAL_SELECTOR = '[role="dialog"][aria-modal="true"]:not([aria-hidden="true"]),' +
187
+ '[role="alertdialog"][aria-modal="true"]:not([aria-hidden="true"]),' +
188
+ 'dialog[open]';
189
+ return await page.evaluate((rootsSelector) => {
190
+ const isUserVisible = (el) => {
191
+ const cs = getComputedStyle(el);
192
+ if (cs.display === 'none' || cs.visibility === 'hidden' || parseFloat(cs.opacity) === 0) {
193
+ return false;
194
+ }
195
+ const rect = el.getBoundingClientRect();
196
+ return rect.width > 0 && rect.height > 0;
197
+ };
198
+ const candidates = Array.from(document.querySelectorAll(rootsSelector)).filter(isUserVisible);
199
+ if (candidates.length === 0)
200
+ return null;
201
+ const effectiveZ = (start) => {
202
+ let z = 0;
203
+ let node = start;
204
+ while (node && node !== document.body) {
205
+ const cs = getComputedStyle(node);
206
+ if (cs.position !== 'static') {
207
+ const parsed = parseInt(cs.zIndex, 10);
208
+ if (!isNaN(parsed)) {
209
+ z = Math.max(z, parsed);
210
+ }
211
+ }
212
+ node = node.parentElement;
213
+ }
214
+ return z;
215
+ };
216
+ let bestIdx = 0;
217
+ let bestScore = -Infinity;
218
+ candidates.forEach((el, i) => {
219
+ const score = effectiveZ(el) * 1000000 + i;
220
+ if (score > bestScore) {
221
+ bestScore = score;
222
+ bestIdx = i;
223
+ }
224
+ });
225
+ const top = candidates[bestIdx];
226
+ const tag = top.tagName.toLowerCase();
227
+ const role = top.getAttribute('role') || (tag === 'dialog' ? 'dialog' : '');
228
+ const testid = top.getAttribute('data-testid') ||
229
+ top.getAttribute('data-test') ||
230
+ top.getAttribute('data-cy');
231
+ const id = top.id || null;
232
+ const ariaLabel = top.getAttribute('aria-label');
233
+ const ariaLabelledBy = top.getAttribute('aria-labelledby');
234
+ let labelText = null;
235
+ if (ariaLabelledBy) {
236
+ const labelEl = document.getElementById(ariaLabelledBy);
237
+ labelText = labelEl?.textContent?.trim() || null;
238
+ }
239
+ const parts = [`<${tag}`];
240
+ if (role)
241
+ parts.push(`role="${role}"`);
242
+ if (testid)
243
+ parts.push(`data-testid="${testid}"`);
244
+ else if (id)
245
+ parts.push(`id="${id}"`);
246
+ if (ariaLabel)
247
+ parts.push(`aria-label="${ariaLabel}"`);
248
+ else if (labelText)
249
+ parts.push(`labelled="${labelText.slice(0, 60)}"`);
250
+ parts[parts.length - 1] += '>';
251
+ return {
252
+ descriptor: parts.join(' '),
253
+ suggestion: testid ? `dialog::testid:${testid}` : 'dialog::',
254
+ };
255
+ }, ACTIVE_MODAL_SELECTOR).then((result) => {
256
+ // Defensive: only treat as a real modal if the result is a
257
+ // properly-shaped object. Mocked test environments may return
258
+ // arbitrary values from page.evaluate() that should not trigger
259
+ // auto-scope.
260
+ if (result && typeof result === 'object' && typeof result.descriptor === 'string') {
261
+ return result;
262
+ }
263
+ return null;
264
+ }, () => null);
265
+ }
79
266
  /**
80
267
  * Sanitize verbose Playwright selector engine messages by removing stack traces and
81
268
  * keeping only the essential syntax error information.
@@ -249,24 +436,27 @@ export class BrowserToolBase {
249
436
  // Check for multiple elements with errorOnMultiple flag
250
437
  if (options?.errorOnMultiple && count > 1) {
251
438
  const selector = options.originalSelector || 'selector';
252
- const nthHint = ''.trimEnd();
253
- const warning = ''.trimEnd();
254
439
  let message = `Selector "${selector}" matched ${count} elements. Please use a more specific selector.`;
255
- if (nthHint) {
256
- message += `\n${nthHint}`;
257
- }
258
- if (warning) {
259
- message += `\n${warning}`;
260
- }
261
- {
440
+ // Verbose disambiguation guidance is rate-limited per session — useful
441
+ // once for the agent to learn the pattern, noise on every subsequent call.
442
+ // After the first emit, fall back to a one-line pointer.
443
+ const { hasShownNthHint, markNthHintShown } = await import('../../toolHandler.js');
444
+ if (!hasShownNthHint()) {
262
445
  const guidance = [
263
446
  `1) Preferred: add a unique data-testid and select it directly (e.g., testid:submit).`,
264
447
  `2) If you cannot change markup: append \`>> nth=<index>\` to target a specific match.`,
265
448
  ];
266
- const matchesDetails = await this.describeMatchedElements(locator, selector, count);
267
- message += `\n${guidance.join('\n')}\n\nMatches:\n${matchesDetails}`;
268
- throw new Error(message);
449
+ message += `\n${guidance.join('\n')}`;
450
+ markNthHintShown();
269
451
  }
452
+ else {
453
+ message += `\nUse a more specific selector (e.g. testid:..., or '>> nth=<index>').`;
454
+ }
455
+ // Per-call match details remain — they describe what's actually on the
456
+ // page, not generic advice.
457
+ const matchesDetails = await this.describeMatchedElements(locator, selector, count);
458
+ message += `\n\nMatches:\n${matchesDetails}`;
459
+ throw new Error(message);
270
460
  }
271
461
  // Handle explicit element index (1-based)
272
462
  if (options?.elementIndex !== undefined) {
@@ -316,7 +506,7 @@ export class BrowserToolBase {
316
506
  * @param preferredVisible Whether visibility preference was used
317
507
  * @returns Formatted string or empty if only one element
318
508
  */
319
- formatElementSelectionInfo(selector, elementIndex, totalCount, preferredVisible = true) {
509
+ async formatElementSelectionInfo(selector, elementIndex, totalCount, preferredVisible = true) {
320
510
  const usesNth = selector.includes('>> nth=');
321
511
  if (totalCount <= 1) {
322
512
  // Even when a single element is ultimately targeted, discourage nth usage
@@ -326,10 +516,25 @@ export class BrowserToolBase {
326
516
  }
327
517
  return '';
328
518
  }
329
- const duplicateWarning = this.getDuplicateTestIdWarning(selector, totalCount).trimEnd();
330
- const nthHint = this.buildNthSelectorHint(selector, totalCount).trimEnd();
331
519
  const avoidNth = usesNth ? "💡 Tip: Avoid relying on '>> nth='; add a unique data-testid instead." : '';
332
- const extraHints = [duplicateWarning, nthHint, avoidNth].filter(Boolean).join('\n');
520
+ // Verbose nth-selector guidance is rate-limited to one emit per session.
521
+ // The short ⚠ warning still surfaces every call; the multi-line hint block
522
+ // (duplicate-testid tip + nth-selector workaround) appears only on the
523
+ // first multi-match of the session — it's reference material the agent
524
+ // only needs once.
525
+ let extraHints = '';
526
+ const { hasShownNthHint, markNthHintShown } = await import('../../toolHandler.js');
527
+ if (!hasShownNthHint()) {
528
+ const duplicateWarning = this.getDuplicateTestIdWarning(selector, totalCount).trimEnd();
529
+ const nthHint = this.buildNthSelectorHint(selector, totalCount).trimEnd();
530
+ extraHints = [duplicateWarning, nthHint, avoidNth].filter(Boolean).join('\n');
531
+ if (duplicateWarning || nthHint) {
532
+ markNthHintShown();
533
+ }
534
+ }
535
+ else if (avoidNth) {
536
+ extraHints = avoidNth;
537
+ }
333
538
  const baseMessage = preferredVisible
334
539
  ? `⚠ Found ${totalCount} elements matching "${selector}", using element ${elementIndex + 1} (first visible)`
335
540
  : `⚠ Found ${totalCount} elements matching "${selector}", using element ${elementIndex + 1}`;
@@ -5,3 +5,15 @@ export declare function titleUrlChangeLines(page: Page, initial?: {
5
5
  url?: string;
6
6
  title?: string;
7
7
  }): Promise<string[]>;
8
+ export type OverlayKind = 'dialog' | 'menu' | 'listbox' | 'tooltip' | 'popup';
9
+ export interface OverlayEntry {
10
+ descriptor: string;
11
+ kind: OverlayKind;
12
+ suggestion?: string;
13
+ }
14
+ export interface OverlaySnapshot {
15
+ keys: string[];
16
+ entries: Record<string, OverlayEntry>;
17
+ }
18
+ export declare function snapshotOpenOverlays(page: Page): Promise<OverlaySnapshot>;
19
+ export declare function overlayChangeLines(before: OverlaySnapshot, after: OverlaySnapshot): string[];