mcp-web-inspector 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/index.js +63 -9
- package/dist/toolHandler.d.ts +2 -0
- package/dist/toolHandler.js +31 -4
- package/dist/tools/browser/base.d.ts +41 -2
- package/dist/tools/browser/base.js +221 -16
- package/dist/tools/browser/common/postAction.d.ts +12 -0
- package/dist/tools/browser/common/postAction.js +158 -0
- package/dist/tools/browser/content/get_html.js +19 -6
- package/dist/tools/browser/content/get_text.js +24 -8
- package/dist/tools/browser/inspection/check_visibility.js +2 -3
- package/dist/tools/browser/inspection/compare_element_alignment.js +6 -8
- package/dist/tools/browser/inspection/element_exists.js +1 -2
- package/dist/tools/browser/inspection/get_computed_styles.js +2 -3
- package/dist/tools/browser/inspection/inspect_ancestors.js +4 -5
- package/dist/tools/browser/inspection/inspect_dom.js +20 -4
- package/dist/tools/browser/inspection/measure_element.js +2 -3
- package/dist/tools/browser/inspection/query_selector.js +3 -3
- package/dist/tools/browser/interaction/click.js +178 -6
- package/dist/tools/browser/interaction/drag.js +2 -4
- package/dist/tools/browser/interaction/fill.js +2 -3
- package/dist/tools/browser/interaction/hover.js +1 -2
- package/dist/tools/browser/interaction/press_key.js +1 -2
- package/dist/tools/browser/interaction/select.js +1 -2
- package/dist/tools/browser/interaction/upload_file.js +1 -2
- package/dist/tools/browser/navigation/scroll_by.js +1 -1
- package/dist/tools/browser/navigation/scroll_to_element.js +1 -2
- package/dist/tools/browser/waiting/wait_for_element.js +1 -2
- package/dist/tools/common/types.d.ts +2 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -723,7 +723,7 @@ Issues:
|
|
|
723
723
|
Test a selector and return detailed information about all matched elements. Essential for selector debugging and finding the right element to interact with. Returns compact text format with element tag, position, text content, visibility status, and interaction capability. Shows why elements are hidden (display:none, opacity:0, zero size). Supports testid shortcuts (e.g., 'testid:submit-button'). Use limit parameter to control how many matches to show (default: 10). NEW: Use onlyVisible parameter to filter results (true=visible only, false=hidden only, undefined=all).
|
|
724
724
|
|
|
725
725
|
- Parameters:
|
|
726
|
-
- selector (string, required): CSS selector, text selector, or testid shorthand to test (e.g., 'button.submit', 'testid:login-form', 'text=Sign In')
|
|
726
|
+
- selector (string, required): CSS selector, text selector, or testid shorthand to test (e.g., 'button.submit', 'testid:login-form', 'text=Sign In', 'dialog::button' to scope the lookup to the topmost open dialog/sheet)
|
|
727
727
|
- limit (number, optional): Maximum number of elements to return detailed info for (default: 10, recommended max: 50)
|
|
728
728
|
- onlyVisible (boolean, optional): Filter results by visibility: true = show only visible elements, false = show only hidden elements, undefined/not specified = show all elements (default: undefined)
|
|
729
729
|
- showAttributes (string, optional): Comma-separated list of HTML attributes to display for each element (e.g., 'id,name,aria-label,href,type'). If not specified, attributes are not shown.
|
|
@@ -952,7 +952,7 @@ Scroll an element into view. Automatically handles scrolling within the nearest
|
|
|
952
952
|
Click an element on the page
|
|
953
953
|
|
|
954
954
|
- Parameters:
|
|
955
|
-
- selector (string, required): CSS selector for the element to click
|
|
955
|
+
- selector (string, required): CSS selector for the element to click. Supports 'testid:NAME' and 'dialog::SELECTOR' (scopes the lookup to the topmost open dialog/sheet, e.g. 'dialog::testid:confirm').
|
|
956
956
|
|
|
957
957
|
#### `drag`
|
|
958
958
|
Drag an element to a target location
|
|
@@ -965,7 +965,7 @@ Drag an element to a target location
|
|
|
965
965
|
fill an input/textarea/contenteditable; if the selector matches a wrapper, descends up to 4 levels to a unique fillable descendant (errors if zero or multiple)
|
|
966
966
|
|
|
967
967
|
- Parameters:
|
|
968
|
-
- selector (string, required): CSS selector for input field or its wrapper
|
|
968
|
+
- selector (string, required): CSS selector for input field or its wrapper. Supports 'testid:NAME' and 'dialog::SELECTOR' (scopes to the topmost open dialog/sheet).
|
|
969
969
|
- value (string, required): Value to fill
|
|
970
970
|
|
|
971
971
|
#### `hover`
|
|
@@ -1006,10 +1006,10 @@ Upload a file to an input[type='file'] element on the page
|
|
|
1006
1006
|
- maxLength (number, optional): Maximum number of characters to return (default: 20000)
|
|
1007
1007
|
|
|
1008
1008
|
#### `get_text`
|
|
1009
|
-
[may return preview+token] ⚠️ RARELY NEEDED: Get ALL visible text content from the entire page (no structure, just raw text). Most tasks need structured inspection instead. ONLY use get_text for: (1) extracting text for content analysis (word count, language detection), (2) searching for text when location is completely unknown, (3) text-only snapshots for comparison. For structured tasks, use: inspect_dom() to understand page structure, find_by_text() to locate specific text with context, query_selector() to find elements. Auto-returns text if <2000 chars (small elements); if larger, returns a preview and a one-time token to fetch the full output via confirm_output. Supports testid shortcuts.
|
|
1009
|
+
[may return preview+token] ⚠️ RARELY NEEDED: Get ALL visible text content from the entire page (no structure, just raw text). Most tasks need structured inspection instead. ONLY use get_text for: (1) extracting text for content analysis (word count, language detection), (2) searching for text when location is completely unknown, (3) text-only snapshots for comparison. For structured tasks, use: inspect_dom() to understand page structure, find_by_text() to locate specific text with context, query_selector() to find elements. Auto-returns text if <2000 chars (small elements); if larger, returns a preview and a one-time token to fetch the full output via confirm_output. Supports testid shortcuts and the `dialog::SELECTOR` scope to read inside the topmost open dialog/sheet.
|
|
1010
1010
|
|
|
1011
1011
|
- Parameters:
|
|
1012
|
-
- selector (string, optional): CSS selector, text selector, or testid shorthand to limit text extraction to a specific container. Omit to get text from entire page.
|
|
1012
|
+
- selector (string, optional): CSS selector, text selector, or testid shorthand to limit text extraction to a specific container. Omit to get text from entire page. Examples: 'testid:article-body', '#main-content', 'dialog::section' (scopes lookup to the topmost open dialog/sheet — useful when a sheet covers ambiguous page chrome). Use bare 'dialog::' for the whole topmost dialog.
|
|
1013
1013
|
- maxLength (number, optional): Maximum number of characters to return (default: 20000)
|
|
1014
1014
|
|
|
1015
1015
|
#### `visual_screenshot_for_humans`
|
package/dist/index.js
CHANGED
|
@@ -4,10 +4,11 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
|
|
|
4
4
|
import { createToolDefinitions } from "./tools/common/registry.js";
|
|
5
5
|
import { setupRequestHandlers } from "./requestHandler.js";
|
|
6
6
|
import { parseArgs } from "node:util";
|
|
7
|
-
import { setSessionConfig } from "./toolHandler.js";
|
|
7
|
+
import { setSessionConfig, ensureBrowser } from "./toolHandler.js";
|
|
8
8
|
import { readFileSync } from "node:fs";
|
|
9
9
|
import { fileURLToPath } from "node:url";
|
|
10
10
|
import { dirname, join } from "node:path";
|
|
11
|
+
import { createServer } from "node:net";
|
|
11
12
|
// Get package.json version
|
|
12
13
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
14
|
const PACKAGE_ROOT = join(__dirname, "..");
|
|
@@ -37,6 +38,13 @@ const { values } = parseArgs({
|
|
|
37
38
|
type: 'boolean',
|
|
38
39
|
default: false,
|
|
39
40
|
},
|
|
41
|
+
'cdp-port': {
|
|
42
|
+
type: 'string',
|
|
43
|
+
},
|
|
44
|
+
'warmup-browser': {
|
|
45
|
+
type: 'boolean',
|
|
46
|
+
default: false,
|
|
47
|
+
},
|
|
40
48
|
'print-tools-json': {
|
|
41
49
|
type: 'boolean',
|
|
42
50
|
default: false,
|
|
@@ -48,17 +56,50 @@ const { values } = parseArgs({
|
|
|
48
56
|
},
|
|
49
57
|
strict: false,
|
|
50
58
|
});
|
|
59
|
+
/**
 * Check whether a TCP port can currently be bound on the IPv4 loopback address.
 *
 * A throwaway server attempts to listen on 127.0.0.1:<port>. If listening
 * succeeds, the probe is closed again and the promise resolves to `true`.
 * If the server emits 'error' instead (for example because the address is
 * already in use), the promise resolves to `false`. The promise never rejects
 * through these two event paths.
 *
 * @param port TCP port number to probe.
 * @returns Promise resolving to `true` when the port could be bound, `false` otherwise.
 */
function isPortFree(port) {
    return new Promise((resolve) => {
        const probe = createServer();
        const onError = () => {
            resolve(false);
        };
        const onListening = () => {
            // Release the port before reporting it as free.
            probe.close(() => resolve(true));
        };
        probe.once('error', onError);
        probe.once('listening', onListening);
        probe.listen(port, '127.0.0.1');
    });
}
|
|
68
|
+
/**
 * Find the lowest bindable port in the half-open range [start, start + span).
 *
 * Ports are probed one at a time, in ascending order, with `isPortFree`.
 *
 * @param start First port to try.
 * @param span Number of consecutive ports to try.
 * @returns Promise resolving to the first port for which `isPortFree` reported `true`.
 * @throws Error when every port in the range was reported as not free.
 */
async function findFreePort(start, span) {
    const limit = start + span; // exclusive upper bound
    let candidate = start;
    while (candidate < limit) {
        const available = await isPortFree(candidate);
        if (available) {
            return candidate;
        }
        candidate++;
    }
    throw new Error(`No free CDP port in ${start}..${start + span - 1}`);
}
|
|
76
|
+
/**
 * Resolve the raw `--cdp-port` option value to a port number.
 *
 * - `undefined` (option not supplied): auto-pick the first free port in 9222..9321.
 * - `0`: returned as-is; the error message below documents 0 as "disables".
 * - any other decimal integer up to 65535: returned as-is, without probing it.
 *
 * The value must consist solely of decimal digits (surrounding whitespace is
 * ignored). `Number.parseInt` on its own accepts any numeric prefix, so inputs
 * such as "9222abc" (9222), "12.5" (12), "1e4" (1) or "0x2406" (0, which would
 * silently disable CDP) used to pass validation. They are now rejected.
 *
 * On an invalid value this prints a message to stderr and terminates the
 * process with exit status 1.
 *
 * @param raw Raw option value; coerced with String() before validation.
 * @returns Promise resolving to the port number to use.
 */
async function resolveCdpPort(raw) {
    if (raw === undefined)
        return findFreePort(9222, 100);
    const text = String(raw).trim();
    // Only parse when the whole value is digits; otherwise force the invalid path.
    const n = /^\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
    if (!Number.isInteger(n) || n < 0 || n > 65535) {
        console.error(`Invalid --cdp-port value: ${raw}. Must be an integer in 0..65535 (0 disables).`);
        process.exit(1);
    }
    return n;
}
|
|
51
87
|
// Configure session settings (session saving is enabled by default)
|
|
52
88
|
const baseDir = String(values['user-data-dir'] || './.mcp-web-inspector');
|
|
53
|
-
const sessionConfig = {
|
|
54
|
-
saveSession: !Boolean(values['no-save-session']),
|
|
55
|
-
userDataDir: `${baseDir}/user-data`,
|
|
56
|
-
screenshotsDir: `${baseDir}/screenshots`,
|
|
57
|
-
headlessDefault: Boolean(values['headless']) || (process.platform === 'linux' && !process.env.DISPLAY && !process.env.WAYLAND_DISPLAY),
|
|
58
|
-
exposeSensitiveNetworkData: Boolean(values['expose-sensitive-network-data']),
|
|
59
|
-
};
|
|
60
|
-
setSessionConfig(sessionConfig);
|
|
61
89
|
async function runServer() {
|
|
90
|
+
// Skip port resolution when only printing metadata — no browser will launch.
|
|
91
|
+
const printOnly = Boolean(values['print-tools-json'] || values['print-tools-md']);
|
|
92
|
+
const cdpPort = printOnly ? 0 : await resolveCdpPort(values['cdp-port']);
|
|
93
|
+
const sessionConfig = {
|
|
94
|
+
saveSession: !Boolean(values['no-save-session']),
|
|
95
|
+
userDataDir: `${baseDir}/user-data`,
|
|
96
|
+
screenshotsDir: `${baseDir}/screenshots`,
|
|
97
|
+
headlessDefault: Boolean(values['headless']) || (process.platform === 'linux' && !process.env.DISPLAY && !process.env.WAYLAND_DISPLAY),
|
|
98
|
+
exposeSensitiveNetworkData: Boolean(values['expose-sensitive-network-data']),
|
|
99
|
+
cdpPort,
|
|
100
|
+
warmupBrowser: Boolean(values['warmup-browser']),
|
|
101
|
+
};
|
|
102
|
+
setSessionConfig(sessionConfig);
|
|
62
103
|
// Create tool definitions with session config
|
|
63
104
|
const TOOLS = createToolDefinitions(sessionConfig);
|
|
64
105
|
// CLI utilities: print tools metadata (JSON/Markdown) and exit
|
|
@@ -72,6 +113,9 @@ async function runServer() {
|
|
|
72
113
|
return;
|
|
73
114
|
}
|
|
74
115
|
console.error(`Starting mcp-web-inspector v${VERSION}`);
|
|
116
|
+
const cdpInstruction = sessionConfig.cdpPort > 0
|
|
117
|
+
? `External Playwright clients can attach to this browser via Chrome DevTools Protocol at http://localhost:${sessionConfig.cdpPort} — pass that URL to chromium.connectOverCDP() to share cookies, localStorage, and the open page set with this server.`
|
|
118
|
+
: undefined;
|
|
75
119
|
const server = new Server({
|
|
76
120
|
name: "mcp-web-inspector",
|
|
77
121
|
version: VERSION,
|
|
@@ -80,6 +124,7 @@ async function runServer() {
|
|
|
80
124
|
resources: {},
|
|
81
125
|
tools: {},
|
|
82
126
|
},
|
|
127
|
+
...(cdpInstruction ? { instructions: cdpInstruction } : {}),
|
|
83
128
|
});
|
|
84
129
|
// Setup request handlers
|
|
85
130
|
setupRequestHandlers(server, TOOLS);
|
|
@@ -97,6 +142,15 @@ async function runServer() {
|
|
|
97
142
|
// Create transport and connect
|
|
98
143
|
const transport = new StdioServerTransport();
|
|
99
144
|
await server.connect(transport);
|
|
145
|
+
// Optional eager browser launch. Off by default — sessions that never invoke
|
|
146
|
+
// an MCP tool shouldn't pay for Chromium startup. Useful when external
|
|
147
|
+
// clients (e.g. CDP seed/login scripts) need the browser up before any tool
|
|
148
|
+
// call. Non-blocking — failures surface on the first tool call.
|
|
149
|
+
if (sessionConfig.warmupBrowser) {
|
|
150
|
+
ensureBrowser({ headless: sessionConfig.headlessDefault }).catch(err => {
|
|
151
|
+
console.error("Eager browser warmup failed (will retry on first tool call):", err);
|
|
152
|
+
});
|
|
153
|
+
}
|
|
100
154
|
}
|
|
101
155
|
runServer().catch((error) => {
|
|
102
156
|
console.error("Fatal error in main():", error);
|
package/dist/toolHandler.d.ts
CHANGED
|
@@ -19,6 +19,8 @@ export interface NetworkRequest {
|
|
|
19
19
|
};
|
|
20
20
|
}
|
|
21
21
|
type ColorSchemeOverride = 'light' | 'dark' | 'no-preference';
|
|
22
|
+
export declare function hasShownNthHint(): boolean;
|
|
23
|
+
export declare function markNthHintShown(): void;
|
|
22
24
|
/**
|
|
23
25
|
* Sets the session configuration
|
|
24
26
|
*/
|
package/dist/toolHandler.js
CHANGED
|
@@ -13,8 +13,20 @@ let sessionConfig = {
|
|
|
13
13
|
screenshotsDir: './.mcp-web-inspector/screenshots',
|
|
14
14
|
headlessDefault: false,
|
|
15
15
|
exposeSensitiveNetworkData: false,
|
|
16
|
+
cdpPort: 0,
|
|
17
|
+
warmupBrowser: false,
|
|
16
18
|
};
|
|
17
19
|
let colorSchemeOverride = null;
|
|
20
|
+
// Tracks whether the verbose "matched multiple elements" nth-selector guidance
// has already been emitted. Once it has, tools are expected to show only the
// short warning so agent context stays lean. Starts out false.
let nthHintShown = false;
/**
 * Report whether the verbose nth-selector guidance has already been emitted.
 *
 * @returns `true` once `markNthHintShown()` has been called, `false` before that.
 */
export function hasShownNthHint() {
    return nthHintShown;
}
/**
 * Record that the verbose nth-selector guidance has been emitted.
 * Calling it more than once has no further effect.
 */
export function markNthHintShown() {
    nthHintShown = true;
}
|
|
18
30
|
/**
|
|
19
31
|
* Sets the session configuration
|
|
20
32
|
*/
|
|
@@ -49,6 +61,7 @@ export function resetBrowserState() {
|
|
|
49
61
|
currentBrowserType = 'chromium';
|
|
50
62
|
currentDevice = undefined;
|
|
51
63
|
networkLog = [];
|
|
64
|
+
nthHintShown = false;
|
|
52
65
|
clearConsoleLogs();
|
|
53
66
|
}
|
|
54
67
|
/**
|
|
@@ -400,10 +413,14 @@ export async function ensureBrowser(browserSettings) {
|
|
|
400
413
|
// IPs (e.g. Tailscale 100.64.0.0/10). This breaks environments where the API is on an
|
|
401
414
|
// internal network but the app is served from a public CDN.
|
|
402
415
|
// Prepare context options
|
|
416
|
+
const launchArgs = ['--disable-features=LocalNetworkAccessChecks'];
|
|
417
|
+
if (sessionConfig.cdpPort && sessionConfig.cdpPort > 0) {
|
|
418
|
+
launchArgs.push(`--remote-debugging-port=${sessionConfig.cdpPort}`);
|
|
419
|
+
}
|
|
403
420
|
const contextOptions = {
|
|
404
421
|
headless,
|
|
405
422
|
executablePath: executablePath,
|
|
406
|
-
args:
|
|
423
|
+
args: launchArgs,
|
|
407
424
|
};
|
|
408
425
|
// If device config exists, use it; otherwise use manual viewport/userAgent
|
|
409
426
|
if (deviceConfig) {
|
|
@@ -439,7 +456,10 @@ export async function ensureBrowser(browserSettings) {
|
|
|
439
456
|
else {
|
|
440
457
|
browser = await browserInstance.launch({
|
|
441
458
|
headless,
|
|
442
|
-
executablePath: executablePath
|
|
459
|
+
executablePath: executablePath,
|
|
460
|
+
args: sessionConfig.cdpPort && sessionConfig.cdpPort > 0
|
|
461
|
+
? [`--remote-debugging-port=${sessionConfig.cdpPort}`]
|
|
462
|
+
: [],
|
|
443
463
|
});
|
|
444
464
|
currentBrowserType = browserType;
|
|
445
465
|
// Add cleanup logic when browser is disconnected
|
|
@@ -608,10 +628,14 @@ export async function ensureBrowser(browserSettings) {
|
|
|
608
628
|
retryViewportHeight = screenSize?.height ?? 720;
|
|
609
629
|
}
|
|
610
630
|
// Prepare context options
|
|
631
|
+
const retryLaunchArgs = ['--disable-features=LocalNetworkAccessChecks'];
|
|
632
|
+
if (sessionConfig.cdpPort && sessionConfig.cdpPort > 0) {
|
|
633
|
+
retryLaunchArgs.push(`--remote-debugging-port=${sessionConfig.cdpPort}`);
|
|
634
|
+
}
|
|
611
635
|
const retryContextOptions = {
|
|
612
636
|
headless,
|
|
613
637
|
executablePath: executablePath,
|
|
614
|
-
args:
|
|
638
|
+
args: retryLaunchArgs,
|
|
615
639
|
};
|
|
616
640
|
// If device config exists, use it; otherwise use manual viewport/userAgent
|
|
617
641
|
if (deviceConfig) {
|
|
@@ -644,7 +668,10 @@ export async function ensureBrowser(browserSettings) {
|
|
|
644
668
|
else {
|
|
645
669
|
browser = await browserInstance.launch({
|
|
646
670
|
headless,
|
|
647
|
-
executablePath: executablePath
|
|
671
|
+
executablePath: executablePath,
|
|
672
|
+
args: sessionConfig.cdpPort && sessionConfig.cdpPort > 0
|
|
673
|
+
? [`--remote-debugging-port=${sessionConfig.cdpPort}`]
|
|
674
|
+
: [],
|
|
648
675
|
});
|
|
649
676
|
currentBrowserType = browserType;
|
|
650
677
|
browser.on('disconnected', () => {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Page } from 'playwright';
|
|
1
|
+
import type { Locator, Page } from 'playwright';
|
|
2
2
|
import { ToolHandler, ToolContext, ToolResponse } from '../common/types.js';
|
|
3
3
|
/**
|
|
4
4
|
* Base class for all browser-based tools
|
|
@@ -20,10 +20,49 @@ export declare abstract class BrowserToolBase implements ToolHandler {
|
|
|
20
20
|
* "#radix-\:rc\:-content-123" → "id=radix-:rc:-content-123"
|
|
21
21
|
* - Remove unnecessary escapes for bracket characters only (\\[ and \\])
|
|
22
22
|
* DO NOT unescape colons globally — colons in class/ID names must stay escaped in CSS.
|
|
23
|
+
*
|
|
24
|
+
* Note: the `dialog::SELECTOR` scope shortcut (e.g., `dialog::section`,
|
|
25
|
+
* `dialog::testid:close`) is NOT handled here — it is a runtime scope
|
|
26
|
+
* resolved by `createScopedLocator()`, not a syntactic rewrite.
|
|
27
|
+
*
|
|
23
28
|
* @param selector The selector string
|
|
24
29
|
* @returns Normalized selector
|
|
25
30
|
*/
|
|
26
31
|
protected normalizeSelector(selector: string): string;
|
|
32
|
+
/**
|
|
33
|
+
* Build a Playwright `Locator` honoring the `dialog::SELECTOR` scope shortcut.
|
|
34
|
+
*
|
|
35
|
+
* - `dialog::section` → topmost open dialog/sheet, then `section` inside it
|
|
36
|
+
* - `dialog::testid:close` → topmost open dialog, then `[data-testid="close"]` inside it
|
|
37
|
+
* - `dialog::` → the topmost open dialog itself
|
|
38
|
+
* - anything else → `page.locator(normalizeSelector(rawSelector))`
|
|
39
|
+
*
|
|
40
|
+
* "Topmost" is determined by the highest effective z-index — for each
|
|
41
|
+
* candidate dialog, we walk from the dialog up through its ancestors (stopping
|
|
42
|
+
* before <body>) and take the highest numeric z-index of any positioned element
|
|
43
|
+
* on that path (typically the backdrop wrapper). DOM order is the tiebreaker. This is more
|
|
44
|
+
* robust than picking `.last()` because portal frameworks don't always
|
|
45
|
+
* append in z-order, and modal stacking is driven by the backdrop's
|
|
46
|
+
* z-index, not the dialog content's.
|
|
47
|
+
*/
|
|
48
|
+
protected createScopedLocator(page: Page, rawSelector: string): Promise<Locator>;
|
|
49
|
+
/**
|
|
50
|
+
* Detect a "user-dominating" open modal — i.e. one that a human would
|
|
51
|
+
* visually focus on and interact with to the exclusion of the rest of the
|
|
52
|
+
* page. Used by inspect_dom / get_text / get_html to auto-scope when no
|
|
53
|
+
* selector is provided, so the LLM's view matches the human's view.
|
|
54
|
+
*
|
|
55
|
+
* Strict criterion: requires `aria-modal="true"` (or native `dialog[open]`)
|
|
56
|
+
* because non-modal `[role="dialog"]` includes things like side panels and
|
|
57
|
+
* tooltips that don't dominate the page.
|
|
58
|
+
*
|
|
59
|
+
* Returns null if no active modal is open. Otherwise returns the topmost
|
|
60
|
+
* one, ranked by the same z-index walk used by `createScopedLocator()`.
|
|
61
|
+
*/
|
|
62
|
+
protected detectActiveModal(page: Page): Promise<{
|
|
63
|
+
descriptor: string;
|
|
64
|
+
suggestion: string;
|
|
65
|
+
} | null>;
|
|
27
66
|
/**
|
|
28
67
|
* Sanitize verbose Playwright selector engine messages by removing stack traces and
|
|
29
68
|
* keeping only the essential syntax error information.
|
|
@@ -105,7 +144,7 @@ export declare abstract class BrowserToolBase implements ToolHandler {
|
|
|
105
144
|
* @param preferredVisible Whether visibility preference was used
|
|
106
145
|
* @returns Formatted string or empty if only one element
|
|
107
146
|
*/
|
|
108
|
-
protected formatElementSelectionInfo(selector: string, elementIndex: number, totalCount: number, preferredVisible?: boolean): string
|
|
147
|
+
protected formatElementSelectionInfo(selector: string, elementIndex: number, totalCount: number, preferredVisible?: boolean): Promise<string>;
|
|
109
148
|
/**
|
|
110
149
|
* Generate a warning message if the selector is a testid and there are duplicates
|
|
111
150
|
*
|
|
@@ -16,6 +16,11 @@ export class BrowserToolBase {
|
|
|
16
16
|
* "#radix-\:rc\:-content-123" → "id=radix-:rc:-content-123"
|
|
17
17
|
* - Remove unnecessary escapes for bracket characters only (\\[ and \\])
|
|
18
18
|
* DO NOT unescape colons globally — colons in class/ID names must stay escaped in CSS.
|
|
19
|
+
*
|
|
20
|
+
* Note: the `dialog::SELECTOR` scope shortcut (e.g., `dialog::section`,
|
|
21
|
+
* `dialog::testid:close`) is NOT handled here — it is a runtime scope
|
|
22
|
+
* resolved by `createScopedLocator()`, not a syntactic rewrite.
|
|
23
|
+
*
|
|
19
24
|
* @param selector The selector string
|
|
20
25
|
* @returns Normalized selector
|
|
21
26
|
*/
|
|
@@ -76,6 +81,188 @@ export class BrowserToolBase {
|
|
|
76
81
|
cleaned = cleaned.replace(/\\{2,}(?=:)/g, '\\');
|
|
77
82
|
return cleaned;
|
|
78
83
|
}
|
|
84
|
+
/**
|
|
85
|
+
* Build a Playwright `Locator` honoring the `dialog::SELECTOR` scope shortcut.
|
|
86
|
+
*
|
|
87
|
+
* - `dialog::section` → topmost open dialog/sheet, then `section` inside it
|
|
88
|
+
* - `dialog::testid:close` → topmost open dialog, then `[data-testid="close"]` inside it
|
|
89
|
+
* - `dialog::` → the topmost open dialog itself
|
|
90
|
+
* - anything else → `page.locator(normalizeSelector(rawSelector))`
|
|
91
|
+
*
|
|
92
|
+
* "Topmost" is determined by the highest effective z-index — for each
|
|
93
|
+
* candidate dialog, we walk up to the nearest positioned ancestor (almost
|
|
94
|
+
* always the backdrop/glass-screen wrapper, which is what stacking actually
|
|
95
|
+
* follows) and read its z-index. DOM order is the tiebreaker. This is more
|
|
96
|
+
* robust than picking `.last()` because portal frameworks don't always
|
|
97
|
+
* append in z-order, and modal stacking is driven by the backdrop's
|
|
98
|
+
* z-index, not the dialog content's.
|
|
99
|
+
*/
|
|
100
|
+
async createScopedLocator(page, rawSelector) {
|
|
101
|
+
const trimmed = (rawSelector ?? '').trim();
|
|
102
|
+
const DIALOG_PREFIX = 'dialog::';
|
|
103
|
+
if (!trimmed.startsWith(DIALOG_PREFIX)) {
|
|
104
|
+
return page.locator(this.normalizeSelector(trimmed));
|
|
105
|
+
}
|
|
106
|
+
const dialogRoots = '[role="dialog"]:not([aria-hidden="true"]),' +
|
|
107
|
+
'[role="alertdialog"]:not([aria-hidden="true"]),' +
|
|
108
|
+
'dialog[open]';
|
|
109
|
+
// Match detectActiveModal: include only user-visible candidates and
|
|
110
|
+
// rank by effective z-index. Without the visibility filter, a hidden
|
|
111
|
+
// dialog left in the DOM (display:none) could be picked over an
|
|
112
|
+
// actually-open one.
|
|
113
|
+
const result = await page.evaluate((rootsSelector) => {
|
|
114
|
+
const isUserVisible = (el) => {
|
|
115
|
+
const cs = getComputedStyle(el);
|
|
116
|
+
if (cs.display === 'none' || cs.visibility === 'hidden' || parseFloat(cs.opacity) === 0) {
|
|
117
|
+
return false;
|
|
118
|
+
}
|
|
119
|
+
const rect = el.getBoundingClientRect();
|
|
120
|
+
return rect.width > 0 && rect.height > 0;
|
|
121
|
+
};
|
|
122
|
+
const allCandidates = Array.from(document.querySelectorAll(rootsSelector));
|
|
123
|
+
const visibleIndices = [];
|
|
124
|
+
allCandidates.forEach((el, i) => {
|
|
125
|
+
if (isUserVisible(el))
|
|
126
|
+
visibleIndices.push(i);
|
|
127
|
+
});
|
|
128
|
+
if (visibleIndices.length === 0)
|
|
129
|
+
return { topIndex: -1, hasVisible: false };
|
|
130
|
+
if (visibleIndices.length === 1)
|
|
131
|
+
return { topIndex: visibleIndices[0], hasVisible: true };
|
|
132
|
+
const effectiveZ = (start) => {
|
|
133
|
+
let z = 0;
|
|
134
|
+
let node = start;
|
|
135
|
+
while (node && node !== document.body) {
|
|
136
|
+
const cs = getComputedStyle(node);
|
|
137
|
+
if (cs.position !== 'static') {
|
|
138
|
+
const parsed = parseInt(cs.zIndex, 10);
|
|
139
|
+
if (!isNaN(parsed)) {
|
|
140
|
+
z = Math.max(z, parsed);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
node = node.parentElement;
|
|
144
|
+
}
|
|
145
|
+
return z;
|
|
146
|
+
};
|
|
147
|
+
let bestIdx = visibleIndices[0];
|
|
148
|
+
let bestScore = -Infinity;
|
|
149
|
+
visibleIndices.forEach((i) => {
|
|
150
|
+
// Tiebreaker: DOM order — later element is on top.
|
|
151
|
+
const score = effectiveZ(allCandidates[i]) * 1000000 + i;
|
|
152
|
+
if (score > bestScore) {
|
|
153
|
+
bestScore = score;
|
|
154
|
+
bestIdx = i;
|
|
155
|
+
}
|
|
156
|
+
});
|
|
157
|
+
return { topIndex: bestIdx, hasVisible: true };
|
|
158
|
+
}, dialogRoots).catch(() => ({ topIndex: -1, hasVisible: false }));
|
|
159
|
+
// No visible dialog → return a never-matching locator so downstream
|
|
160
|
+
// callers see a clean "No elements found" instead of silently scoping
|
|
161
|
+
// to a hidden dialog left in the DOM.
|
|
162
|
+
if (!result.hasVisible) {
|
|
163
|
+
return page.locator('dialog-no-such-element-sentinel');
|
|
164
|
+
}
|
|
165
|
+
const topmostDialog = page.locator(dialogRoots).nth(result.topIndex);
|
|
166
|
+
const inner = trimmed.slice(DIALOG_PREFIX.length).trim();
|
|
167
|
+
if (!inner) {
|
|
168
|
+
return topmostDialog;
|
|
169
|
+
}
|
|
170
|
+
return topmostDialog.locator(this.normalizeSelector(inner));
|
|
171
|
+
}
|
|
172
|
+
/**
|
|
173
|
+
* Detect a "user-dominating" open modal — i.e. one that a human would
|
|
174
|
+
* visually focus on and interact with to the exclusion of the rest of the
|
|
175
|
+
* page. Used by inspect_dom / get_text / get_html to auto-scope when no
|
|
176
|
+
* selector is provided, so the LLM's view matches the human's view.
|
|
177
|
+
*
|
|
178
|
+
* Strict criterion: requires `aria-modal="true"` (or native `dialog[open]`)
|
|
179
|
+
* because non-modal `[role="dialog"]` includes things like side panels and
|
|
180
|
+
* tooltips that don't dominate the page.
|
|
181
|
+
*
|
|
182
|
+
* Returns null if no active modal is open. Otherwise returns the topmost
|
|
183
|
+
* one, ranked by the same z-index walk used by `createScopedLocator()`.
|
|
184
|
+
*/
|
|
185
|
+
async detectActiveModal(page) {
|
|
186
|
+
const ACTIVE_MODAL_SELECTOR = '[role="dialog"][aria-modal="true"]:not([aria-hidden="true"]),' +
|
|
187
|
+
'[role="alertdialog"][aria-modal="true"]:not([aria-hidden="true"]),' +
|
|
188
|
+
'dialog[open]';
|
|
189
|
+
return await page.evaluate((rootsSelector) => {
|
|
190
|
+
const isUserVisible = (el) => {
|
|
191
|
+
const cs = getComputedStyle(el);
|
|
192
|
+
if (cs.display === 'none' || cs.visibility === 'hidden' || parseFloat(cs.opacity) === 0) {
|
|
193
|
+
return false;
|
|
194
|
+
}
|
|
195
|
+
const rect = el.getBoundingClientRect();
|
|
196
|
+
return rect.width > 0 && rect.height > 0;
|
|
197
|
+
};
|
|
198
|
+
const candidates = Array.from(document.querySelectorAll(rootsSelector)).filter(isUserVisible);
|
|
199
|
+
if (candidates.length === 0)
|
|
200
|
+
return null;
|
|
201
|
+
const effectiveZ = (start) => {
|
|
202
|
+
let z = 0;
|
|
203
|
+
let node = start;
|
|
204
|
+
while (node && node !== document.body) {
|
|
205
|
+
const cs = getComputedStyle(node);
|
|
206
|
+
if (cs.position !== 'static') {
|
|
207
|
+
const parsed = parseInt(cs.zIndex, 10);
|
|
208
|
+
if (!isNaN(parsed)) {
|
|
209
|
+
z = Math.max(z, parsed);
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
node = node.parentElement;
|
|
213
|
+
}
|
|
214
|
+
return z;
|
|
215
|
+
};
|
|
216
|
+
let bestIdx = 0;
|
|
217
|
+
let bestScore = -Infinity;
|
|
218
|
+
candidates.forEach((el, i) => {
|
|
219
|
+
const score = effectiveZ(el) * 1000000 + i;
|
|
220
|
+
if (score > bestScore) {
|
|
221
|
+
bestScore = score;
|
|
222
|
+
bestIdx = i;
|
|
223
|
+
}
|
|
224
|
+
});
|
|
225
|
+
const top = candidates[bestIdx];
|
|
226
|
+
const tag = top.tagName.toLowerCase();
|
|
227
|
+
const role = top.getAttribute('role') || (tag === 'dialog' ? 'dialog' : '');
|
|
228
|
+
const testid = top.getAttribute('data-testid') ||
|
|
229
|
+
top.getAttribute('data-test') ||
|
|
230
|
+
top.getAttribute('data-cy');
|
|
231
|
+
const id = top.id || null;
|
|
232
|
+
const ariaLabel = top.getAttribute('aria-label');
|
|
233
|
+
const ariaLabelledBy = top.getAttribute('aria-labelledby');
|
|
234
|
+
let labelText = null;
|
|
235
|
+
if (ariaLabelledBy) {
|
|
236
|
+
const labelEl = document.getElementById(ariaLabelledBy);
|
|
237
|
+
labelText = labelEl?.textContent?.trim() || null;
|
|
238
|
+
}
|
|
239
|
+
const parts = [`<${tag}`];
|
|
240
|
+
if (role)
|
|
241
|
+
parts.push(`role="${role}"`);
|
|
242
|
+
if (testid)
|
|
243
|
+
parts.push(`data-testid="${testid}"`);
|
|
244
|
+
else if (id)
|
|
245
|
+
parts.push(`id="${id}"`);
|
|
246
|
+
if (ariaLabel)
|
|
247
|
+
parts.push(`aria-label="${ariaLabel}"`);
|
|
248
|
+
else if (labelText)
|
|
249
|
+
parts.push(`labelled="${labelText.slice(0, 60)}"`);
|
|
250
|
+
parts[parts.length - 1] += '>';
|
|
251
|
+
return {
|
|
252
|
+
descriptor: parts.join(' '),
|
|
253
|
+
suggestion: testid ? `dialog::testid:${testid}` : 'dialog::',
|
|
254
|
+
};
|
|
255
|
+
}, ACTIVE_MODAL_SELECTOR).then((result) => {
|
|
256
|
+
// Defensive: only treat as a real modal if the result is a
|
|
257
|
+
// properly-shaped object. Mocked test environments may return
|
|
258
|
+
// arbitrary values from page.evaluate() that should not trigger
|
|
259
|
+
// auto-scope.
|
|
260
|
+
if (result && typeof result === 'object' && typeof result.descriptor === 'string') {
|
|
261
|
+
return result;
|
|
262
|
+
}
|
|
263
|
+
return null;
|
|
264
|
+
}, () => null);
|
|
265
|
+
}
|
|
79
266
|
/**
|
|
80
267
|
* Sanitize verbose Playwright selector engine messages by removing stack traces and
|
|
81
268
|
* keeping only the essential syntax error information.
|
|
@@ -249,24 +436,27 @@ export class BrowserToolBase {
|
|
|
249
436
|
// Check for multiple elements with errorOnMultiple flag
|
|
250
437
|
if (options?.errorOnMultiple && count > 1) {
|
|
251
438
|
const selector = options.originalSelector || 'selector';
|
|
252
|
-
const nthHint = ''.trimEnd();
|
|
253
|
-
const warning = ''.trimEnd();
|
|
254
439
|
let message = `Selector "${selector}" matched ${count} elements. Please use a more specific selector.`;
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
}
|
|
261
|
-
{
|
|
440
|
+
// Verbose disambiguation guidance is rate-limited per session — useful
|
|
441
|
+
// once for the agent to learn the pattern, noise on every subsequent call.
|
|
442
|
+
// After the first emit, fall back to a one-line pointer.
|
|
443
|
+
const { hasShownNthHint, markNthHintShown } = await import('../../toolHandler.js');
|
|
444
|
+
if (!hasShownNthHint()) {
|
|
262
445
|
const guidance = [
|
|
263
446
|
`1) Preferred: add a unique data-testid and select it directly (e.g., testid:submit).`,
|
|
264
447
|
`2) If you cannot change markup: append \`>> nth=<index>\` to target a specific match.`,
|
|
265
448
|
];
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
throw new Error(message);
|
|
449
|
+
message += `\n${guidance.join('\n')}`;
|
|
450
|
+
markNthHintShown();
|
|
269
451
|
}
|
|
452
|
+
else {
|
|
453
|
+
message += `\nUse a more specific selector (e.g. testid:..., or '>> nth=<index>').`;
|
|
454
|
+
}
|
|
455
|
+
// Per-call match details remain — they describe what's actually on the
|
|
456
|
+
// page, not generic advice.
|
|
457
|
+
const matchesDetails = await this.describeMatchedElements(locator, selector, count);
|
|
458
|
+
message += `\n\nMatches:\n${matchesDetails}`;
|
|
459
|
+
throw new Error(message);
|
|
270
460
|
}
|
|
271
461
|
// Handle explicit element index (1-based)
|
|
272
462
|
if (options?.elementIndex !== undefined) {
|
|
@@ -316,7 +506,7 @@ export class BrowserToolBase {
|
|
|
316
506
|
* @param preferredVisible Whether visibility preference was used
|
|
317
507
|
* @returns Formatted string or empty if only one element
|
|
318
508
|
*/
|
|
319
|
-
formatElementSelectionInfo(selector, elementIndex, totalCount, preferredVisible = true) {
|
|
509
|
+
async formatElementSelectionInfo(selector, elementIndex, totalCount, preferredVisible = true) {
|
|
320
510
|
const usesNth = selector.includes('>> nth=');
|
|
321
511
|
if (totalCount <= 1) {
|
|
322
512
|
// Even when a single element is ultimately targeted, discourage nth usage
|
|
@@ -326,10 +516,25 @@ export class BrowserToolBase {
|
|
|
326
516
|
}
|
|
327
517
|
return '';
|
|
328
518
|
}
|
|
329
|
-
const duplicateWarning = this.getDuplicateTestIdWarning(selector, totalCount).trimEnd();
|
|
330
|
-
const nthHint = this.buildNthSelectorHint(selector, totalCount).trimEnd();
|
|
331
519
|
const avoidNth = usesNth ? "💡 Tip: Avoid relying on '>> nth='; add a unique data-testid instead." : '';
|
|
332
|
-
|
|
520
|
+
// Verbose nth-selector guidance is rate-limited to one emit per session.
|
|
521
|
+
// The short ⚠ warning still surfaces every call; the multi-line hint block
|
|
522
|
+
// (duplicate-testid tip + nth-selector workaround) appears only on the
|
|
523
|
+
// first multi-match of the session — it's reference material the agent
|
|
524
|
+
// only needs once.
|
|
525
|
+
let extraHints = '';
|
|
526
|
+
const { hasShownNthHint, markNthHintShown } = await import('../../toolHandler.js');
|
|
527
|
+
if (!hasShownNthHint()) {
|
|
528
|
+
const duplicateWarning = this.getDuplicateTestIdWarning(selector, totalCount).trimEnd();
|
|
529
|
+
const nthHint = this.buildNthSelectorHint(selector, totalCount).trimEnd();
|
|
530
|
+
extraHints = [duplicateWarning, nthHint, avoidNth].filter(Boolean).join('\n');
|
|
531
|
+
if (duplicateWarning || nthHint) {
|
|
532
|
+
markNthHintShown();
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
else if (avoidNth) {
|
|
536
|
+
extraHints = avoidNth;
|
|
537
|
+
}
|
|
333
538
|
const baseMessage = preferredVisible
|
|
334
539
|
? `⚠ Found ${totalCount} elements matching "${selector}", using element ${elementIndex + 1} (first visible)`
|
|
335
540
|
: `⚠ Found ${totalCount} elements matching "${selector}", using element ${elementIndex + 1}`;
|
|
@@ -5,3 +5,15 @@ export declare function titleUrlChangeLines(page: Page, initial?: {
|
|
|
5
5
|
url?: string;
|
|
6
6
|
title?: string;
|
|
7
7
|
}): Promise<string[]>;
|
|
8
|
+
export type OverlayKind = 'dialog' | 'menu' | 'listbox' | 'tooltip' | 'popup';
|
|
9
|
+
export interface OverlayEntry {
|
|
10
|
+
descriptor: string;
|
|
11
|
+
kind: OverlayKind;
|
|
12
|
+
suggestion?: string;
|
|
13
|
+
}
|
|
14
|
+
export interface OverlaySnapshot {
|
|
15
|
+
keys: string[];
|
|
16
|
+
entries: Record<string, OverlayEntry>;
|
|
17
|
+
}
|
|
18
|
+
export declare function snapshotOpenOverlays(page: Page): Promise<OverlaySnapshot>;
|
|
19
|
+
export declare function overlayChangeLines(before: OverlaySnapshot, after: OverlaySnapshot): string[];
|