@rayven122/mcp-selenium 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1191 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync } from 'node:fs';
4
+ import { isAbsolute, relative, resolve, sep } from 'node:path';
5
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
6
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
7
+ import { ErrorCode, McpError } from '@modelcontextprotocol/sdk/types.js';
8
+ import pkg from 'selenium-webdriver';
9
+ import { z } from 'zod';
10
+
11
+ const { Builder, By, Key, until, error } = pkg;
12
+
13
+ // Create an MCP server
14
+ import { createRequire } from 'node:module';
15
+ import { Options as ChromeOptions } from 'selenium-webdriver/chrome.js';
16
+ import { Options as EdgeOptions } from 'selenium-webdriver/edge.js';
17
+ import { Options as FirefoxOptions } from 'selenium-webdriver/firefox.js';
18
+ import { Options as IeOptions } from 'selenium-webdriver/ie.js';
19
+ import { Options as SafariOptions } from 'selenium-webdriver/safari.js';
20
+
21
// ES modules have no built-in `require`; build one relative to this file so
// the package manifest can be loaded as JSON.
const require = createRequire(import.meta.url);
const { version } = require('../../package.json');

// The MCP server instance that every tool below is registered on. The
// `instructions` text is handed to the server as an option.
const server = new McpServer(
  {
    name: 'MCP Selenium',
    version,
  },
  {
    instructions:
      "To understand the current page state, read the accessibility://current resource. It provides a structured accessibility tree that's faster and more reliable for finding element locators.",
  }
);
31
+
32
+ // BiDi imports — loaded dynamically to avoid hard failures if not available
33
// Both bindings stay null unless BOTH BiDi modules load successfully; callers
// test `LogInspector && Network` before using either.
let LogInspector = null;
let Network = null;
try {
  const { default: logInspectorFactory } = await import(
    'selenium-webdriver/bidi/logInspector.js'
  );
  const { Network: networkFactory } = await import('selenium-webdriver/bidi/network.js');
  LogInspector = logInspectorFactory;
  Network = networkFactory;
} catch (_) {
  // BiDi modules not available in this selenium-webdriver version;
  // leave both bindings null.
}
43
+
44
+ // Server state
45
const state = {
  // session id -> WebDriver instance (filled by start_browser)
  drivers: new Map(),
  // id of the session the tools currently operate on, or null when none
  currentSession: null,
  // session id -> captured BiDi diagnostics (filled by setupBidi)
  bidi: new Map(),
};
50
+
51
+ // Helper functions
52
/**
 * Returns the WebDriver registered for the current session.
 *
 * @returns {object} The driver stored under `state.currentSession`.
 * @throws {Error} When no driver is registered for the current session.
 */
const getDriver = () => {
  const activeDriver = state.drivers.get(state.currentSession);
  if (activeDriver) {
    return activeDriver;
  }
  throw new Error('No active browser session');
};
59
+
60
/**
 * Translates a locator strategy name into a selenium-webdriver `By` locator.
 *
 * @param {string} by - Strategy name, matched case-insensitively:
 *   id, css, xpath, name, tag or class.
 * @param {string} value - Value handed to the chosen strategy.
 * @returns {object} The locator produced by the matching `By.*` factory.
 * @throws {Error} When the strategy name is not one of the supported ones.
 */
const getLocator = (by, value) => {
  const strategy = by.toLowerCase();
  if (strategy === 'id') return By.id(value);
  if (strategy === 'css') return By.css(value);
  if (strategy === 'xpath') return By.xpath(value);
  if (strategy === 'name') return By.name(value);
  if (strategy === 'tag') return By.tagName(value);
  if (strategy === 'class') return By.className(value);
  throw new Error(`Unsupported locator strategy: ${by}`);
};
78
+
79
// Browser command-line switches that validateBrowserArguments rejects unless
// MCP_SELENIUM_ALLOW_UNSAFE_BROWSER_ARGS=1 is set. Entries are stored in
// lower case with their leading "--".
const unsafeBrowserArgumentNames = new Set(
  [
    'allow-file-access-from-files',
    'allow-running-insecure-content',
    'disable-web-security',
    'disable-site-isolation-trials',
    'disable-features',
    'disable-extensions-except',
    'load-extension',
    'remote-allow-origins',
    'remote-debugging-address',
    'remote-debugging-port',
    'remote-debugging-pipe',
    'unsafely-treat-insecure-origin-as-secure',
    'user-data-dir',
  ].map((switchName) => `--${switchName}`)
);
94
+
95
/**
 * Reports whether the operator opted in to unsafe browser arguments.
 *
 * @returns {boolean} True only when MCP_SELENIUM_ALLOW_UNSAFE_BROWSER_ARGS is
 *   exactly the string '1'.
 */
const allowUnsafeBrowserArgs = () => {
  const { MCP_SELENIUM_ALLOW_UNSAFE_BROWSER_ARGS: optIn } = process.env;
  return optIn === '1';
};
96
+
97
/**
 * Extracts the canonical switch name from a browser command-line argument so
 * it can be compared against the blocklist.
 *
 * The name is everything before the first "=", lower-cased, with surrounding
 * whitespace and any leading dashes removed and a single "--" prefix put back.
 * Normalising the prefix matters because Selenium's Chromium options accept
 * arguments with or without the leading "--": without this step an argument
 * such as "disable-web-security" would not match the "--disable-web-security"
 * blocklist entry.
 *
 * @param {string} arg - Raw argument, e.g. "--user-data-dir=/tmp/profile".
 * @returns {string} Canonical name, e.g. "--user-data-dir".
 */
const browserArgName = (arg) => {
  const [rawName] = arg.trim().split('=');
  const bareName = rawName.trim().replace(/^-+/, '').toLowerCase();
  return `--${bareName}`;
};
98
+
99
/**
 * Rejects browser arguments that are blocked by default.
 *
 * Validation is skipped entirely when allowUnsafeBrowserArgs() is true.
 *
 * @param {string[]} [args=[]] - Extra browser command-line arguments.
 * @throws {Error} When an argument contains a NUL byte or its name is listed
 *   in unsafeBrowserArgumentNames.
 */
const validateBrowserArguments = (args = []) => {
  const enforcementDisabled = allowUnsafeBrowserArgs();
  if (!enforcementDisabled) {
    for (const candidate of args) {
      if (candidate.includes('\0')) {
        throw new Error('Browser arguments must not contain NUL bytes');
      }
      const name = browserArgName(candidate);
      if (!unsafeBrowserArgumentNames.has(name)) {
        continue;
      }
      throw new Error(
        `Browser argument "${name}" is blocked by default. Set MCP_SELENIUM_ALLOW_UNSAFE_BROWSER_ARGS=1 to allow it in a trusted environment.`
      );
    }
  }
};
114
+
115
/**
 * Blocks navigation to script-executing URL schemes.
 *
 * The scheme is extracted after normalising the input the way URL parsers
 * do: leading whitespace / C0 control characters are dropped and ASCII tab,
 * LF and CR are removed everywhere. Without that step an input such as
 * "java\tscript:alert(1)" would not be recognised as a javascript: URL here
 * even though a browser would treat it as one.
 *
 * @param {string} url - URL requested for navigation.
 * @throws {Error} When the scheme is javascript: or vbscript:.
 */
const validateNavigationUrl = (url) => {
  const normalized = url
    .trim()
    .replace(/^[\u0000-\u0020]+/, '')
    .replace(/[\t\n\r]/g, '');
  const schemeMatch = normalized.match(/^([a-z][a-z0-9+.-]*):/i);
  const scheme = schemeMatch?.[1]?.toLowerCase();
  if (scheme === 'javascript' || scheme === 'vbscript') {
    throw new Error(
      `Navigation to ${scheme}: URLs is blocked; use execute_script for explicit JavaScript execution.`
    );
  }
};
124
+
125
/**
 * Returns the absolute directory that screenshots must be written inside.
 *
 * @returns {string} MCP_SELENIUM_SCREENSHOT_DIR resolved to an absolute path,
 *   or the current working directory when that variable is unset or empty.
 */
const screenshotRoot = () => {
  const configuredDir = process.env.MCP_SELENIUM_SCREENSHOT_DIR;
  const baseDir = configuredDir ? configuredDir : process.cwd();
  return resolve(baseDir);
};
126
+
127
/**
 * Validates a requested screenshot path and resolves it to an absolute path.
 *
 * Relative paths are resolved against screenshotRoot(); absolute paths are
 * accepted only when they already lie inside that root.
 *
 * @param {string} outputPath - Path requested by the caller.
 * @returns {string} Absolute path inside the screenshot root.
 * @throws {Error} When the path contains a NUL byte, does not end with .png
 *   (case-insensitive), or resolves outside the screenshot root.
 */
const resolveScreenshotOutputPath = (outputPath) => {
  if (outputPath.includes('\0')) {
    throw new Error('Screenshot outputPath must not contain NUL bytes');
  }
  const hasPngExtension = outputPath.toLowerCase().endsWith('.png');
  if (!hasPngExtension) {
    throw new Error('Screenshot outputPath must end with .png');
  }

  const root = screenshotRoot();
  const target = isAbsolute(outputPath) ? resolve(outputPath) : resolve(root, outputPath);

  // A path from root to target that climbs ("..") or is itself absolute
  // means the target is not located under root.
  const fromRoot = relative(root, target);
  const escapesRoot =
    fromRoot === '..' || fromRoot.startsWith(`..${sep}`) || isAbsolute(fromRoot);
  if (escapesRoot) {
    throw new Error(`Screenshot outputPath must be inside ${root}`);
  }
  return target;
};
143
+
144
+ // BiDi helpers
145
/**
 * Creates an empty per-session BiDi diagnostics record.
 *
 * @returns {{available: boolean, consoleLogs: Array, pageErrors: Array, networkLogs: Array}}
 *   A fresh record with `available` false and three empty log arrays.
 */
const newBidiState = () => {
  const fresh = { available: false };
  for (const logKey of ['consoleLogs', 'pageErrors', 'networkLogs']) {
    fresh[logKey] = [];
  }
  return fresh;
};
151
+
152
/**
 * Subscribes to BiDi log and network events for a driver and stores the
 * collected entries under the session id in `state.bidi`.
 *
 * The record is registered (and marked available) only after every
 * subscription has succeeded; any failure propagates to the caller.
 *
 * @param {object} driver - WebDriver instance to attach the listeners to.
 * @param {string} sessionId - Key under which the record is stored.
 * @returns {Promise<void>}
 */
async function setupBidi(driver, sessionId) {
  const bidi = newBidiState();

  // Wraps a recorder so an entry/event that makes it throw is dropped
  // instead of raising inside the BiDi callback.
  const tolerant = (record) => (payload) => {
    try {
      record(payload);
    } catch (_) {
      /* ignore malformed entry or event */
    }
  };

  const logInspector = await LogInspector(driver);
  await logInspector.onConsoleEntry(
    tolerant((entry) => {
      bidi.consoleLogs.push({
        level: entry.level,
        text: entry.text,
        timestamp: entry.timestamp,
        type: entry.type,
        method: entry.method,
        args: entry.args,
      });
    })
  );
  await logInspector.onJavascriptLog(
    tolerant((entry) => {
      bidi.pageErrors.push({
        level: entry.level,
        text: entry.text,
        timestamp: entry.timestamp,
        type: entry.type,
        stackTrace: entry.stackTrace,
      });
    })
  );

  const network = await Network(driver);
  await network.responseCompleted(
    tolerant((event) => {
      bidi.networkLogs.push({
        type: 'response',
        url: event.request?.url,
        status: event.response?.status,
        method: event.request?.method,
        mimeType: event.response?.mimeType,
        timestamp: Date.now(),
      });
    })
  );
  await network.fetchError(
    tolerant((event) => {
      bidi.networkLogs.push({
        type: 'error',
        url: event.request?.url,
        method: event.request?.method,
        errorText: event.errorText,
        timestamp: Date.now(),
      });
    })
  );

  bidi.available = true;
  state.bidi.set(sessionId, bidi);
}
216
+
217
+ // Browser-side script loaded from file and executed via WebDriver's executeScript.
218
// Source text of accessibility-snapshot.js, read synchronously at module load
// from the directory containing this module.
const accessibilitySnapshotScript = readFileSync(
  new URL('./accessibility-snapshot.js', import.meta.url),
  { encoding: 'utf-8' }
);
222
+
223
+ // Common schemas
224
// Browser names accepted by start_browser. 'edge-ie' is offered only when
// running on Windows.
const supportedBrowsers = ['chrome', 'firefox', 'edge', 'safari'];
if (process.platform === 'win32') {
  supportedBrowsers.push('edge-ie');
}
228
+
229
// Optional launch options accepted by start_browser. Every field is optional,
// and so is the object itself.
const browserOptionsSchema = z
  .object({
    headless: z.boolean().optional().describe('Run browser in headless mode'),
    arguments: z.array(z.string()).optional().describe('Additional browser arguments'),
    edgePath: z
      .string()
      .optional()
      .describe(
        'Path to msedge.exe (edge-ie only; defaults to the standard install path). Windows only.'
      ),
    // This flag is forwarded to IeOptions.ignoreZoomSetting(), so the
    // description speaks about the zoom-level check rather than protected-mode
    // zones, which are governed by a different IE option.
    ieIgnoreZoomSetting: z
      .boolean()
      .optional()
      .describe('Skip the check that the browser zoom level is set to 100% (edge-ie only)'),
  })
  .optional();
245
+
246
// Input fields shared by every tool that locates a single element; the tools
// spread this object into their own inputSchema.
const locatorSchema = {
  // Must stay in step with the strategies handled by getLocator.
  by: z
    .enum(['id', 'css', 'xpath', 'name', 'tag', 'class'])
    .describe('Locator strategy to find element'),

  value: z.string().describe('Value for the locator strategy'),

  // Optional; each tool handler supplies its own default when omitted.
  timeout: z.number().optional().describe('Maximum time to wait for element in milliseconds'),
};
253
+
254
+ // Browser Management Tools
255
server.registerTool(
  'start_browser',
  {
    description: 'launches browser',
    inputSchema: {
      browser: z
        .enum(supportedBrowsers)
        .describe(
          "Browser to launch. On Windows, 'edge-ie' drives Microsoft Edge in Internet Explorer (IE) mode and requires IEDriverServer on PATH."
        ),
      options: browserOptionsSchema,
    },
  },
  /**
   * Launches the requested browser, registers it as the current session and,
   * when BiDi was requested for it, tries to attach BiDi log capture.
   *
   * @param {object} input
   * @param {string} input.browser - One of supportedBrowsers.
   * @param {object} [input.options] - See browserOptionsSchema.
   * @returns {Promise<object>} Text result carrying the new session id plus
   *   any warnings, or an error result (isError: true) when launching fails.
   */
  async ({ browser, options = {} }) => {
    try {
      validateBrowserArguments(options.arguments);

      let builder = new Builder();
      let driver;
      const warnings = [];

      // BiDi is requested only when both modules loaded and the browser is
      // not IE mode (which does not support WebDriver BiDi). The same flag
      // gates the setup attempt after launch, so no setup is attempted for a
      // session that never asked for a BiDi websocket.
      const bidiRequested = Boolean(LogInspector && Network) && browser !== 'edge-ie';
      if (bidiRequested) {
        // 'ignore' prevents BiDi from auto-dismissing alert/confirm/prompt dialogs,
        // allowing the alert tool's accept, dismiss, and get_text actions to work as expected.
        builder = builder.withCapabilities({
          webSocketUrl: true,
          unhandledPromptBehavior: 'ignore',
        });
      }

      switch (browser) {
        case 'chrome': {
          const chromeOptions = new ChromeOptions();
          if (options.headless) {
            chromeOptions.addArguments('--headless=new');
          }
          if (options.arguments) {
            options.arguments.forEach((arg) => {
              chromeOptions.addArguments(arg);
            });
          }
          driver = await builder.forBrowser('chrome').setChromeOptions(chromeOptions).build();
          break;
        }
        case 'edge': {
          const edgeOptions = new EdgeOptions();
          if (options.headless) {
            edgeOptions.addArguments('--headless=new');
          }
          if (options.arguments) {
            options.arguments.forEach((arg) => {
              edgeOptions.addArguments(arg);
            });
          }
          driver = await builder.forBrowser('edge').setEdgeOptions(edgeOptions).build();
          break;
        }
        case 'firefox': {
          const firefoxOptions = new FirefoxOptions();
          if (options.headless) {
            firefoxOptions.addArguments('--headless');
          }
          if (options.arguments) {
            options.arguments.forEach((arg) => {
              firefoxOptions.addArguments(arg);
            });
          }
          driver = await builder.forBrowser('firefox').setFirefoxOptions(firefoxOptions).build();
          break;
        }
        case 'safari': {
          const safariOptions = new SafariOptions();
          if (options.headless) {
            warnings.push('Safari does not support headless mode — launching with visible window.');
          }
          if (options.arguments?.length) {
            warnings.push('Safari does not support custom arguments — ignoring.');
          }
          driver = await builder.forBrowser('safari').setSafariOptions(safariOptions).build();
          break;
        }
        case 'edge-ie': {
          // Microsoft Edge in Internet Explorer (IE) mode.
          // Windows only: driven by IEDriverServer (must be on PATH), which attaches
          // to Edge (Chromium) and renders pages with the legacy IE engine.
          if (process.platform !== 'win32') {
            throw new Error('Edge IE mode is only supported on Windows.');
          }
          const ieOptions = new IeOptions();
          ieOptions.setEdgeChromium(true);
          ieOptions.setEdgePath(
            options.edgePath || 'C:\\Program Files (x86)\\Microsoft\\Edge\\Application\\msedge.exe'
          );
          // IE mode needs the CreateProcess API to launch reliably under Edge.
          ieOptions.forceCreateProcessApi(true);
          if (options.ieIgnoreZoomSetting) {
            ieOptions.ignoreZoomSetting(true);
          }
          if (options.headless) {
            warnings.push(
              'Edge IE mode does not support headless — launching with a visible window.'
            );
          }
          if (options.arguments?.length) {
            options.arguments.forEach((arg) => {
              ieOptions.addArguments(arg);
            });
          }
          driver = await builder.forBrowser('internet explorer').setIeOptions(ieOptions).build();
          break;
        }
        default: {
          throw new Error(`Unsupported browser: ${browser}`);
        }
      }

      const sessionId = `${browser}_${Date.now()}`;
      state.drivers.set(sessionId, driver);
      state.currentSession = sessionId;

      // Attempt to enable BiDi for real-time log capture, but only for
      // sessions that were created with the BiDi websocket capability.
      if (bidiRequested) {
        try {
          await setupBidi(driver, sessionId);
        } catch (_) {
          // BiDi not supported by this browser/driver — continue without it
        }
      }

      let message = `Browser started with session_id: ${sessionId}`;
      if (state.bidi.get(sessionId)?.available) {
        message +=
          ' (BiDi enabled: console logs, JS errors, and network activity are being captured)';
      }
      if (warnings.length > 0) {
        message += `\nWarnings: ${warnings.join(' ')}`;
      }

      return {
        content: [{ type: 'text', text: message }],
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error starting browser: ${e.message}` }],
        isError: true,
      };
    }
  }
);
420
+
421
server.registerTool(
  'navigate',
  {
    description: 'navigates to a URL',
    inputSchema: {
      url: z.string().describe('URL to navigate to'),
    },
  },
  // Checks the URL scheme first, then loads the URL in the current session.
  // Any failure is reported as an error result rather than thrown.
  async ({ url }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      validateNavigationUrl(url);
      await getDriver().get(url);
      return asText(`Navigated to ${url}`);
    } catch (e) {
      return { ...asText(`Error navigating: ${e.message}`), isError: true };
    }
  }
);
445
+
446
+ // Element Interaction Tools
447
server.registerTool(
  'interact',
  {
    description: 'performs a mouse action on an element',
    inputSchema: {
      action: z
        .enum(['click', 'doubleclick', 'rightclick', 'hover'])
        .describe('Mouse action to perform'),
      ...locatorSchema,
    },
  },
  // Waits for the element (default 10000 ms) and then performs the requested
  // mouse action on it. Failures are returned as error results.
  async ({ action, by, value, timeout = 10000 }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();
      const element = await driver.wait(until.elementLocated(getLocator(by, value)), timeout);

      if (action === 'click') {
        await element.click();
        return asText('Element clicked');
      }
      if (action === 'doubleclick') {
        await driver.actions({ bridge: true }).doubleClick(element).perform();
        return asText('Double click performed');
      }
      if (action === 'rightclick') {
        await driver.actions({ bridge: true }).contextClick(element).perform();
        return asText('Right click performed');
      }
      if (action === 'hover') {
        await driver.actions({ bridge: true }).move({ origin: element }).perform();
        return asText('Hovered over element');
      }
      return { ...asText(`Unknown action: ${action}`), isError: true };
    } catch (e) {
      return { ...asText(`Error performing ${action}: ${e.message}`), isError: true };
    }
  }
);
497
+
498
server.registerTool(
  'send_keys',
  {
    description: 'sends keys to an element, aka typing. Clears the field first.',
    inputSchema: {
      ...locatorSchema,
      text: z.string().describe('Text to enter into the element'),
    },
  },
  // Waits for the element (default 10000 ms), clears it, then types the text.
  async ({ by, value, text, timeout = 10000 }) => {
    const asText = (message) => ({ content: [{ type: 'text', text: message }] });
    try {
      const driver = getDriver();
      const field = await driver.wait(until.elementLocated(getLocator(by, value)), timeout);
      await field.clear();
      await field.sendKeys(text);
      return asText(`Text "${text}" entered into element`);
    } catch (e) {
      return { ...asText(`Error entering text: ${e.message}`), isError: true };
    }
  }
);
525
+
526
server.registerTool(
  'get_element_text',
  {
    description: 'gets the text content of an element',
    inputSchema: {
      ...locatorSchema,
    },
  },
  // Waits for the element (default 10000 ms) and returns its getText() value.
  async ({ by, value, timeout = 10000 }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();
      const element = await driver.wait(until.elementLocated(getLocator(by, value)), timeout);
      return asText(await element.getText());
    } catch (e) {
      return { ...asText(`Error getting element text: ${e.message}`), isError: true };
    }
  }
);
551
+
552
server.registerTool(
  'press_key',
  {
    description: 'simulates pressing a keyboard key',
    inputSchema: {
      key: z.string().describe("Key to press (e.g., 'Enter', 'Tab', 'a', etc.)"),
    },
  },
  // A one-character key is sent as-is. Anything else is looked up on
  // selenium's Key table after upper-casing and turning spaces into
  // underscores; a failed lookup is reported as an error result.
  async ({ key }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();

      let resolvedKey = key;
      if (key.length !== 1) {
        const keyName = key.toUpperCase().replace(/ /g, '_');
        resolvedKey = Key[keyName] ?? null;
      }
      if (resolvedKey === null) {
        return {
          ...asText(
            `Error pressing key: Unknown key name '${key}'. Use a single character or a named key like 'Enter', 'Tab', 'Escape', etc.`
          ),
          isError: true,
        };
      }

      await driver.actions({ bridge: true }).keyDown(resolvedKey).keyUp(resolvedKey).perform();
      return asText(`Key '${key}' pressed`);
    } catch (e) {
      return { ...asText(`Error pressing key: ${e.message}`), isError: true };
    }
  }
);
589
+
590
server.registerTool(
  'upload_file',
  {
    description: 'uploads a file using a file input element',
    inputSchema: {
      ...locatorSchema,
      filePath: z.string().describe('Absolute path to the file to upload'),
    },
  },
  // Waits for the element (default 10000 ms) and sends the file path to it
  // via sendKeys.
  async ({ by, value, filePath, timeout = 10000 }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();
      const fileInput = await driver.wait(until.elementLocated(getLocator(by, value)), timeout);
      await fileInput.sendKeys(filePath);
      return asText('File upload initiated');
    } catch (e) {
      return { ...asText(`Error uploading file: ${e.message}`), isError: true };
    }
  }
);
616
+
617
server.registerTool(
  'take_screenshot',
  {
    description:
      'captures a screenshot of the current page. Prefer using the accessibility://current resource for understanding page content. Use get_element_text, get_element_attribute, or execute_script to verify element state. Only use screenshots when visual layout or styling needs to be verified.',
    inputSchema: {
      outputPath: z
        .string()
        .optional()
        .describe(
          'Optional path where to save the screenshot. If not provided, returns an image/png content block.'
        ),
    },
  },
  // Captures the page as a base64 string. Without outputPath the image is
  // returned inline; with outputPath it is validated by
  // resolveScreenshotOutputPath and written to disk.
  async ({ outputPath }) => {
    try {
      const screenshot = await getDriver().takeScreenshot();
      if (!outputPath) {
        return {
          content: [{ type: 'image', data: screenshot, mimeType: 'image/png' }],
        };
      }

      const resolvedOutputPath = resolveScreenshotOutputPath(outputPath);
      const { promises: fsPromises } = await import('node:fs');
      await fsPromises.writeFile(resolvedOutputPath, screenshot, 'base64');
      return {
        content: [{ type: 'text', text: `Screenshot saved to ${resolvedOutputPath}` }],
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error taking screenshot: ${e.message}` }],
        isError: true,
      };
    }
  }
);
655
+
656
server.registerTool(
  'close_session',
  {
    description: 'closes the current browser session',
    inputSchema: {},
  },
  // Quits the current driver. The session is removed from server state even
  // when quit() fails, in which case the failure is still reported.
  async () => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();
      const sessionId = state.currentSession;
      const forgetSession = () => {
        state.drivers.delete(sessionId);
        state.bidi.delete(sessionId);
        state.currentSession = null;
      };

      try {
        await driver.quit();
      } finally {
        forgetSession();
      }
      return asText(`Browser session ${sessionId} closed`);
    } catch (e) {
      return { ...asText(`Error closing session: ${e.message}`), isError: true };
    }
  }
);
684
+
685
+ // Element Utility Tools
686
server.registerTool(
  'get_element_attribute',
  {
    description:
      'gets the value of an attribute on an element. Use this to verify element state. Prefer this over screenshots for validation.',
    inputSchema: {
      ...locatorSchema,
      attribute: z
        .string()
        .describe("Name of the attribute to get (e.g., 'href', 'value', 'class')"),
    },
  },
  // Waits for the element (default 10000 ms) and returns the attribute value;
  // a null attribute value is reported as an empty string.
  async ({ by, value, attribute, timeout = 10000 }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();
      const element = await driver.wait(until.elementLocated(getLocator(by, value)), timeout);
      const attrValue = await element.getAttribute(attribute);
      if (attrValue === null) {
        return asText('');
      }
      return asText(attrValue);
    } catch (e) {
      return { ...asText(`Error getting attribute: ${e.message}`), isError: true };
    }
  }
);
715
+
716
server.registerTool(
  'execute_script',
  {
    description:
      'executes JavaScript in the browser and returns the result. Use for advanced interactions not covered by other tools (e.g., drag and drop, scrolling, reading computed styles, manipulating the DOM directly). Also useful for batch-reading multiple element values/states in a single call instead of multiple get_element_attribute calls.',
    inputSchema: {
      script: z.string().describe('JavaScript code to execute in the browser'),
      args: z
        .array(z.any())
        .optional()
        .describe(
          'Optional arguments to pass to the script (accessible via arguments[0], arguments[1], etc.)'
        ),
    },
  },
  // Runs the script through executeScript and renders the result as text:
  // null/undefined -> fixed notice, objects -> pretty-printed JSON,
  // everything else -> String(result).
  async ({ script, args = [] }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const result = await getDriver().executeScript(script, ...args);

      let rendered;
      if (result === undefined || result === null) {
        rendered = 'Script executed (no return value)';
      } else if (typeof result === 'object') {
        rendered = JSON.stringify(result, null, 2);
      } else {
        rendered = String(result);
      }
      return asText(rendered);
    } catch (e) {
      return { ...asText(`Error executing script: ${e.message}`), isError: true };
    }
  }
);
752
+
753
+ // Window/Tab Management
754
server.registerTool(
  'window',
  {
    description: 'manages browser windows and tabs',
    inputSchema: {
      action: z
        .enum(['list', 'switch', 'switch_latest', 'close'])
        .describe('Window action to perform'),
      handle: z.string().optional().describe('Window handle (required for switch)'),
    },
  },
  // list: reports the current and all window handles.
  // switch: focuses the given handle. switch_latest: focuses the last handle.
  // close: closes the current window, then focuses the first remaining one or,
  // when none remain, quits the driver and clears the session from state.
  async ({ action, handle }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();

      if (action === 'list') {
        const handles = await driver.getAllWindowHandles();
        const current = await driver.getWindowHandle();
        return asText(JSON.stringify({ current, all: handles }, null, 2));
      }

      if (action === 'switch') {
        if (!handle) throw new Error('handle is required for switch action');
        await driver.switchTo().window(handle);
        return asText(`Switched to window: ${handle}`);
      }

      if (action === 'switch_latest') {
        const handles = await driver.getAllWindowHandles();
        if (handles.length === 0) throw new Error('No windows available');
        const latest = handles[handles.length - 1];
        await driver.switchTo().window(latest);
        return asText(`Switched to latest window: ${latest}`);
      }

      if (action === 'close') {
        await driver.close();

        // If the handles cannot be listed, treat it as "no windows left".
        let remaining = [];
        try {
          remaining = await driver.getAllWindowHandles();
        } catch (_) {
          /* session gone */
        }

        if (remaining.length > 0) {
          await driver.switchTo().window(remaining[0]);
          return asText(`Window closed. Switched to: ${remaining[0]}`);
        }

        const sessionId = state.currentSession;
        try {
          await driver.quit();
        } catch (_) {
          /* ignore */
        }
        state.drivers.delete(sessionId);
        state.bidi.delete(sessionId);
        state.currentSession = null;
        return asText('Last window closed. Session ended.');
      }

      return { ...asText(`Unknown action: ${action}`), isError: true };
    } catch (e) {
      return { ...asText(`Error in window ${action}: ${e.message}`), isError: true };
    }
  }
);
838
+
839
+ // Frame Management
840
server.registerTool(
  'frame',
  {
    description: 'switches focus to a frame or back to the main page',
    inputSchema: {
      action: z.enum(['switch', 'default']).describe('Frame action to perform'),
      by: z
        .enum(['id', 'css', 'xpath', 'name', 'tag', 'class'])
        .optional()
        .describe('Locator strategy for frame element'),
      value: z.string().optional().describe('Value for the locator strategy'),
      index: z.number().optional().describe('Frame index (0-based)'),
      timeout: z.number().optional().describe('Max wait in ms'),
    },
  },
  // 'default' returns to the top-level document. Otherwise the frame is
  // chosen by index when one is given, else by locator (waiting up to
  // `timeout`, default 10000 ms); with neither, an error result is returned.
  async ({ action, by, value, index, timeout = 10000 }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();
      if (action === 'default') {
        await driver.switchTo().defaultContent();
        return asText('Switched to default content');
      }

      const hasIndex = index !== undefined;
      const hasLocator = Boolean(by && value);
      if (!hasIndex && !hasLocator) {
        throw new Error('Provide either by/value to locate frame, or index to switch by position');
      }

      if (hasIndex) {
        await driver.switchTo().frame(index);
      } else {
        const frameElement = await driver.wait(
          until.elementLocated(getLocator(by, value)),
          timeout
        );
        await driver.switchTo().frame(frameElement);
      }
      return asText('Switched to frame');
    } catch (e) {
      return { ...asText(`Error in frame ${action}: ${e.message}`), isError: true };
    }
  }
);
883
+
884
+ // Alert/Dialog Handling
885
server.registerTool(
  'alert',
  {
    description: 'handles a browser alert, confirm, or prompt dialog',
    inputSchema: {
      action: z
        .enum(['accept', 'dismiss', 'get_text', 'send_text'])
        .describe('Action to perform on the alert'),
      text: z.string().optional().describe('Text to send (required for send_text)'),
      timeout: z.number().optional().describe('Max wait in ms'),
    },
  },
  // Waits for a dialog to be present (default 5000 ms), then applies the
  // action. send_text types the text and accepts the dialog in one step.
  async ({ action, text, timeout = 5000 }) => {
    const asText = (message) => ({ content: [{ type: 'text', text: message }] });
    try {
      const driver = getDriver();
      await driver.wait(until.alertIsPresent(), timeout);
      const dialog = await driver.switchTo().alert();

      if (action === 'accept') {
        await dialog.accept();
        return asText('Alert accepted');
      }
      if (action === 'dismiss') {
        await dialog.dismiss();
        return asText('Alert dismissed');
      }
      if (action === 'get_text') {
        return asText(await dialog.getText());
      }
      if (action === 'send_text') {
        if (text === undefined) {
          throw new Error('text is required for send_text action');
        }
        await dialog.sendKeys(text);
        await dialog.accept();
        return asText(`Text "${text}" sent to prompt and accepted`);
      }
      return { ...asText(`Unknown action: ${action}`), isError: true };
    } catch (e) {
      return { ...asText(`Error in alert ${action}: ${e.message}`), isError: true };
    }
  }
);
938
+
939
+ // Cookie Management Tools
940
server.registerTool(
  'add_cookie',
  {
    description:
      "adds a cookie to the current browser session. The browser must be on a page from the cookie's domain before setting it.",
    inputSchema: {
      name: z.string().describe('Name of the cookie'),
      value: z.string().describe('Value of the cookie'),
      domain: z.string().optional().describe('Domain the cookie is visible to'),
      path: z.string().optional().describe('Path the cookie is visible to'),
      secure: z.boolean().optional().describe('Whether the cookie is a secure cookie'),
      httpOnly: z.boolean().optional().describe('Whether the cookie is HTTP only'),
      expiry: z
        .number()
        .optional()
        .describe('Expiry date of the cookie as a Unix timestamp (seconds since epoch)'),
    },
  },
  // Builds the cookie from name/value plus whichever optional fields were
  // actually supplied, then adds it through the driver's cookie manager.
  async ({ name, value, domain, path, secure, httpOnly, expiry }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();
      const cookie = { name, value };
      const optionalFields = { domain, path, secure, httpOnly, expiry };
      for (const [field, fieldValue] of Object.entries(optionalFields)) {
        if (fieldValue !== undefined) {
          cookie[field] = fieldValue;
        }
      }
      await driver.manage().addCookie(cookie);
      return asText(`Cookie "${name}" added`);
    } catch (e) {
      return { ...asText(`Error adding cookie: ${e.message}`), isError: true };
    }
  }
);
979
+
980
server.registerTool(
  'get_cookies',
  {
    description:
      'retrieves cookies from the current browser session. Returns all cookies or a specific cookie by name.',
    inputSchema: {
      name: z
        .string()
        .optional()
        .describe('Name of a specific cookie to retrieve. If omitted, all cookies are returned.'),
    },
  },
  // Without a name, every cookie is returned as JSON. With a name, that
  // cookie is returned as JSON, or a "not found" error result when the
  // driver yields nothing or raises NoSuchCookieError.
  async ({ name }) => {
    const asText = (text) => ({ content: [{ type: 'text', text }] });
    try {
      const driver = getDriver();

      if (!name) {
        const cookies = await driver.manage().getCookies();
        return asText(JSON.stringify(cookies, null, 2));
      }

      let cookie;
      try {
        cookie = await driver.manage().getCookie(name);
      } catch (cookieError) {
        // Only the "no such cookie" failure maps to "not found"; anything
        // else goes to the outer handler.
        if (!(cookieError instanceof error.NoSuchCookieError)) {
          throw cookieError;
        }
        cookie = null;
      }

      if (!cookie) {
        return { ...asText(`Cookie "${name}" not found`), isError: true };
      }
      return asText(JSON.stringify(cookie, null, 2));
    } catch (e) {
      return { ...asText(`Error getting cookies: ${e.message}`), isError: true };
    }
  }
);
1032
+
1033
// Tool: delete_cookie
// Removes one cookie when `name` is a non-empty string, otherwise removes
// every cookie, using the driver returned by getDriver(). Failures are
// reported as an error reply instead of being thrown.
server.registerTool(
  'delete_cookie',
  {
    description:
      'deletes cookies from the current browser session. Can delete a specific cookie by name or all cookies.',
    inputSchema: {
      name: z
        .string()
        .optional()
        .describe('Name of the cookie to delete. If omitted, all cookies are deleted.'),
    },
  },
  async ({ name }) => {
    try {
      const cookieJar = getDriver().manage();
      let confirmation;

      if (name) {
        await cookieJar.deleteCookie(name);
        confirmation = `Cookie "${name}" deleted`;
      } else {
        await cookieJar.deleteAllCookies();
        confirmation = 'All cookies deleted';
      }

      return {
        content: [{ type: 'text', text: confirmation }],
      };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error deleting cookie: ${e.message}` }],
        isError: true,
      };
    }
  }
);
1067
+
1068
// BiDi Diagnostic Tools
// Lookup table for the `diagnostics` tool: maps each accepted `type` value to
// the property of the per-session BiDi record that holds its entries
// (`logKey`) and to the text returned when that list is empty.
const diagnosticTypes = {
  console: {
    logKey: 'consoleLogs',
    emptyMessage: 'No console logs captured',
  },
  errors: {
    logKey: 'pageErrors',
    emptyMessage: 'No page errors captured',
  },
  network: {
    logKey: 'networkLogs',
    emptyMessage: 'No network activity captured',
  },
};
1074
+
1075
// Tool: diagnostics
// Returns the console logs, page errors or network activity stored on the
// current session's BiDi record, optionally emptying that list afterwards.
// When the record is missing or not marked available, a plain (non-error)
// explanatory message is returned instead.
server.registerTool(
  'diagnostics',
  {
    description:
      'retrieves browser diagnostics (console logs, JS errors, or network activity) captured via WebDriver BiDi',
    inputSchema: {
      type: z.enum(['console', 'errors', 'network']).describe('Type of diagnostic data to retrieve'),
      clear: z.boolean().optional().describe('Clear after returning (default: false)'),
    },
  },
  async ({ type, clear = false }) => {
    try {
      // Return value is deliberately discarded. NOTE(review): presumably this
      // throws when there is no active session — confirm against getDriver.
      getDriver();

      const sessionBidi = state.bidi.get(state.currentSession);
      if (!sessionBidi?.available) {
        return {
          content: [
            {
              type: 'text',
              text: 'Diagnostics not available (BiDi not supported by this browser/driver)',
            },
          ],
        };
      }

      const spec = diagnosticTypes[type];
      const entries = sessionBidi[spec.logKey];
      const text = entries.length !== 0 ? JSON.stringify(entries, null, 2) : spec.emptyMessage;

      // The reply text is built before clearing, so it still contains the
      // entries that are about to be dropped.
      if (clear) {
        sessionBidi[spec.logKey] = [];
      }

      return { content: [{ type: 'text', text }] };
    } catch (e) {
      return {
        content: [{ type: 'text', text: `Error getting diagnostics: ${e.message}` }],
        isError: true,
      };
    }
  }
);
1114
+
1115
+ // Resources
1116
// Resource: browser-status://current
// Plain-text resource that reports the id stored in state.currentSession, or
// a fixed message when that value is falsy.
server.registerResource(
  'browser-status',
  'browser-status://current',
  {
    description: 'Current browser session status',
    mimeType: 'text/plain',
  },
  async (uri) => {
    const entry = { uri: uri.href, mimeType: 'text/plain' };

    if (state.currentSession) {
      entry.text = `Active browser session: ${state.currentSession}`;
    } else {
      entry.text = 'No active browser session';
    }

    return { contents: [entry] };
  }
);
1135
+
1136
// Resource: accessibility://current
// Runs accessibilitySnapshotScript in the current session's driver and
// returns the result as pretty-printed JSON (an empty object when the script
// yields a falsy value). Every failure surfaces as an McpError.
server.registerResource(
  'accessibility-snapshot',
  'accessibility://current',
  {
    description:
      'Accessibility tree snapshot of the current page. A compact, structured representation of interactive elements and text content, much smaller than full HTML. Useful for understanding page layout and finding elements to interact with.',
    mimeType: 'application/json',
  },
  async (uri) => {
    try {
      const activeDriver = state.drivers.get(state.currentSession);

      if (!activeDriver) {
        // -32002 is not in the SDK but is noted in the MCP specification:
        // https://modelcontextprotocol.io/specification/2025-11-25/server/resources#error-handling
        throw new McpError(-32002, 'No active browser session. Start a browser first.');
      }

      const snapshot = await activeDriver.executeScript(accessibilitySnapshotScript);
      const payload = JSON.stringify(snapshot || {}, null, 2);

      return {
        contents: [{ uri: uri.href, mimeType: 'application/json', text: payload }],
      };
    } catch (e) {
      // Anything that is not already an McpError is wrapped as an internal
      // error; McpErrors (such as the one above) pass through untouched.
      if (!(e instanceof McpError)) {
        throw new McpError(
          ErrorCode.InternalError,
          `Failed to capture accessibility snapshot: ${e.message}`
        );
      }
      throw e;
    }
  }
);
1170
+
1171
// Cleanup handler
// Set once shutdown has begun so that a repeated signal (for example a second
// Ctrl+C while drivers are still quitting) does not quit every driver again.
let cleanupStarted = false;

// Quits every driver in state.drivers, resets the session bookkeeping and
// exits the process with status 0.
//
// - Runs at most once; later calls return immediately.
// - Drivers are quit concurrently, so one slow quit() does not hold up the
//   others. A quit() that fails is logged and does not stop the shutdown.
async function cleanup() {
  if (cleanupStarted) return;
  cleanupStarted = true;

  // Snapshot the entries so logging can pair each outcome with its session id.
  const sessions = [...state.drivers];

  // The async wrapper turns a synchronous throw from quit() into a rejection,
  // so allSettled sees every failure.
  const outcomes = await Promise.allSettled(sessions.map(async ([, driver]) => driver.quit()));

  outcomes.forEach((outcome, index) => {
    if (outcome.status === 'rejected') {
      const [sessionId] = sessions[index];
      console.error(`Error closing browser session ${sessionId}:`, outcome.reason);
    }
  });

  state.drivers.clear();
  state.bidi.clear();
  state.currentSession = null;
  process.exit(0);
}
1185
+
1186
// Run the cleanup handler on either termination signal.
for (const signal of ['SIGTERM', 'SIGINT']) {
  process.on(signal, cleanup);
}

// Start the server
const transport = new StdioServerTransport();
await server.connect(transport);