real-browser-mcp-server 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,3 @@
1
- /**
2
- * Brave Real Browser MCP Server - Tool Handlers
3
- *
4
- * Implementation of all 23 browser automation tools (optimized from 28)
5
- *
6
- * Environment Variables:
7
- * HEADLESS=true - Run browser in headless mode
8
- * HEADLESS=false - Run browser in GUI mode (visible)
9
- */
10
-
11
1
  const path = require('path');
12
2
  const fs = require('fs');
13
3
  const crypto = require('crypto');
@@ -68,13 +58,25 @@ function notifyProgress(toolName, status, message, data = {}) {
68
58
  function getHeadlessFromEnv() {
69
59
  const envHeadless = process.env.HEADLESS;
70
60
 
71
- if (envHeadless === undefined || envHeadless === null || envHeadless === '') {
72
- return false; // Default: GUI mode
61
+ if (envHeadless !== undefined && envHeadless !== null && envHeadless !== '') {
62
+ const value = envHeadless.toLowerCase().trim();
63
+ return value === 'true' || value === '1' || value === 'yes';
64
+ }
65
+
66
+ // Auto-detect CI environments
67
+ if (process.env.CI || process.env.GITHUB_ACTIONS || process.env.TRAVIS || process.env.CIRCLECI) {
68
+ return true;
73
69
  }
74
70
 
75
- // Parse string to boolean
76
- const value = envHeadless.toLowerCase().trim();
77
- return value === 'true' || value === '1' || value === 'yes';
71
+ // Auto-detect headless Linux environments without X11 or Wayland
72
+ if (process.platform === 'linux') {
73
+ const hasDisplay = process.env.DISPLAY || process.env.WAYLAND_DISPLAY;
74
+ if (!hasDisplay) {
75
+ return true;
76
+ }
77
+ }
78
+
79
+ return false;
78
80
  }
79
81
 
80
82
  /**
@@ -94,6 +96,16 @@ function requireBrowser() {
94
96
  return { browser: browserInstance, page: pageInstance };
95
97
  }
96
98
 
99
/**
 * Validate the waitUntil value for Playwright/Patchright.
 *
 * Only these states are supported: load | domcontentloaded | networkidle | commit.
 * String input is matched case-insensitively with surrounding whitespace
 * ignored, so common spellings such as 'DOMContentLoaded' are honoured rather
 * than silently replaced. Any other value (including undefined, non-strings and
 * the Puppeteer-style 'networkidle0' / 'networkidle2') falls back to
 * 'networkidle'.
 *
 * @param {*} value - Requested wait state.
 * @returns {string} One of 'load', 'domcontentloaded', 'networkidle', 'commit'.
 */
function resolveWaitUntil(value) {
  const allowed = ['load', 'domcontentloaded', 'networkidle', 'commit'];
  const normalized = typeof value === 'string' ? value.trim().toLowerCase() : value;
  return allowed.includes(normalized) ? normalized : 'networkidle';
}
108
+
97
109
  /**
98
110
  * DECODER UTILITIES - URL, Base64, AES Decryption
99
111
  */
@@ -595,9 +607,9 @@ const handlers = {
595
607
  // ═══════════════════════════════════════════════════════════════
596
608
  // INJECTED SCRIPT - Silent Handling of Popups
597
609
  // Override window.confirm/alert to handle them inside the page context
598
- // Note: Using evaluateOnNewDocument (Puppeteer) instead of addInitScript (Playwright)
610
+ // Note: Using addInitScript (Playwright) to intercept popups early
599
611
  // ═══════════════════════════════════════════════════════════════
600
- await pageInstance.evaluateOnNewDocument(() => {
612
+ await pageInstance.addInitScript(() => {
601
613
  window.originalConfirm = window.confirm;
602
614
  window.originalAlert = window.alert;
603
615
 
@@ -624,7 +636,7 @@ const handlers = {
624
636
  };
625
637
  });
626
638
 
627
- const pid = browserInstance.process()?.pid;
639
+ const pid = (typeof browserInstance.process === 'function') ? browserInstance.process()?.pid : null;
628
640
 
629
641
  notifyProgress('browser_init', 'completed', `Browser started (PID: ${pid})`, {
630
642
  headless,
@@ -644,7 +656,9 @@ const handlers = {
644
656
  // 2. Navigate (ENHANCED - handles context destroyed errors, retries)
645
657
  async navigate(params) {
646
658
  const { page } = requireBrowser();
647
- const { url, waitUntil = 'networkidle2', timeout = 30000, retries = 2 } = params;
659
+ let { url, waitUntil = 'networkidle', timeout = 30000, retries = 2 } = params;
660
+ // Playwright/Patchright wait states: load | domcontentloaded | networkidle | commit
661
+ waitUntil = resolveWaitUntil(waitUntil);
648
662
 
649
663
  notifyProgress('navigate', 'started', `Navigating to: ${url}`);
650
664
 
@@ -754,7 +768,7 @@ const handlers = {
754
768
  const url = rawHttpUrl || page.url();
755
769
  notifyProgress('get_content', 'in_progress', `Fetching raw HTTP (no JS) from: ${url}`);
756
770
  try {
757
- const cookies = await page.cookies(url);
771
+ const cookies = await page.context().cookies(url);
758
772
  const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
759
773
  const response = await fetch(url, {
760
774
  headers: {
@@ -785,7 +799,59 @@ const handlers = {
785
799
 
786
800
  let content;
787
801
 
788
- if (selector) {
802
+ // === markdown: real HTML→Markdown conversion (no external deps) ===
803
+ if (format === 'markdown') {
804
+ if (selector) {
805
+ const exists = await page.$(selector);
806
+ if (!exists) {
807
+ notifyProgress('get_content', 'error', `Element not found: ${selector}`);
808
+ return { success: false, error: `Element not found: ${selector}` };
809
+ }
810
+ }
811
+ content = await page.evaluate((sel) => {
812
+ const root = sel ? document.querySelector(sel) : document.body;
813
+ if (!root) return '';
814
+ const skip = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'IFRAME', 'SVG', 'CANVAS']);
815
+ const inline = (node) => {
816
+ let out = '';
817
+ node.childNodes.forEach(c => {
818
+ if (c.nodeType === 3) { out += c.textContent.replace(/\s+/g, ' '); return; }
819
+ if (c.nodeType !== 1 || skip.has(c.tagName)) return;
820
+ const t = c.tagName;
821
+ if (t === 'A') { const h = c.getAttribute('href') || ''; const x = inline(c).trim(); out += h ? `[${x}](${h})` : x; }
822
+ else if (t === 'STRONG' || t === 'B') out += `**${inline(c).trim()}**`;
823
+ else if (t === 'EM' || t === 'I') out += `*${inline(c).trim()}*`;
824
+ else if (t === 'CODE') out += '`' + c.textContent.trim() + '`';
825
+ else if (t === 'IMG') { const a = c.getAttribute('alt') || ''; const s = c.getAttribute('src') || ''; if (s) out += `![${a}](${s})`; }
826
+ else if (t === 'BR') out += '\n';
827
+ else out += inline(c);
828
+ });
829
+ return out;
830
+ };
831
+ const lines = [];
832
+ const walk = (node) => {
833
+ node.childNodes.forEach(c => {
834
+ if (c.nodeType === 3) { const x = c.textContent.trim(); if (x) lines.push(x); return; }
835
+ if (c.nodeType !== 1 || skip.has(c.tagName)) return;
836
+ const t = c.tagName;
837
+ if (/^H[1-6]$/.test(t)) lines.push('\n' + '#'.repeat(+t[1]) + ' ' + inline(c).trim() + '\n');
838
+ else if (t === 'P') { const x = inline(c).trim(); if (x) lines.push(x + '\n'); }
839
+ else if (t === 'UL' || t === 'OL') {
840
+ let i = 1;
841
+ c.querySelectorAll(':scope > li').forEach(li => lines.push((t === 'OL' ? (i++) + '. ' : '- ') + inline(li).trim()));
842
+ lines.push('');
843
+ }
844
+ else if (t === 'BLOCKQUOTE') lines.push('> ' + inline(c).trim() + '\n');
845
+ else if (t === 'PRE') lines.push('```\n' + c.textContent.trim() + '\n```\n');
846
+ else if (t === 'HR') lines.push('\n---\n');
847
+ else if (['A', 'STRONG', 'B', 'EM', 'I', 'CODE', 'IMG', 'SPAN', 'LABEL'].includes(t)) { const x = inline(c).trim(); if (x) lines.push(x); }
848
+ else walk(c);
849
+ });
850
+ };
851
+ walk(root);
852
+ return lines.join('\n').replace(/\n{3,}/g, '\n\n').trim();
853
+ }, selector || null);
854
+ } else if (selector) {
789
855
  const element = await page.$(selector);
790
856
  if (!element) {
791
857
  notifyProgress('get_content', 'error', `Element not found: ${selector}`);
@@ -800,11 +866,6 @@ const handlers = {
800
866
  } else {
801
867
  if (format === 'html') {
802
868
  content = await page.content();
803
- } else if (format === 'markdown') {
804
- content = await page.evaluate(() => {
805
- const body = document.body.innerText;
806
- return body;
807
- });
808
869
  } else {
809
870
  content = await page.evaluate(() => document.body.innerText);
810
871
  }
@@ -835,7 +896,7 @@ const handlers = {
835
896
  await page.waitForNavigation({ timeout });
836
897
  break;
837
898
  case 'networkidle':
838
- await page.waitForNetworkIdle({ timeout });
899
+ await page.waitForLoadState('networkidle', { timeout });
839
900
  break;
840
901
  case 'timeout':
841
902
  default:
@@ -1239,7 +1300,7 @@ const handlers = {
1239
1300
  notifyProgress('click', 'progress', 'Used force click (JS)');
1240
1301
  } else if (humanLike) {
1241
1302
  try {
1242
- const { createCursor } = require('ghost-cursor');
1303
+ const { createCursor } = require('ghost-cursor-patchright');
1243
1304
  const cursor = createCursor(page);
1244
1305
 
1245
1306
  if (context !== page) {
@@ -1392,7 +1453,9 @@ const handlers = {
1392
1453
  notifyProgress('browser_close', 'progress', 'Browser closed gracefully');
1393
1454
  } catch (e) {
1394
1455
  if (force) {
1395
- browserInstance.process()?.kill('SIGKILL');
1456
+ if (typeof browserInstance.process === 'function') {
1457
+ browserInstance.process()?.kill('SIGKILL');
1458
+ }
1396
1459
  notifyProgress('browser_close', 'progress', 'Browser force killed');
1397
1460
  }
1398
1461
  }
@@ -1628,12 +1691,12 @@ const handlers = {
1628
1691
  try {
1629
1692
  const captchaEl = await targetFrame.$(detectedCaptchaSelector);
1630
1693
  if (captchaEl) {
1631
- captchaImageBase64 = await captchaEl.screenshot({ encoding: 'base64' });
1694
+ captchaImageBase64 = (await captchaEl.screenshot()).toString('base64');
1632
1695
  }
1633
1696
  } catch(e) {}
1634
1697
 
1635
1698
  // Take full context screenshot for host LLM fallback
1636
- const screenshotBase64 = await targetHandle.screenshot({ encoding: 'base64' });
1699
+ const screenshotBase64 = (await targetHandle.screenshot()).toString('base64');
1637
1700
 
1638
1701
  // ═══════════════════════════════════════════════════════════
1639
1702
  // STEP A: Server-Side Vision API (if aiMode enabled)
@@ -2002,7 +2065,7 @@ const handlers = {
2002
2065
 
2003
2066
  // Click submit button with human-like behavior
2004
2067
  try {
2005
- const { createCursor } = require('ghost-cursor');
2068
+ const { createCursor } = require('ghost-cursor-patchright');
2006
2069
  const cursor = createCursor(page);
2007
2070
  await cursor.click(submitSelector);
2008
2071
  } catch (e) {
@@ -2049,10 +2112,14 @@ const handlers = {
2049
2112
 
2050
2113
  notifyProgress('random_scroll', 'started', `Scrolling ${scrollDirection} ${scrollAmount}px`);
2051
2114
 
2052
- await page.evaluate(({ scrollAmount, scrollDirection, smooth }) => {
2053
- const y = scrollDirection === 'down' ? scrollAmount : -scrollAmount;
2054
- window.scrollBy({ top: y, behavior: smooth ? 'smooth' : 'auto' });
2055
- }, { scrollAmount, scrollDirection, smooth });
2115
+ const y = scrollDirection === 'down' ? scrollAmount : -scrollAmount;
2116
+ if (smooth && page.realScroll) {
2117
+ await page.realScroll(y, 600);
2118
+ } else {
2119
+ await page.evaluate(({ y, smooth }) => {
2120
+ window.scrollBy({ top: y, behavior: smooth ? 'smooth' : 'auto' });
2121
+ }, { y, smooth });
2122
+ }
2056
2123
 
2057
2124
  notifyProgress('random_scroll', 'completed', `Scrolled ${scrollDirection} ${scrollAmount}px`, { direction: scrollDirection, amount: scrollAmount });
2058
2125
 
@@ -2088,7 +2155,7 @@ const handlers = {
2088
2155
  }
2089
2156
  }
2090
2157
  } else if (xpath) {
2091
- const handles = await page.$x(xpath);
2158
+ const handles = await page.$$(`xpath=${xpath}`);
2092
2159
  elements = await Promise.all(handles.map(h => h.evaluate(el => ({
2093
2160
  tag: el.tagName,
2094
2161
  text: el.textContent?.substring(0, 100)
@@ -2182,7 +2249,7 @@ const handlers = {
2182
2249
  page.on('framenavigated', frameNavigatedHandler);
2183
2250
 
2184
2251
  try {
2185
- await page.goto(url, { waitUntil: 'networkidle2', timeout });
2252
+ await page.goto(url, { waitUntil: 'networkidle', timeout });
2186
2253
 
2187
2254
  // If followJS is enabled, wait a bit and check for meta refreshes and JS redirects
2188
2255
  if (followJS) {
@@ -2448,7 +2515,7 @@ const handlers = {
2448
2515
  // ====== FEATURE 2: Pre-page-load Runtime API Interception ======
2449
2516
  // Inject BEFORE any JS runs — catches calls from obfuscated/webpack code
2450
2517
  try {
2451
- await page.evaluateOnNewDocument(() => {
2518
+ await page.addInitScript(() => {
2452
2519
  window.__interceptedApis = [];
2453
2520
  window.__wsMessages = [];
2454
2521
 
@@ -2552,7 +2619,7 @@ const handlers = {
2552
2619
  window.WebSocket.CLOSING = OrigWS.CLOSING;
2553
2620
  window.WebSocket.CLOSED = OrigWS.CLOSED;
2554
2621
  });
2555
- } catch (e) { /* evaluateOnNewDocument may fail on already-loaded pages, that's OK */ }
2622
+ } catch (e) { /* addInitScript may fail on already-loaded pages, that's OK */ }
2556
2623
 
2557
2624
  // Request handler
2558
2625
  page.on('request', req => {
@@ -2922,29 +2989,51 @@ const handlers = {
2922
2989
 
2923
2990
  notifyProgress('cookie_manager', 'started', `Cookie action: ${action}`);
2924
2991
 
2992
+ // Playwright/Patchright: cookies are managed via the BrowserContext, not the page
2993
+ const context = page.context();
2994
+
2925
2995
  switch (action) {
2926
- case 'get':
2927
- const cookies = await page.cookies();
2996
+ case 'get': {
2997
+ const cookies = await context.cookies();
2928
2998
  notifyProgress('cookie_manager', 'completed', `Retrieved ${cookies.length} cookies`);
2929
2999
  return { success: true, cookies: name ? cookies.filter(c => c.name === name) : cookies };
3000
+ }
2930
3001
 
2931
- case 'set':
2932
- await page.setCookie({ name, value, domain: domain || new URL(page.url()).hostname, expires });
3002
+ case 'set': {
3003
+ await context.addCookies([{
3004
+ name,
3005
+ value,
3006
+ domain: domain || new URL(page.url()).hostname,
3007
+ path: '/',
3008
+ ...(expires ? { expires } : {})
3009
+ }]);
2933
3010
  notifyProgress('cookie_manager', 'completed', `Cookie set: ${name}`);
2934
3011
  return { success: true, message: `Cookie ${name} set` };
3012
+ }
2935
3013
 
2936
- case 'delete':
2937
- const toDelete = await page.cookies();
2938
- const filtered = name ? toDelete.filter(c => c.name === name) : toDelete;
2939
- await page.deleteCookie(...filtered);
2940
- notifyProgress('cookie_manager', 'completed', `Deleted ${filtered.length} cookie(s)`);
2941
- return { success: true, message: `Deleted ${filtered.length} cookie(s)` };
3014
+ case 'delete': {
3015
+ // Playwright has no per-cookie delete: clear all, then re-add the ones we keep
3016
+ const toDelete = await context.cookies();
3017
+ const remaining = name ? toDelete.filter(c => c.name !== name) : [];
3018
+ const removedCount = toDelete.length - remaining.length;
3019
+ await context.clearCookies();
3020
+ if (remaining.length) {
3021
+ await context.addCookies(remaining.map(c => ({
3022
+ name: c.name, value: c.value, domain: c.domain, path: c.path,
3023
+ ...(c.expires && c.expires > 0 ? { expires: c.expires } : {}),
3024
+ httpOnly: c.httpOnly, secure: c.secure, sameSite: c.sameSite
3025
+ })));
3026
+ }
3027
+ notifyProgress('cookie_manager', 'completed', `Deleted ${removedCount} cookie(s)`);
3028
+ return { success: true, message: `Deleted ${removedCount} cookie(s)` };
3029
+ }
2942
3030
 
2943
- case 'clear':
2944
- const allCookies = await page.cookies();
2945
- await page.deleteCookie(...allCookies);
3031
+ case 'clear': {
3032
+ const allCookies = await context.cookies();
3033
+ await context.clearCookies();
2946
3034
  notifyProgress('cookie_manager', 'completed', `Cleared ${allCookies.length} cookies`);
2947
3035
  return { success: true, message: `Cleared ${allCookies.length} cookies` };
3036
+ }
2948
3037
  }
2949
3038
 
2950
3039
  return { success: false, error: 'Invalid action' };
@@ -2961,8 +3050,8 @@ const handlers = {
2961
3050
  fs.mkdirSync(directory, { recursive: true });
2962
3051
  }
2963
3052
 
2964
- const response = await page.goto(url, { waitUntil: 'networkidle2' });
2965
- const buffer = await response.buffer();
3053
+ const response = await page.goto(url, { waitUntil: 'networkidle' });
3054
+ const buffer = await response.body();
2966
3055
 
2967
3056
  const outputFilename = filename || path.basename(new URL(url).pathname) || 'download';
2968
3057
  const outputPath = path.join(directory, outputFilename);
@@ -3500,7 +3589,7 @@ const handlers = {
3500
3589
  const inputType = await input.evaluate(el => el.type);
3501
3590
 
3502
3591
  if (tagName === 'select') {
3503
- await page.select(inputSelector, value);
3592
+ await page.selectOption(inputSelector, value);
3504
3593
  } else if (inputType === 'checkbox' || inputType === 'radio') {
3505
3594
  if (value) await input.click();
3506
3595
  } else {
@@ -3953,8 +4042,8 @@ const handlers = {
3953
4042
  notifyProgress('media_extractor', 'progress', `Processing ${i + 1}/${urls.length}: ${url}`);
3954
4043
 
3955
4044
  // Navigate to URL
3956
- await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
3957
- await page.waitForTimeout(2000); // Wait for media to load
4045
+ await page.goto(url, { waitUntil: 'networkidle', timeout: 30000 });
4046
+ await new Promise(r => setTimeout(r, 2000)); // Wait for media to load
3958
4047
 
3959
4048
  // Extract streams
3960
4049
  const streams = await extractStreamsFromContext(page, 'main');
@@ -4768,7 +4857,7 @@ const handlers = {
4768
4857
  }, identity, String(value));
4769
4858
  } else {
4770
4859
  // Smart Type
4771
- const { createCursor } = require('ghost-cursor');
4860
+ const { createCursor } = require('ghost-cursor-patchright');
4772
4861
  const cursor = createCursor(page);
4773
4862
 
4774
4863
  // Click center of element
@@ -4835,7 +4924,7 @@ const handlers = {
4835
4924
  });
4836
4925
 
4837
4926
  if (submitSelector) {
4838
- const { createCursor } = require('ghost-cursor');
4927
+ const { createCursor } = require('ghost-cursor-patchright');
4839
4928
  const cursor = createCursor(page);
4840
4929
  await cursor.click(submitSelector);
4841
4930
 
@@ -4855,6 +4944,241 @@ const handlers = {
4855
4944
  filledFields,
4856
4945
  message: `Form filled: ${filledFields.join(', ')}`
4857
4946
  };
4947
+ },
4948
+
4949
+ // 23. Screenshot - capture viewport / full page / element (returns image to AI)
4950
+ async screenshot(params = {}) {
4951
+ const { page } = requireBrowser();
4952
+ const {
4953
+ fullPage = false,
4954
+ selector,
4955
+ format = 'png',
4956
+ quality,
4957
+ path: savePath,
4958
+ returnBase64 = true,
4959
+ omitBackground = false
4960
+ } = params;
4961
+
4962
+ notifyProgress('screenshot', 'started',
4963
+ `Capturing ${selector ? 'element' : (fullPage ? 'full page' : 'viewport')} screenshot`);
4964
+
4965
+ const opts = { type: format, fullPage: selector ? false : fullPage };
4966
+ if (format === 'jpeg' && typeof quality === 'number') opts.quality = quality;
4967
+ if (format === 'png' && omitBackground) opts.omitBackground = true;
4968
+
4969
+ let buffer;
4970
+ if (selector) {
4971
+ const element = await page.$(selector);
4972
+ if (!element) {
4973
+ notifyProgress('screenshot', 'error', `Element not found: ${selector}`);
4974
+ return { success: false, error: `Element not found: ${selector}` };
4975
+ }
4976
+ buffer = await element.screenshot(opts);
4977
+ } else {
4978
+ buffer = await page.screenshot(opts);
4979
+ }
4980
+
4981
+ let savedTo = null;
4982
+ if (savePath) {
4983
+ const dir = path.dirname(savePath);
4984
+ if (dir && !fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
4985
+ fs.writeFileSync(savePath, buffer);
4986
+ savedTo = savePath;
4987
+ }
4988
+
4989
+ notifyProgress('screenshot', 'completed',
4990
+ `Screenshot captured (${buffer.length} bytes)${savedTo ? ' → ' + savedTo : ''}`);
4991
+
4992
+ const meta = { success: true, format, bytes: buffer.length, savedTo, url: page.url() };
4993
+
4994
+ if (returnBase64) {
4995
+ const base64 = Buffer.from(buffer).toString('base64');
4996
+ // mcpContent is returned directly to the AI agent (image + text summary)
4997
+ meta.mcpContent = [
4998
+ { type: 'image', data: base64, mimeType: format === 'jpeg' ? 'image/jpeg' : 'image/png' },
4999
+ { type: 'text', text: JSON.stringify({ success: true, format, bytes: buffer.length, savedTo, url: page.url() }, null, 2) }
5000
+ ];
5001
+ }
5002
+
5003
+ return meta;
5004
+ },
5005
+
5006
+ // 24. Save as PDF - Chromium print-to-PDF (headless mode only)
5007
+ async save_as_pdf(params = {}) {
5008
+ const { page } = requireBrowser();
5009
+ const {
5010
+ path: savePath = './downloads/page.pdf',
5011
+ format = 'A4',
5012
+ landscape = false,
5013
+ printBackground = true
5014
+ } = params;
5015
+
5016
+ notifyProgress('save_as_pdf', 'started', `Saving page as PDF: ${savePath}`);
5017
+
5018
+ const dir = path.dirname(savePath);
5019
+ if (dir && !fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
5020
+
5021
+ try {
5022
+ await page.pdf({ path: savePath, format, landscape, printBackground });
5023
+ } catch (e) {
5024
+ notifyProgress('save_as_pdf', 'error', e.message);
5025
+ return {
5026
+ success: false,
5027
+ error: `PDF generation failed (page.pdf() works only in headless mode): ${e.message}`
5028
+ };
5029
+ }
5030
+
5031
+ const stats = fs.existsSync(savePath) ? fs.statSync(savePath) : null;
5032
+ notifyProgress('save_as_pdf', 'completed', `PDF saved: ${savePath}`);
5033
+
5034
+ return {
5035
+ success: true,
5036
+ savedTo: savePath,
5037
+ bytes: stats ? stats.size : null,
5038
+ url: page.url()
5039
+ };
5040
+ },
5041
+
5042
+ // 25. See Page (AI Vision — "eyes": screenshot + visual map of interactive elements)
5043
+ async see_page(params = {}) {
5044
+ const { page } = requireBrowser();
5045
+ const {
5046
+ fullPage = false,
5047
+ format = 'jpeg',
5048
+ quality = 70,
5049
+ includeElements = true,
5050
+ maxElements = 60,
5051
+ path: savePath
5052
+ } = params;
5053
+
5054
+ notifyProgress('see_page', 'started', `👁️ Looking at the page (${fullPage ? 'full page' : 'viewport'})...`);
5055
+
5056
+ // 1. Capture what the page looks like (the "eyes")
5057
+ const shotOpts = { type: format, fullPage };
5058
+ if (format === 'jpeg' && typeof quality === 'number') shotOpts.quality = quality;
5059
+
5060
+ let buffer;
5061
+ try {
5062
+ buffer = await page.screenshot(shotOpts);
5063
+ } catch (e) {
5064
+ return { success: false, error: `Vision capture failed: ${e.message}` };
5065
+ }
5066
+
5067
+ // 2. Build a "visual map" of visible interactive elements (what a human can act on)
5068
+ let elements = [];
5069
+ let pageInfo = {};
5070
+ if (includeElements) {
5071
+ const data = await page.evaluate((maxEls) => {
5072
+ const out = [];
5073
+ const seen = new Set();
5074
+ const sel = 'a[href], button, input, select, textarea, [role="button"], [role="link"], [onclick], [tabindex]';
5075
+ const nodes = document.querySelectorAll(sel);
5076
+
5077
+ const cssPath = (el) => {
5078
+ if (el.id) return `#${CSS.escape(el.id)}`;
5079
+ if (el.name) return `${el.tagName.toLowerCase()}[name="${el.name}"]`;
5080
+ const parts = [];
5081
+ let node = el;
5082
+ while (node && node.nodeType === 1 && parts.length < 4) {
5083
+ let part = node.tagName.toLowerCase();
5084
+ if (node.classList.length) {
5085
+ const cls = Array.from(node.classList).slice(0, 2).map(c => '.' + CSS.escape(c)).join('');
5086
+ part += cls;
5087
+ }
5088
+ const parent = node.parentElement;
5089
+ if (parent) {
5090
+ const sibs = Array.from(parent.children).filter(c => c.tagName === node.tagName);
5091
+ if (sibs.length > 1) part += `:nth-of-type(${sibs.indexOf(node) + 1})`;
5092
+ }
5093
+ parts.unshift(part);
5094
+ node = node.parentElement;
5095
+ }
5096
+ return parts.join(' > ');
5097
+ };
5098
+
5099
+ for (const el of nodes) {
5100
+ if (out.length >= maxEls) break;
5101
+ const rect = el.getBoundingClientRect();
5102
+ // Only elements actually visible on screen
5103
+ if (rect.width < 2 || rect.height < 2) continue;
5104
+ const style = window.getComputedStyle(el);
5105
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') continue;
5106
+ if (rect.bottom < 0 || rect.right < 0 || rect.top > window.innerHeight || rect.left > window.innerWidth) {
5107
+ // outside current viewport — skip (we report what is seen)
5108
+ continue;
5109
+ }
5110
+
5111
+ const tag = el.tagName.toLowerCase();
5112
+ let label = (el.innerText || el.value || el.getAttribute('aria-label') || el.getAttribute('placeholder') || el.getAttribute('title') || el.getAttribute('alt') || '').trim().replace(/\s+/g, ' ').slice(0, 80);
5113
+ let kind = tag;
5114
+ if (tag === 'a') kind = 'link';
5115
+ else if (tag === 'button' || el.getAttribute('role') === 'button') kind = 'button';
5116
+ else if (tag === 'input') kind = `input:${el.type || 'text'}`;
5117
+ else if (tag === 'select') kind = 'select';
5118
+ else if (tag === 'textarea') kind = 'textarea';
5119
+
5120
+ const selector = cssPath(el);
5121
+ if (seen.has(selector + '|' + label)) continue;
5122
+ seen.add(selector + '|' + label);
5123
+
5124
+ out.push({
5125
+ kind,
5126
+ text: label,
5127
+ selector,
5128
+ href: tag === 'a' ? el.href : undefined,
5129
+ box: { x: Math.round(rect.x), y: Math.round(rect.y), w: Math.round(rect.width), h: Math.round(rect.height) }
5130
+ });
5131
+ }
5132
+
5133
+ return {
5134
+ elements: out,
5135
+ info: {
5136
+ title: document.title,
5137
+ url: location.href,
5138
+ viewport: { width: window.innerWidth, height: window.innerHeight },
5139
+ scrollY: Math.round(window.scrollY),
5140
+ scrollHeight: document.body ? document.body.scrollHeight : 0
5141
+ }
5142
+ };
5143
+ }, maxElements).catch(() => ({ elements: [], info: {} }));
5144
+
5145
+ elements = data.elements || [];
5146
+ pageInfo = data.info || {};
5147
+ }
5148
+
5149
+ // 3. Optionally save the image too
5150
+ let savedTo = null;
5151
+ if (savePath) {
5152
+ const dir = path.dirname(savePath);
5153
+ if (dir && !fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
5154
+ fs.writeFileSync(savePath, buffer);
5155
+ savedTo = savePath;
5156
+ }
5157
+
5158
+ notifyProgress('see_page', 'completed',
5159
+ `👁️ Saw the page: ${elements.length} interactive elements visible${savedTo ? ' (saved ' + savedTo + ')' : ''}`);
5160
+
5161
+ const base64 = Buffer.from(buffer).toString('base64');
5162
+ const summary = {
5163
+ success: true,
5164
+ url: pageInfo.url || page.url(),
5165
+ title: pageInfo.title,
5166
+ viewport: pageInfo.viewport,
5167
+ scroll: { y: pageInfo.scrollY, pageHeight: pageInfo.scrollHeight },
5168
+ visibleInteractiveElements: elements.length,
5169
+ elements,
5170
+ savedTo
5171
+ };
5172
+
5173
+ // Return BOTH the actual image (so the AI literally "sees" it) and the visual map text
5174
+ return {
5175
+ success: true,
5176
+ mcpContent: [
5177
+ { type: 'image', data: base64, mimeType: format === 'jpeg' ? 'image/jpeg' : 'image/png' },
5178
+ { type: 'text', text: JSON.stringify(summary, null, 2) }
5179
+ ],
5180
+ ...summary
5181
+ };
4858
5182
  }
4859
5183
  };
4860
5184
 
@@ -4934,7 +5258,7 @@ async function aiEnhancedSelector(page, selector, operation, options = {}) {
4934
5258
  * AI Features automatically applied:
4935
5259
  * - Auto-healing: If selector fails, AI tries to find alternatives
4936
5260
  * - Smart retry: Failed operations are retried with AI assistance
4937
- * - All 28 tools benefit from AI without any changes
5261
+ * - All tools benefit from AI without any changes
4938
5262
  */
4939
5263
  async function executeTool(name, params = {}) {
4940
5264
  const handler = handlers[name];
@@ -5047,7 +5371,9 @@ async function cleanup() {
5047
5371
  try {
5048
5372
  await browserInstance.close();
5049
5373
  } catch (e) {
5050
- browserInstance.process()?.kill('SIGKILL');
5374
+ if (typeof browserInstance.process === 'function') {
5375
+ browserInstance.process()?.kill('SIGKILL');
5376
+ }
5051
5377
  }
5052
5378
  browserInstance = null;
5053
5379
  pageInstance = null;