npm - mcpbrowser - Versions diffs - 0.3.45 → 0.3.46 - Mend

mcpbrowser 0.3.45 → 0.3.46

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/src/actions/execute-javascript.js +18 -0
package/src/actions/fetch-page.js +5 -4
package/src/actions/get-current-html.js +20 -4
package/src/core/html.js +3 -2
package/src/core/page.js +39 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mcpbrowser",
-  "version": "0.3.45",
+  "version": "0.3.46",
   "mcpName": "io.github.cherchyk/mcpbrowser",
   "type": "module",
   "description": "MCP browser server - fetch web pages using real Chrome/Edge/Brave browser. Handles authentication, SSO, CAPTCHAs, and anti-bot protection. Browser automation for AI assistants.",

package/src/actions/execute-javascript.js CHANGED Viewed

@@ -209,6 +209,24 @@ export async function executeJavascript({ url, script, timeoutMs = EXECUTION_TIM
   }
   const urlChanged = currentUrl !== beforeUrl;
+  // Detect CSP block or silent evaluation failure:
+  // When page.evaluate() is blocked by CSP, Puppeteer returns undefined (not an error).
+  // Distinguish this from a script that intentionally returns nothing.
+  if (evalResult === undefined || evalResult === null) {
+    return new ExecuteJavascriptResponse({
+      result: null,
+      type: 'undefined',
+      executionTimeMs,
+      truncated: false,
+      urlChanged,
+      currentUrl,
+      error: {
+        name: 'EvaluationEmpty',
+        message: 'Script evaluation returned no result. Possible causes: page Content Security Policy (CSP) blocked evaluation, the script has no return value, or the page context is sandboxed. Try browser_take_screenshot to verify the page is loaded, or use a simpler expression like "document.title" to test page accessibility.'
+      }
+    });
+  }
   if (evalResult?.error) {
     return new ExecuteJavascriptResponse({
       result: null,

package/src/actions/fetch-page.js CHANGED Viewed

@@ -77,6 +77,7 @@ export const FETCH_WEBPAGE_TOOL = {
         enum: ["", "chrome", "edge"]
       },
       removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true },
+      selector: { type: "string", description: "CSS selector to extract a specific DOM subtree instead of the full page. Use to scope extraction and reduce response size (e.g., 'main', '[role=\"main\"]', 'body > div:first-child'). If no elements match, falls back to full page with a note." },
       postLoadWait: { type: "number", description: "Additional milliseconds to wait after page load before extracting HTML. Use for pages that need extra time to render. Default: 0 (no extra wait, SPA detection handles most cases automatically).", default: 0 }
     },
     required: ["url"],
@@ -122,7 +123,7 @@ export const FETCH_WEBPAGE_TOOL = {
  * @param {number} [params.postLoadWait=0] - Additional milliseconds to wait after page load before extracting HTML
  * @returns {Promise<Object>} Result object with success status, URL, HTML content, or error details
  */
-export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = true, postLoadWait = 0 }) {
+export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = true, selector = null, postLoadWait = 0 }) {
   logger.info(`browser_fetch_webpage called: url=${url}`);
   // Handle missing URL with environment variable fallback
@@ -150,7 +151,7 @@ export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = tru
   // Queue this request - processed sequentially, one at a time
   return queueRequest(async () => {
-    return await doFetchPage({ url, browser, removeUnnecessaryHTML, postLoadWait });
+    return await doFetchPage({ url, browser, removeUnnecessaryHTML, selector, postLoadWait });
   });
 }
@@ -158,7 +159,7 @@ export async function fetchPage({ url, browser = '', removeUnnecessaryHTML = tru
  * Internal function that does the actual page fetching.
  * Called by the queue processor - only one runs at a time.
  */
-async function doFetchPage({ url, browser, removeUnnecessaryHTML, postLoadWait }) {
+async function doFetchPage({ url, browser, removeUnnecessaryHTML, selector, postLoadWait }) {
   const originalHostname = new URL(url).hostname;
   // Ensure browser connection
@@ -215,7 +216,7 @@ async function doFetchPage({ url, browser, removeUnnecessaryHTML, postLoadWait }
     }
     // Extract and process HTML
-    const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML);
+    const processedHtml = await extractAndProcessHtml(page, removeUnnecessaryHTML, selector);
     logger.info(`browser_fetch_webpage completed: ${page.url()}`);

package/src/actions/get-current-html.js CHANGED Viewed

@@ -69,7 +69,8 @@ export const GET_CURRENT_HTML_TOOL = {
     type: "object",
     properties: {
       url: { type: "string", description: "The URL of the page (must match a previously fetched page)" },
-      removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true }
+      removeUnnecessaryHTML: { type: "boolean", description: "Remove Unnecessary HTML for size reduction by 90%.", default: true },
+      selector: { type: "string", description: "CSS selector to extract a specific DOM subtree instead of the full page. Use to scope extraction and reduce response size (e.g., 'main', '[role=\"main\"]', 'body > div:first-child'). If no elements match, falls back to full page with a note." }
     },
     required: ["url"],
     additionalProperties: false
@@ -107,9 +108,9 @@ export const GET_CURRENT_HTML_TOOL = {
  * @param {boolean} [params.removeUnnecessaryHTML=true] - Whether to clean HTML
  * @returns {Promise<Object>} Result object with current HTML
  */
-export async function getCurrentHtml({ url, removeUnnecessaryHTML = true }) {
+export async function getCurrentHtml({ url, removeUnnecessaryHTML = true, selector = null }) {
   const startTime = Date.now();
-  logger.info(`browser_get_current_html called: url=${url}`);
+  logger.info(`browser_get_current_html called: url=${url}${selector ? ` selector=${selector}` : ''}`);
   if (!url) {
     throw new Error("url parameter is required");
@@ -158,7 +159,22 @@ export async function getCurrentHtml({ url, removeUnnecessaryHTML = true }) {
   try {
     const currentUrl = page.url();
-    const html = await extractAndProcessHtml(page, removeUnnecessaryHTML);
+    const html = await extractAndProcessHtml(page, removeUnnecessaryHTML, selector);
+    // Detect empty/near-empty HTML extraction (e.g., CSP blocking page.evaluate)
+    if (!html || html.trim().length < 100) {
+      logger.warn(`browser_get_current_html: HTML extraction returned empty/minimal content from ${currentUrl} (${html ? html.trim().length : 0} chars)`);
+      return new InformationalResponse(
+        `HTML extraction returned empty content from ${currentUrl}`,
+        'The page may be blocking evaluation via Content Security Policy (CSP), the page has not fully rendered, or the page uses a sandboxed context that prevents DOM reading.',
+        [
+          "Use MCPBrowser's browser_take_screenshot to verify the page is visually loaded",
+          "Use MCPBrowser's browser_execute_javascript with a simple script like 'document.title' to test page accessibility",
+          "Try MCPBrowser's browser_fetch_webpage to reload the page",
+          "Wait and retry — the page may still be rendering"
+        ]
+      );
+    }
     logger.info(`browser_get_current_html completed: got HTML from ${currentUrl}`);

package/src/core/html.js CHANGED Viewed

@@ -70,8 +70,9 @@ export function cleanHtml(html) {
   // Remove event handler attributes (onclick, onload, etc.)
   cleaned = cleaned.replace(/\s+on[a-z]+\s*=\s*["'][^"']*["']/gi, '');
-  // Remove role attributes
-  cleaned = cleaned.replace(/\s+role=["'][^"']*["']/gi, '');
+  // Keep role attributes — they're semantically valuable for LLM understanding
+  // and enable stable selectors like [role="main"], [role="navigation"]
+  // cleaned = cleaned.replace(/\s+role=["'][^"']*["']/gi, '');
   // Remove aria-* attributes
   cleaned = cleaned.replace(/\s+aria-[a-z0-9-]+=["'][^"']*["']/gi, '');

package/src/core/page.js CHANGED Viewed

@@ -475,23 +475,52 @@ async function waitForNavigationToSettle(page) {
  * settle and retries once.
  * @param {Page} page - The Puppeteer page instance
  * @param {boolean} removeUnnecessaryHTML - Whether to clean the HTML
+ * @param {string|null} [selector=null] - CSS selector to extract a DOM subtree instead of full page
  * @returns {Promise<string>} The processed HTML
  */
-export async function extractAndProcessHtml(page, removeUnnecessaryHTML) {
+export async function extractAndProcessHtml(page, removeUnnecessaryHTML, selector = null) {
   let html;
+  const extractFn = selector
+    ? (sel) => {
+        const els = document.querySelectorAll(sel);
+        if (!els.length) return null;
+        return Array.from(els).map(el => el.outerHTML).join('\n');
+      }
+    : () => document.documentElement?.outerHTML || "";
+  const extractArg = selector || undefined;
   try {
-    html = await page.evaluate(() => document.documentElement?.outerHTML || "");
+    html = await page.evaluate(extractFn, extractArg);
   } catch (err) {
     if (isNavigationError(err)) {
       logger.debug('Late navigation during HTML extraction, waiting for settle...');
       await waitForNavigationToSettle(page);
       // Re-run page readiness — the new page may be a SPA that needs rendering time
       await waitForPageReady(page);
-      html = await page.evaluate(() => document.documentElement?.outerHTML || "");
+      html = await page.evaluate(extractFn, extractArg);
     } else {
       throw err;
     }
   }
+  // If selector matched nothing, fall back to full page with a note
+  if (selector && html === null) {
+    logger.debug(`Selector "${selector}" matched no elements, falling back to full page`);
+    try {
+      html = await page.evaluate(() => document.documentElement?.outerHTML || "");
+    } catch (err) {
+      if (isNavigationError(err)) {
+        await waitForNavigationToSettle(page);
+        await waitForPageReady(page);
+        html = await page.evaluate(() => document.documentElement?.outerHTML || "");
+      } else {
+        throw err;
+      }
+    }
+    html = `<!-- selector "${selector}" matched no elements; returning full page -->\n` + html;
+  }
   let processedHtml;
   if (removeUnnecessaryHTML) {
@@ -501,5 +530,12 @@ export async function extractAndProcessHtml(page, removeUnnecessaryHTML) {
     processedHtml = enrichHtml(html, page.url());
   }
+  // Warn when response is very large — the agent should use the selector parameter
+  // to scope extraction to a DOM subtree instead of fetching the entire page.
+  const htmlByteLength = new TextEncoder().encode(processedHtml).length;
+  if (htmlByteLength > 500_000) {
+    logger.warn(`Large HTML response (${(htmlByteLength / 1024).toFixed(0)}KB). Consider using the "selector" parameter to extract a specific DOM subtree instead of the full page.`);
+  }
   return processedHtml;
 }